Compare commits
7 commits
master
...
deepgram-a
Author | SHA1 | Date | |
---|---|---|---|
86efaa1936 | |||
df2340e0ee | |||
49d0c65288 | |||
d973730b25 | |||
e9a3d38d58 | |||
88267ddca0 | |||
87c8a4618e |
4 changed files with 3979 additions and 341 deletions
7
index.js
Executable file → Normal file
7
index.js
Executable file → Normal file
|
@ -33,6 +33,11 @@ const argv = require("yargs/yargs")(process.argv.slice(2))
|
|||
type: "number",
|
||||
group: "Server",
|
||||
},
|
||||
provider: {
|
||||
desc: "Speech-to-text provider",
|
||||
default: "google",
|
||||
type: "string",
|
||||
},
|
||||
})
|
||||
.strict()
|
||||
.argv;
|
||||
|
@ -46,7 +51,7 @@ server.on("connection", (client) => {
|
|||
codecs: codecs,
|
||||
languages: languages,
|
||||
transport: client,
|
||||
provider: getProvider("google", argv),
|
||||
provider: getProvider(argv.provider, argv),
|
||||
});
|
||||
});
|
||||
|
||||
|
|
205
lib/provider.js
205
lib/provider.js
|
@ -15,8 +15,16 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
const { Writable } = require('stream');
|
||||
const speech = require('@google-cloud/speech');
|
||||
const {
|
||||
Writable,
|
||||
} = require('stream');
|
||||
const GoogleSpeech = require('@google-cloud/speech');
|
||||
const {
|
||||
TranscribeStreamingClient,
|
||||
StartStreamTranscriptionCommand,
|
||||
} = require('@aws-sdk/client-transcribe-streaming');
|
||||
const fs = require('fs');
|
||||
const { WaveFile } = require('wavefile');
|
||||
|
||||
/*
|
||||
* For speech provider implementer.
|
||||
|
@ -114,7 +122,7 @@ class GoogleProvider extends Writable {
|
|||
}
|
||||
|
||||
_construct(callback) {
|
||||
this.client = new speech.SpeechClient();
|
||||
this.client = new GoogleSpeech.SpeechClient();
|
||||
|
||||
callback();
|
||||
}
|
||||
|
@ -253,19 +261,188 @@ class GoogleProvider extends Writable {
|
|||
return;
|
||||
}
|
||||
|
||||
this.cork(); // Buffer any incoming data
|
||||
/**
 * Speech provider that forwards audio to Amazon Transcribe streaming.
 *
 * Audio written to this stream is treated as 8 kHz, 8-bit mu-law mono,
 * converted to 16 kHz PCM, and fed to a streaming transcription session.
 * Every final (non-partial) transcript is emitted as a `result` event whose
 * payload is `{ text: string }`.
 */
class AWSProvider extends Writable {
    /** Sample rate (Hz) assumed for the incoming mu-law audio. */
    static SOURCE_SAMPLE_RATE = 8000;

    /** Sample rate (Hz) of the PCM audio sent to Transcribe. */
    static TARGET_SAMPLE_RATE = 16000;

    /**
     * @param {Object} options - provider options (currently unused)
     */
    constructor(options) {
        super();

        this.LanguageCode = "en-GB";
        this.MediaEncoding = "pcm";
        this.credentials = {
            accessKeyId: process.env.AWS_ACCESS_KEY_ID,
            secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
        };

        // Per-session state. It is created by start() and released by
        // stop(); nothing may be dereferenced here because no session
        // exists yet. Credential-related values are deliberately not logged.
        this.stream = null;
        this.readStream = null;
        this.writeStream = null;
        this.recognizeStream = null;
    }

    /**
     * Creates the Transcribe streaming client.
     *
     * @param {Function} callback - invoked once the client has been created
     */
    _construct(callback) {
        this.client = new TranscribeStreamingClient({
            region: "eu-west-2",
            credentials: this.credentials,
        });

        callback();
    }

    /**
     * Converts one mu-law chunk to 16 kHz PCM and queues it for the active
     * session. Audio is dropped when no session is active or the chunk is
     * empty.
     *
     * @param {Buffer|Uint8Array} chunk - raw mu-law audio
     */
    #forward(chunk) {
        const writer = this.recognizeStream;

        if (!writer || chunk.length === 0) {
            return;
        }

        const wav = new WaveFile();

        wav.fromScratch(1, AWSProvider.SOURCE_SAMPLE_RATE, '8m', chunk);
        wav.fromMuLaw();
        wav.toSampleRate(AWSProvider.TARGET_SAMPLE_RATE);

        // write() returns a promise; a rejection (e.g. the session was
        // closed meanwhile) must not surface as an unhandled rejection.
        writer.write(wav.data.samples).catch((err) => console.debug(err));
    }

    /**
     * Writable implementation: forwards a single chunk.
     *
     * @param {Buffer} chunk - audio data
     * @param {string} encoding - ignored
     * @param {Function} callback - called exactly once, with an error if
     *     the conversion failed
     */
    _write(chunk, encoding, callback) {
        let failure = null;

        try {
            this.#forward(chunk);
        } catch (err) {
            failure = err;
        }

        callback(failure);
    }

    /**
     * Writable implementation: forwards a batch of buffered chunks.
     *
     * @param {Array<{chunk: Buffer, encoding: string}>} chunks - buffered
     *     writes, in order
     * @param {Function} callback - called exactly once for the whole batch
     */
    _writev(chunks, callback) {
        let failure = null;

        try {
            for (const { chunk } of chunks) {
                this.#forward(chunk);
            }
        } catch (err) {
            failure = err;
        }

        callback(failure);
    }

    /**
     * Writable implementation: ends the active session when the stream
     * is finished.
     *
     * @param {Function} callback - invoked after the session was stopped
     */
    _final(callback) {
        this.stop();

        callback();
    }

    /**
     * Sends the command and emits a `result` event for each final
     * transcript. Failures are logged, matching the provider's existing
     * error handling.
     *
     * @param {Object} command - the StartStreamTranscriptionCommand to send
     */
    async #consumeTranscripts(command) {
        try {
            const response = await this.client.send(command);

            for await (const event of response.TranscriptResultStream) {
                const first = event.TranscriptEvent?.Transcript.Results[0];

                if (first === undefined || first.IsPartial) {
                    continue;
                }

                const text = first.Alternatives[0].Transcript;

                console.debug("AWSProvider: result: " + text);
                this.emit('result', { "text": text });
            }
        } catch (err) {
            console.debug(err);
        }
    }

    /**
     * Starts a new transcription session. A session that is still active
     * is stopped first.
     *
     * @param {Object} [config] - configuration to use (currently unused)
     */
    start(config) {
        if (this.recognizeStream) {
            this.stop();
        }

        console.log("START");

        // A fresh identity TransformStream per session: its writer and
        // reader stay locked once acquired, so they cannot be reused by a
        // later start(). Queueing strategies are separate constructor
        // arguments, so none are passed here.
        this.stream = new TransformStream();
        this.readStream = this.stream.readable.getReader();
        this.writeStream = this.stream.writable;
        this.recognizeStream = this.writeStream.getWriter();

        const reader = this.readStream;

        // Ends when the writer is closed by stop(), which lets the request
        // body finish instead of yielding undefined chunks forever.
        async function* audioStream() {
            while (true) {
                const { done, value } = await reader.read();

                if (done) {
                    return;
                }

                yield { AudioEvent: { AudioChunk: value } };
            }
        }

        this.param = {
            LanguageCode: this.LanguageCode,
            MediaEncoding: this.MediaEncoding,
            MediaSampleRateHertz: AWSProvider.TARGET_SAMPLE_RATE,
            AudioStream: audioStream(),
        };

        this.command = new StartStreamTranscriptionCommand(this.param);

        // Never rejects: errors are handled inside #consumeTranscripts.
        void this.#consumeTranscripts(this.command);
    }

    /**
     * Stops the active session, if any, by closing the audio writer.
     */
    stop() {
        const writer = this.recognizeStream;

        if (!writer) {
            return;
        }

        // Clear first so audio arriving after stop() is dropped rather than
        // written to a closing stream.
        this.recognizeStream = null;

        writer.close().catch((err) => console.debug(err));

        console.log("End of stream");
    }

    /**
     * Restarts the recognition stream.
     *
     * @param {Object} [config] - configuration to use
     */
    restart(config) {
        this.stop();
        this.start(config);
    }
}
|
||||
|
||||
class DeepgramProvider extends Writable {
|
||||
constructor(options) {
|
||||
|
||||
}
|
||||
|
||||
_construct(callback) {
|
||||
|
||||
}
|
||||
|
||||
_write(chunk, encoding, callback) {
|
||||
|
||||
}
|
||||
|
||||
_writev(chunks, callback) {
|
||||
|
||||
}
|
||||
|
||||
_final(callback) {
|
||||
|
||||
}
|
||||
|
||||
start(config) {
|
||||
|
||||
}
|
||||
|
||||
stop() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Restarts the recognition stream.
|
||||
*
|
||||
* @param {Object} [config] - configuration to use
|
||||
* @param {Object} [config.codec] - the codec to map to an encoding
|
||||
* @param {string} [config.language] - the language to use
|
||||
*/
|
||||
restart(config) {
|
||||
this.stop();
|
||||
this.start(config);
|
||||
|
@ -284,6 +461,10 @@ function getProvider(name, options) {
|
|||
return new GoogleProvider(options);
|
||||
}
|
||||
|
||||
if (name == "aws") {
|
||||
return new AWSProvider(options);
|
||||
}
|
||||
|
||||
throw new Error("Unsupported speech provider '" + name + "'");
|
||||
}
|
||||
|
||||
|
|
4103
package-lock.json
generated
4103
package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
@ -18,7 +18,10 @@
|
|||
"speech"
|
||||
],
|
||||
"dependencies": {
|
||||
"@google-cloud/speech": "^4.9.0",
|
||||
"@aws-sdk/client-transcribe-streaming": "^3.362.0",
|
||||
"@deepgram/sdk": "^2.4.0",
|
||||
"@google-cloud/speech": "^5.6.0",
|
||||
"wavefile": "^11.0.0",
|
||||
"ws": "^8.3.0",
|
||||
"yargs": "^17.3.1"
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue