Compare commits

...

7 commits

4 changed files with 3831 additions and 343 deletions

7
index.js Executable file → Normal file
View file

@ -33,6 +33,11 @@ const argv = require("yargs/yargs")(process.argv.slice(2))
type: "number", type: "number",
group: "Server", group: "Server",
}, },
provider: {
desc: "Speech-to-text provider",
default: "google",
type: "string",
},
}) })
.strict() .strict()
.argv; .argv;
@ -46,7 +51,7 @@ server.on("connection", (client) => {
codecs: codecs, codecs: codecs,
languages: languages, languages: languages,
transport: client, transport: client,
provider: getProvider("google", argv), provider: getProvider(argv.provider, argv),
}); });
}); });

View file

@ -15,8 +15,16 @@
* limitations under the License. * limitations under the License.
*/ */
const { Writable } = require('stream'); const {
const speech = require('@google-cloud/speech'); Writable,
} = require('stream');
const GoogleSpeech = require('@google-cloud/speech');
const {
TranscribeStreamingClient,
StartStreamTranscriptionCommand,
} = require('@aws-sdk/client-transcribe-streaming');
const fs = require('fs');
const { WaveFile } = require('wavefile');
/* /*
* For speech provider implementer. * For speech provider implementer.
@ -114,7 +122,7 @@ class GoogleProvider extends Writable {
} }
_construct(callback) { _construct(callback) {
this.client = new speech.SpeechClient(); this.client = new GoogleSpeech.SpeechClient();
callback(); callback();
} }
@ -252,23 +260,159 @@ class GoogleProvider extends Writable {
if (!this.recognizeStream) { if (!this.recognizeStream) {
return; return;
} }
}
}
this.cork(); // Buffer any incoming data class AWSProvider extends Writable {
constructor(options) {
super();
this.recognizeStream.end(); this.LanguageCode = "en-GB";
this.recognizeStream = null; this.MediaEncoding = "pcm";
this.credentials = {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
} }
/** console.debug(process.env.AWS_SECRET_KEY_ID)
* Restarts the recognition stream.
* this.stream = new TransformStream({ highWaterMark: 1 * 1024 });
* @param {Object} [config] - configuration to use this.readStream = this.stream.readable.getReader({ highWaterMark: 1 * 1024 });
* @param {Object} [config.codec] - the codec to map to an encoding this.writeStream = this.stream.writable;
* @param {string} [config.language] - the language to use
*/ this.recognizeStream = null;
restart(config) {
this.fullStream = [];
console.log(this.writeStream);
}
_construct(callback) {
this.client = new TranscribeStreamingClient({
region: "eu-west-2",
credentials: this.credentials
});
callback();
}
_write(chunk, encoding, callback) {
this.fullStream.push(chunk);
const wav = new WaveFile();
wav.fromScratch(1, 8000, '8m', chunk);
wav.fromMuLaw();
wav.toSampleRate(16000);
this.recognizeStream.write(wav.data.samples);
callback();
}
_writev(chunks, callback) {
for (let chunk in chunks) {
this._write(chunk, null, callback);
}
callback();
}
_final(callback) {
this.stop(); this.stop();
this.start(config);
callback();
}
start(config) {
// this.setConfig(config);
// config = this.config;
console.log("START");
this.recognizeStream = this.writeStream.getWriter();
// const passthrough = new PassThrough();
// this.readStream.pipe(passthrough);
const readStream = this.readStream;
async function* audioSource() {
// await readStream.start();
while (readStream.ends !== true) {
const chunk = await readStream.read();
yield chunk;
}
}
async function* audioStream() {
for await (const chunk of audioSource()) {
yield {AudioEvent: {AudioChunk: chunk.value}};
}
}
this.param = {
LanguageCode: this.LanguageCode,
MediaEncoding: this.MediaEncoding,
MediaSampleRateHertz: 16000,
AudioStream: audioStream(),
}
this.command = new StartStreamTranscriptionCommand(this.param);
this.client.send(this.command).then(async (res) => {
for await (const event of res.TranscriptResultStream) {
if (event.TranscriptEvent) {
const results = event.TranscriptEvent.Transcript.Results;
if(results[0] !== undefined) {
if(!results[0].IsPartial) {
console.debug("AWSProvider: result: " + results[0].Alternatives[0].Transcript);
const result = {
"text": results[0].Alternatives[0].Transcript
};
this.emit('result', result);
}
}
// Print all the possible transcripts
}
};
}).catch((err) => {
console.debug(err);
})
return;
}
stop() {
if(!this.recognizeStream) {
return;
}
const buffer = Buffer.concat(this.fullStream);
console.debug(buffer);
const wav = new WaveFile();
wav.fromScratch(1, 8000, '8m', buffer);
wav.fromMuLaw();
wav.toSampleRate(16000);
fs.writeFileSync('stream.wav', wav.toBuffer());
// this.recognizeStream.close();
console.log("End of stream");
// return;
}
restart(config) {
this.stop()
this.start(config)
} }
} }
@ -284,6 +428,10 @@ function getProvider(name, options) {
return new GoogleProvider(options); return new GoogleProvider(options);
} }
if (name == "aws") {
return new AWSProvider(options);
}
throw new Error("Unsupported speech provider '" + name + "'"); throw new Error("Unsupported speech provider '" + name + "'");
} }

3983
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -18,7 +18,9 @@
"speech" "speech"
], ],
"dependencies": { "dependencies": {
"@google-cloud/speech": "^4.9.0", "@aws-sdk/client-transcribe-streaming": "^3.362.0",
"@google-cloud/speech": "^5.6.0",
"wavefile": "^11.0.0",
"ws": "^8.3.0", "ws": "^8.3.0",
"yargs": "^17.3.1" "yargs": "^17.3.1"
} }