From 88267ddca04245917fbcaa561998f289eec555ac Mon Sep 17 00:00:00 2001 From: Dovi Cowan Date: Fri, 30 Jun 2023 20:07:56 +0100 Subject: [PATCH] modify encoding before sending to AWS --- lib/provider.js | 58 +++++++++++++++++++++++++++++++++++------------ package-lock.json | 17 ++++++++++++++ package.json | 1 + 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/lib/provider.js b/lib/provider.js index 9af3acd..efeafee 100644 --- a/lib/provider.js +++ b/lib/provider.js @@ -17,15 +17,14 @@ const { Writable, - Transform, - PassThrough, } = require('stream'); const GoogleSpeech = require('@google-cloud/speech'); const { TranscribeStreamingClient, StartStreamTranscriptionCommand, } = require('@aws-sdk/client-transcribe-streaming'); -const { config } = require('process'); +const fs = require('fs'); +const { WaveFile } = require('wavefile'); /* * For speech provider implementer. @@ -406,12 +405,13 @@ class AWSProvider extends Writable { console.debug(process.env.AWS_SECRET_KEY_ID) - this.stream = new TransformStream(); - this.readStream = this.stream.readable.getReader(); + this.stream = new TransformStream({ highWaterMark: 1 * 1024 }); + this.readStream = this.stream.readable.getReader({ highWaterMark: 1 * 1024 }); this.writeStream = this.stream.writable; this.recognizeStream = null; + this.fullStream = []; console.log(this.writeStream); } @@ -426,11 +426,16 @@ class AWSProvider extends Writable { } _write(chunk, encoding, callback) { - this.recognizeStream.write(chunk); + this.fullStream.push(chunk); - // this.readStream.read().then((res) => { - // console.log(res) - // }) + const wav = new WaveFile(); + + wav.fromScratch(1, 8000, '8m', chunk); + wav.fromMuLaw(); + + wav.toSampleRate(16000); + + this.recognizeStream.write(wav.data.samples); callback(); } @@ -472,12 +477,11 @@ class AWSProvider extends Writable { async function* audioStream() { for await (const chunk of audioSource()) { - console.debug("CHUNKING"); yield {AudioEvent: {AudioChunk: chunk.value}}; } } - audioStream().next().then(res => console.debug(res.value.AudioEvent.AudioChunk)); + // audioStream().next().then(res => console.debug(res.value.AudioEvent.AudioChunk)); // console.debug('AUDIO'); // this.audioStream().next().then(res => console.debug(res)); @@ -486,16 +490,27 @@ class AWSProvider extends Writable { this.param = { LanguageCode: this.LanguageCode, MediaEncoding: this.MediaEncoding, - MediaSampleRateHertz: 8000, + MediaSampleRateHertz: 16000, AudioStream: audioStream(), } - const command = new StartStreamTranscriptionCommand(this.param); + this.command = new StartStreamTranscriptionCommand(this.param); - this.client.send(command).then(async (res) => { + this.client.send(this.command).then(async (res) => { for await (const event of res.TranscriptResultStream) { - console.debug(event); + if (event.TranscriptEvent) { + const results = event.TranscriptEvent.Transcript.Results; + // Print all the possible transcripts + results.map((result) => { + (result.Alternatives || []).map((alternative) => { + const transcript = alternative.Items.map((item) => item.Content).join(" "); + console.log(transcript); + }); + }); + } }; + }).catch((err) => { + console.debug(err); }) return; @@ -510,6 +525,19 @@ class AWSProvider extends Writable { return; } + + const buffer = Buffer.concat(this.fullStream); + console.debug(buffer); + + const wav = new WaveFile(); + + wav.fromScratch(1, 8000, '8m', buffer); + wav.fromMuLaw(); + + wav.toSampleRate(16000); + + fs.writeFileSync('stream.wav', wav.toBuffer()); + // this.recognizeStream.close(); // console.log(this.recognizeStream); diff --git a/package-lock.json b/package-lock.json index 2b88595..fe487ad 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@aws-sdk/client-transcribe-streaming": "^3.362.0", "@google-cloud/speech": "^4.9.0", + "wavefile": "^11.0.0", "ws": "^8.3.0", "yargs": "^17.3.1" }, @@ -2039,6 +2040,17 @@ "uuid": "dist/bin/uuid" } }, + "node_modules/wavefile": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/wavefile/-/wavefile-11.0.0.tgz", + "integrity": "sha512-/OBiAALgWU24IG7sC84cDO/KfFuvajWc5Uec0oV2zrpOOZZDgGdOwHwgEzOrwh8jkubBk7PtZfQBIcI1OaE5Ng==", + "bin": { + "wavefile": "bin/wavefile.js" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", @@ -3772,6 +3784,11 @@ "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==" }, + "wavefile": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/wavefile/-/wavefile-11.0.0.tgz", + "integrity": "sha512-/OBiAALgWU24IG7sC84cDO/KfFuvajWc5Uec0oV2zrpOOZZDgGdOwHwgEzOrwh8jkubBk7PtZfQBIcI1OaE5Ng==" + }, "webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", diff --git a/package.json b/package.json index dcb2762..423df64 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "dependencies": { "@aws-sdk/client-transcribe-streaming": "^3.362.0", "@google-cloud/speech": "^4.9.0", + "wavefile": "^11.0.0", "ws": "^8.3.0", "yargs": "^17.3.1" }