From 8c05d033d7db416b5702434196b9573dfecacfa1 Mon Sep 17 00:00:00 2001 From: Naomi Carrigan Date: Wed, 25 Sep 2024 12:34:38 -0700 Subject: [PATCH 1/3] feat: send back raw buffer instead of ArrayBuffer --- src/packages/SpeakLiveClient.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/packages/SpeakLiveClient.ts b/src/packages/SpeakLiveClient.ts index 01052ae..2c189a2 100644 --- a/src/packages/SpeakLiveClient.ts +++ b/src/packages/SpeakLiveClient.ts @@ -81,7 +81,7 @@ export class SpeakLiveClient extends AbstractLiveClient { * Handles binary messages received from the WebSocket connection. * @param data - The binary data. */ - protected handleBinaryMessage(data: ArrayBuffer): void { + protected handleBinaryMessage(data: Buffer): void { this.emit(LiveTTSEvents.Audio, data); } @@ -149,9 +149,9 @@ export class SpeakLiveClient extends AbstractLiveClient { }); } } else if (event.data instanceof ArrayBuffer) { - this.handleBinaryMessage(event.data); + this.handleBinaryMessage(Buffer.from(event.data)); } else if (Buffer.isBuffer(event.data)) { - this.handleBinaryMessage(event.data.buffer); + this.handleBinaryMessage(event.data); } else { console.log("Received unknown data type", event.data); this.emit(LiveTTSEvents.Error, { From 74e40fdf12ca10819e5ef77b24e8f568ef18d666 Mon Sep 17 00:00:00 2001 From: Naomi Carrigan Date: Wed, 9 Oct 2024 15:43:13 -0700 Subject: [PATCH 2/3] fix: handle blob response, append wav header --- examples/node-speak-live/index.js | 30 +++++++++++++++++++++++++----- src/packages/SpeakLiveClient.ts | 4 ++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/examples/node-speak-live/index.js b/examples/node-speak-live/index.js index 4cc1ceb..685a6d6 100644 --- a/examples/node-speak-live/index.js +++ b/examples/node-speak-live/index.js @@ -1,14 +1,34 @@ const fs = require("fs"); const { createClient, LiveTTSEvents } = require("../../dist/main/index"); +// Add a wav audio container header to the file if you want to 
play the audio +// using AudioContext or a media player such as VLC, Media Player, or Apple Music. +// Without this header, the audio will not play in Chrome. +// prettier-ignore +const wavHeader = [ + 0x52, 0x49, 0x46, 0x46, // "RIFF" + 0x00, 0x00, 0x00, 0x00, // Placeholder for file size + 0x57, 0x41, 0x56, 0x45, // "WAVE" + 0x66, 0x6D, 0x74, 0x20, // "fmt " + 0x10, 0x00, 0x00, 0x00, // Chunk size (16) + 0x01, 0x00, // Audio format (1 for PCM) + 0x01, 0x00, // Number of channels (1) + 0x80, 0xBB, 0x00, 0x00, // Sample rate (48000) + 0x00, 0xEE, 0x02, 0x00, // Byte rate: NOTE(review) these bytes encode 192000; 48000 Hz * 2-byte mono would be 96000 (0x00, 0x77, 0x01, 0x00) - TODO confirm + 0x02, 0x00, // Block align (2) + 0x10, 0x00, // Bits per sample (16) + 0x64, 0x61, 0x74, 0x61, // "data" + 0x00, 0x00, 0x00, 0x00 // Placeholder for data size +]; + const live = async () => { const text = "Hello, how can I help you today?"; const deepgram = createClient(process.env.DEEPGRAM_API_KEY); - const dgConnection = deepgram.speak.live({ model: "aura-asteria-en" }); + const dgConnection = deepgram.speak.live({ model: "aura-asteria-en", encoding: "linear16" }); - let audioBuffer = Buffer.alloc(0); + let audioBuffer = Buffer.from(wavHeader); dgConnection.on(LiveTTSEvents.Open, () => { console.log("Connection opened"); @@ -47,14 +67,14 @@ const live = async () => { const writeFile = () => { if (audioBuffer.length > 0) { - fs.writeFile("output.mp3", audioBuffer, (err) => { + fs.writeFile("output.wav", audioBuffer, (err) => { if (err) { console.error("Error writing audio file:", err); } else { - console.log("Audio file saved as output.mp3"); + console.log("Audio file saved as output.wav"); } }); - audioBuffer = Buffer.alloc(0); // Reset buffer after writing + audioBuffer = Buffer.from(wavHeader); // Reset buffer after writing } }; }; diff --git a/src/packages/SpeakLiveClient.ts b/src/packages/SpeakLiveClient.ts index 2c189a2..7beff50 100644 --- a/src/packages/SpeakLiveClient.ts +++ b/src/packages/SpeakLiveClient.ts @@ -148,6 +148,10 @@ export class SpeakLiveClient 
extends AbstractLiveClient { error, }); } + } else if (event.data instanceof Blob) { + event.data.arrayBuffer().then((buffer) => { + this.handleBinaryMessage(Buffer.from(buffer)); + }); } else if (event.data instanceof ArrayBuffer) { this.handleBinaryMessage(Buffer.from(event.data)); } else if (Buffer.isBuffer(event.data)) { From 6a5ae5c0661927103655683c3c6907327296b7c7 Mon Sep 17 00:00:00 2001 From: Naomi Carrigan Date: Thu, 10 Oct 2024 12:08:13 -0700 Subject: [PATCH 3/3] fix: pass sample rate option --- examples/node-speak-live/index.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/node-speak-live/index.js b/examples/node-speak-live/index.js index 685a6d6..60645e7 100644 --- a/examples/node-speak-live/index.js +++ b/examples/node-speak-live/index.js @@ -26,7 +26,11 @@ const live = async () => { const deepgram = createClient(process.env.DEEPGRAM_API_KEY); - const dgConnection = deepgram.speak.live({ model: "aura-asteria-en", encoding: "linear16" }); + const dgConnection = deepgram.speak.live({ + model: "aura-asteria-en", + encoding: "linear16", + sample_rate: 48000, + }); let audioBuffer = Buffer.from(wavHeader);