diff --git a/Deepgram.Dev.sln b/Deepgram.Dev.sln index b8a4b06c..4c4f993b 100644 --- a/Deepgram.Dev.sln +++ b/Deepgram.Dev.sln @@ -145,6 +145,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "speech-to-text", "speech-to EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rest", "rest", "{C1A7ADF7-ACAC-4B10-8266-C7224156C012}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "websocket", "websocket", "{889B3075-777E-476D-BB18-B1CD647F1893}" +EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "file", "file", "{F2DCE1E6-FC12-4CA5-A738-5A3F359B8A96}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "hello-world", "hello-world", "{F31817AD-AC9F-4021-A9E0-7C26C31D5744}" @@ -159,6 +161,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "models", "models", "{1CC0C0 EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Models", "examples\manage\models\Models.csproj", "{918E56D3-FABF-4F50-AC01-8DDFF58FB0CE}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "simple", "simple", "{2F92D959-D3C7-4EFF-8549-C6162E517644}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Speak", "examples\text-to-speech\websocket\simple\Speak.csproj", "{ECB0B55E-54C1-4723-8641-9249E7507FB0}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -293,6 +299,10 @@ Global {918E56D3-FABF-4F50-AC01-8DDFF58FB0CE}.Debug|Any CPU.Build.0 = Debug|Any CPU {918E56D3-FABF-4F50-AC01-8DDFF58FB0CE}.Release|Any CPU.ActiveCfg = Release|Any CPU {918E56D3-FABF-4F50-AC01-8DDFF58FB0CE}.Release|Any CPU.Build.0 = Release|Any CPU + {ECB0B55E-54C1-4723-8641-9249E7507FB0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {ECB0B55E-54C1-4723-8641-9249E7507FB0}.Debug|Any CPU.Build.0 = Debug|Any CPU + {ECB0B55E-54C1-4723-8641-9249E7507FB0}.Release|Any CPU.ActiveCfg = Release|Any CPU + {ECB0B55E-54C1-4723-8641-9249E7507FB0}.Release|Any CPU.Build.0 = Release|Any CPU 
EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -362,6 +372,7 @@ Global {B98F6A00-292E-431F-8B11-0CFCA5FD1E37} = {02746530-6811-4F1B-8851-544C89CA66DA} {5EEA8D9D-0C0D-4EE3-93B1-F0133C629478} = {C673DFD1-528A-4BAE-94E6-02EF058AC363} {C1A7ADF7-ACAC-4B10-8266-C7224156C012} = {E2E3000D-FBBA-450E-A4E0-3542B38ADAFD} + {889B3075-777E-476D-BB18-B1CD647F1893} = {E2E3000D-FBBA-450E-A4E0-3542B38ADAFD} {F2DCE1E6-FC12-4CA5-A738-5A3F359B8A96} = {C1A7ADF7-ACAC-4B10-8266-C7224156C012} {F31817AD-AC9F-4021-A9E0-7C26C31D5744} = {F2DCE1E6-FC12-4CA5-A738-5A3F359B8A96} {E1B8DE3D-2B86-4A60-BDC1-A7F425986DC1} = {F2DCE1E6-FC12-4CA5-A738-5A3F359B8A96} @@ -369,6 +380,8 @@ Global {12115887-AFBF-4EEF-953D-936B9D810E97} = {E1B8DE3D-2B86-4A60-BDC1-A7F425986DC1} {1CC0C0DE-55D9-4B83-9070-1668A4472A27} = {FA5723B3-74E9-4221-80EF-4833C1C3DD9F} {918E56D3-FABF-4F50-AC01-8DDFF58FB0CE} = {1CC0C0DE-55D9-4B83-9070-1668A4472A27} + {2F92D959-D3C7-4EFF-8549-C6162E517644} = {889B3075-777E-476D-BB18-B1CD647F1893} + {ECB0B55E-54C1-4723-8641-9249E7507FB0} = {2F92D959-D3C7-4EFF-8549-C6162E517644} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {8D4ABC6D-7126-4EE2-9303-43A954616B2A} diff --git a/Deepgram.Tests/UnitTests/ClientTests/SpeakClientTests.cs b/Deepgram.Tests/UnitTests/ClientTests/SpeakClientTests.cs index b2cc6108..b9b84e5e 100644 --- a/Deepgram.Tests/UnitTests/ClientTests/SpeakClientTests.cs +++ b/Deepgram.Tests/UnitTests/ClientTests/SpeakClientTests.cs @@ -152,6 +152,7 @@ public async Task StreamCallBack_Should_Throw_ArgumentException_With_No_CallBack var expectedResponse = new AutoFaker().Generate(); var speakSchema = new AutoFaker().Generate(); var source = new TextSource("Hello World!"); + speakSchema.CallBack = null; // Fake Client var httpClient = MockHttpClient.CreateHttpClientWithResult(expectedResponse); @@ -160,8 +161,6 @@ public async Task StreamCallBack_Should_Throw_ArgumentException_With_No_CallBack // Mock 
Methods speakClient.When(x => x.PostAsync(Arg.Any(), Arg.Any(), Arg.Any())).DoNotCallBase(); speakClient.PostAsync(url, Arg.Any(), Arg.Any()).Returns(expectedResponse); - - speakSchema.CallBack = null; // Act and Assert await speakClient.Invoking(y => y.StreamCallBack(source, null, speakSchema)) diff --git a/Deepgram.Tests/UnitTests/HttpExtensionsTests/HttpClientExtensionTests.cs b/Deepgram.Tests/UnitTests/HttpExtensionsTests/HttpClientExtensionTests.cs index 7786e38e..d581d796 100644 --- a/Deepgram.Tests/UnitTests/HttpExtensionsTests/HttpClientExtensionTests.cs +++ b/Deepgram.Tests/UnitTests/HttpExtensionsTests/HttpClientExtensionTests.cs @@ -70,7 +70,7 @@ public void Should_Return_HttpClient_With_Default_BaseAddress_And_Custom_Headers { // Input and Output var _apiKey = new Faker().Random.Guid().ToString(); - var _clientOptions = new DeepgramHttpClientOptions(_apiKey, null, null, FakeHeaders()); + var _clientOptions = new DeepgramHttpClientOptions(_apiKey, null, null, null, FakeHeaders()); // Fake Clients var httpClient = MockHttpClient.CreateHttpClientWithResult(new MessageResponse(), HttpStatusCode.OK); @@ -95,7 +95,7 @@ public void Should_Return_HttpClient_With_Custom_BaseAddress_And_Custom_Headers( var expectedBaseAddress = $"https://{_customUrl}/v1"; var customBaseAddress = $"https://{_customUrl}"; var _apiKey = new Faker().Random.Guid().ToString(); - var _clientOptions = new DeepgramHttpClientOptions(_apiKey, customBaseAddress, null, FakeHeaders()); + var _clientOptions = new DeepgramHttpClientOptions(_apiKey, customBaseAddress, null, null, FakeHeaders()); // Fake Clients var httpClient = MockHttpClient.CreateHttpClientWithResult(new MessageResponse(), HttpStatusCode.OK); @@ -122,7 +122,7 @@ public void Should_Return_HttpClient_With_Predefined_Values() var expectedBaseAddress = $"https://{_customUrl}/v1"; var customBaseAddress = $"https://{_customUrl}"; var _apiKey = new Faker().Random.Guid().ToString(); - var _clientOptions = new 
DeepgramHttpClientOptions(_apiKey, customBaseAddress, null, FakeHeaders()); + var _clientOptions = new DeepgramHttpClientOptions(_apiKey, customBaseAddress, null, null, FakeHeaders()); // Fake Clients var httpClient = MockHttpClient.CreateHttpClientWithResult(new MessageResponse(), HttpStatusCode.OK, expectedBaseAddress); diff --git a/Deepgram/ClientFactory.cs b/Deepgram/ClientFactory.cs index b617755d..7fed455a 100644 --- a/Deepgram/ClientFactory.cs +++ b/Deepgram/ClientFactory.cs @@ -144,14 +144,14 @@ public static ISpeakRESTClient CreateSpeakRESTClient(string apiKey = "", Deepgra return new SpeakRESTClient(apiKey, options, httpId); } - ///// - ///// Create a new AnalyzeClient - ///// - ///// - ///// - ///// - //public static ISpeakWebSocketClient CreateSpeakWebSocketClient(string apiKey = "", DeepgramWsClientOptions? options = null) - //{ - // return new SpeakWebSocketClient(apiKey, options); - //} + /// + /// Create a new AnalyzeClient + /// + /// + /// + /// + public static ISpeakWebSocketClient CreateSpeakWebSocketClient(string apiKey = "", DeepgramWsClientOptions? options = null) + { + return new SpeakWebSocketClient(apiKey, options); + } } diff --git a/Deepgram/Clients/Interfaces/v1/ISpeakWebSocketClient.cs b/Deepgram/Clients/Interfaces/v1/ISpeakWebSocketClient.cs new file mode 100644 index 00000000..9f4b0c4b --- /dev/null +++ b/Deepgram/Clients/Interfaces/v1/ISpeakWebSocketClient.cs @@ -0,0 +1,149 @@ +// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +using Deepgram.Models.Speak.v1.WebSocket; + +namespace Deepgram.Clients.Interfaces.v1; + +/// +/// Implements version 1 of the Live Client. +/// +public interface ISpeakWebSocketClient +{ + #region Connect and Disconnect + public Task Connect(SpeakSchema options, CancellationTokenSource? cancelToken = null, Dictionary? addons = null, + Dictionary? 
headers = null); + + public Task Stop(CancellationTokenSource? cancelToken = null); + #endregion + + #region Subscribe Event + /// + /// Subscribe to an Open event from the Deepgram API + /// + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler); + + /// + /// Subscribe to a Metadata event from the Deepgram API + /// + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler); + + /// + /// Subscribe to a Flushed event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler); + + /// + /// Subscribe to a Cleared event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler); + + /// + /// Subscribe to a Audio buffer/event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler); + + /// + /// Subscribe to a Close event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler); + + /// + /// Subscribe to an Unhandled event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler); + + /// + /// Subscribe to an Warning event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler); + + + /// + /// Subscribe to an Error event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler); + #endregion + + #region Send Functions + /// + /// This method sends a binary message over the WebSocket connection. + /// + /// + public void SpeakWithText(string data); + + ///// + ///// This method Flushes the text buffer on Deepgram to be converted to audio + ///// + public void Flush(); + + ///// + ///// This method Resets the text buffer on Deepgram to be converted to audio + ///// + public void Clear(); + + ///// + ///// This method tells Deepgram to initiate the close server-side. 
+ ///// + public void Close(); + + ///// + ///// This method sends a binary message over the WebSocket connection. + ///// + ///// + //public void SpeakWithStream(byte[] data); + + /// + /// Sends a binary message over the WebSocket connection. + /// + /// The data to be sent over the WebSocket. + public void Send(byte[] data); + + ///// + ///// This method sends a binary message over the WebSocket connection. + ///// + ///// + //public void SendBinary(byte[] data); + + /// + /// This method sends a text message over the WebSocket connection. + /// + public void SendMessage(byte[] data); + + ///// + ///// This method sends a binary message over the WebSocket connection immediately without queueing. + ///// + //public void SendBinaryImmediately(byte[] data); + + /// + /// This method sends a text message over the WebSocket connection immediately without queueing. + /// + public void SendMessageImmediately(byte[] data); + #endregion + + #region Helpers + /// + /// Retrieves the connection state of the WebSocket + /// + /// Returns the connection state of the WebSocket + public WebSocketState State(); + + /// + /// Indicates whether the WebSocket is connected + /// + /// Returns true if the WebSocket is connected + public bool IsConnected(); + #endregion +} diff --git a/Deepgram/Clients/Listen/v1/WebSocket/Client.cs b/Deepgram/Clients/Listen/v1/WebSocket/Client.cs index dabd532b..dba20455 100644 --- a/Deepgram/Clients/Listen/v1/WebSocket/Client.cs +++ b/Deepgram/Clients/Listen/v1/WebSocket/Client.cs @@ -613,7 +613,7 @@ internal void ProcessDataReceived(WebSocketReceiveResult result, MemoryStream ms Log.Verbose("ProcessDataReceived", $"Type: {val}"); - if (_deepgramClientOptions.InspectMessage()) + if (_deepgramClientOptions.InspectListenMessage()) { Log.Debug("ProcessDataReceived", "Call InspectMessage..."); InspectMessage(val, data); diff --git a/Deepgram/Clients/Speak/v1/WebSocket/Client.cs b/Deepgram/Clients/Speak/v1/WebSocket/Client.cs new file mode 100644 index 
00000000..4ba6f6af --- /dev/null +++ b/Deepgram/Clients/Speak/v1/WebSocket/Client.cs @@ -0,0 +1,1079 @@ +// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + + +using Deepgram.Models.Authenticate.v1; +using Deepgram.Models.Speak.v1.WebSocket; +using Deepgram.Clients.Interfaces.v1; + +namespace Deepgram.Clients.Speak.v1.WebSocket; + +/// +/// Implements version 1 of the Live Client. +/// +public class Client : IDisposable, ISpeakWebSocketClient +{ + #region Fields + private readonly IDeepgramClientOptions _deepgramClientOptions; + + private ClientWebSocket? _clientWebSocket; + private CancellationTokenSource? _cancellationTokenSource; + + private DateTime? _lastReceived = null; + private int _flushCount = 0; + + private readonly SemaphoreSlim _mutexSubscribe = new SemaphoreSlim(1, 1); + private readonly SemaphoreSlim _mutexSend = new SemaphoreSlim(1, 1); + private readonly SemaphoreSlim _mutexLastDatagram = new SemaphoreSlim(1, 1); + #endregion + + /// Required DeepgramApiKey + /// for HttpClient Configuration + public Client(string? apiKey = null, IDeepgramClientOptions? options = null) + { + Log.Verbose("SpeakClient", "ENTER"); + + options ??= new DeepgramWsClientOptions(apiKey); + _deepgramClientOptions = options; + + Log.Debug("SpeakClient", $"APIVersion: {options.APIVersion}"); + Log.Debug("SpeakClient", $"BaseAddress: {options.BaseAddress}"); + Log.Debug("SpeakClient", $"options: {options.OnPrem}"); + Log.Debug("LiveClient", $"Autoflush: {options.AutoFlushSpeakDelta}"); + Log.Verbose("SpeakClient", "LEAVE"); + } + + #region Event Handlers + /// + /// Fires when an event is received from the Deepgram API + /// + private event EventHandler? _openReceived; + private event EventHandler? _metadataReceived; + private event EventHandler? _flushedReceived; + private event EventHandler? 
_clearedReceived; + private event EventHandler? _audioReceived; + private event EventHandler? _closeReceived; + private event EventHandler? _unhandledReceived; + private event EventHandler? _warningReceived; + private event EventHandler? _errorReceived; + #endregion + + /// + /// Connect to a Deepgram API Web Socket to begin transcribing audio + /// + /// Options to use when transcribing audio + /// The task object representing the asynchronous operation. + public async Task Connect(SpeakSchema options, CancellationTokenSource? cancelToken = null, Dictionary? addons = null, + Dictionary? headers = null) + { + Log.Verbose("SpeakClient.Connect", "ENTER"); + Log.Information("Connect", $"options:\n{JsonSerializer.Serialize(options, JsonSerializeOptions.DefaultOptions)}"); + Log.Debug("Connect", $"addons: {addons}"); + + // check if the client is disposed + if (_clientWebSocket != null) + { + // client has already connected + var exStr = "Client has already been initialized"; + Log.Error("Connect", exStr); + Log.Verbose("SpeakClient.Connect", "LEAVE"); + throw new InvalidOperationException(exStr); + } + + if (cancelToken == null) + { + Log.Information("Connect", "Using default connect cancellation token"); + cancelToken = new CancellationTokenSource(Constants.DefaultConnectTimeout); + } + + // create client + _clientWebSocket = new ClientWebSocket(); + + // set headers + _clientWebSocket.Options.SetRequestHeader("Authorization", $"token {_deepgramClientOptions.ApiKey}"); + if (_deepgramClientOptions.Headers is not null) + { + foreach (var header in _deepgramClientOptions.Headers) + { + var tmp = header.Key.ToLower(); + if (!(tmp.Contains("password") || tmp.Contains("token") || tmp.Contains("authorization") || tmp.Contains("auth"))) + { + Log.Debug("PutAsync", $"Add Header {header.Key}={header.Value}"); + } + _clientWebSocket.Options.SetRequestHeader(header.Key, header.Value); + } + } + if (headers is not null) + { + foreach (var header in headers) + { + var tmp = 
header.Key.ToLower(); + if (!(tmp.Contains("password") || tmp.Contains("token") || tmp.Contains("authorization") || tmp.Contains("auth"))) + { + Log.Debug("PutAsync", $"Add Header {header.Key}={header.Value}"); + } + _clientWebSocket.Options.SetRequestHeader(header.Key, header.Value); + } + } + + // internal cancelation token for internal threads + _cancellationTokenSource = new CancellationTokenSource(); + + try + { + var _uri = GetUri(_deepgramClientOptions, options, addons); + Log.Debug("Connect", $"uri: {_uri}"); + + Log.Debug("Connect", "Connecting to Deepgram API..."); + await _clientWebSocket.ConnectAsync(_uri, cancelToken.Token).ConfigureAwait(false); + + Log.Debug("Connect", "Starting Sender Thread..."); + StartSenderBackgroundThread(); + + Log.Debug("Connect", "Starting Receiver Thread..."); + StartReceiverBackgroundThread(); + + if (_deepgramClientOptions.AutoFlushSpeakDelta > 0) + { + Log.Debug("Connect", "Starting AutoFlush Thread..."); + StartAutoFlushBackgroundThread(); + } + + // send a OpenResponse event + if (_openReceived != null) + { + Log.Debug("Connect", "Sending OpenResponse event..."); + var data = new OpenResponse(); + data.Type = SpeakType.Open; + _openReceived.Invoke(null, data); + } + + Log.Debug("Connect", "Connect Succeeded"); + Log.Verbose("SpeakClient.Connect", "LEAVE"); + } + catch (TaskCanceledException ex) + { + Log.Debug("Connect", "Connect cancelled."); + Log.Verbose("Connect", $"Connect cancelled. 
Info: {ex}"); + Log.Verbose("SpeakClient.Connect", "LEAVE"); + } + catch (Exception ex) + { + Log.Error("Connect", $"{ex.GetType()} thrown {ex.Message}"); + Log.Verbose("Connect", $"Excepton: {ex}"); + Log.Verbose("SpeakClient.Connect", "LEAVE"); + throw; + } + + void StartSenderBackgroundThread() => _ = Task.Factory.StartNew( + _ => ProcessSendQueue(), + TaskCreationOptions.LongRunning); + + void StartReceiverBackgroundThread() => _ = Task.Factory.StartNew( + _ => ProcessReceiveQueue(), + TaskCreationOptions.LongRunning); + + void StartAutoFlushBackgroundThread() => _ = Task.Factory.StartNew( + _ => ProcessAutoFlush(), + TaskCreationOptions.LongRunning); + } + + #region Subscribe Event + /// + /// Subscribe to an Open event from the Deepgram API + /// + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler) + { + lock (_mutexSubscribe) + { + _openReceived += (sender, e) => eventHandler(sender, e); + } + + return true; + } + + /// + /// Subscribe to a Metadata event from the Deepgram API + /// + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler) + { + lock (_mutexSubscribe) + { + _metadataReceived += (sender, e) => eventHandler(sender, e); + } + return true; + } + + /// + /// Subscribe to a Flushed event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler) + { + lock (_mutexSubscribe) + { + _flushedReceived += (sender, e) => eventHandler(sender, e); + } + return true; + } + + /// + /// Subscribe to a Cleared event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler) + { + lock (_mutexSubscribe) + { + _clearedReceived += (sender, e) => eventHandler(sender, e); + } + return true; + } + + /// + /// Subscribe to an Audio event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler) + { + lock (_mutexSubscribe) + { + _audioReceived += (sender, e) => eventHandler(sender, 
e); + } + return true; + } + + /// + /// Subscribe to a Close event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler) + { + lock (_mutexSubscribe) + { + _closeReceived += (sender, e) => eventHandler(sender, e); + } + return true; + } + + /// + /// Subscribe to an Unhandled event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler) + { + lock (_mutexSubscribe) + { + _unhandledReceived += (sender, e) => eventHandler(sender, e); + } + return true; + } + + /// + /// Subscribe to an Warning event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler) + { + lock (_mutexSubscribe) + { + _warningReceived += (sender, e) => eventHandler(sender, e); + } + return true; + } + + /// + /// Subscribe to an Error event from the Deepgram API + /// + /// True if successful + public bool Subscribe(EventHandler eventHandler) + { + lock (_mutexSubscribe) + { + _errorReceived += (sender, e) => eventHandler(sender, e); + } + return true; + } + #endregion + + #region Send Functions + /// + /// This method sends a string to Deepgram for conversion to audio. + /// This is a convenience functions that will wrap the provided string in a TextSource object. + /// + /// The string of text you want to be converted to audio. + public void SpeakWithText(string text) + { + TextSource textSource = new TextSource(text); + byte[] byteArray = Encoding.UTF8.GetBytes(textSource.ToString()); + Send(byteArray); + } + + /// + /// This method Flushes the text buffer on Deepgram to be converted to audio. 
+ /// + public void Flush() + { + ControlMessage controlMessage = new ControlMessage(Constants.Flush); + byte[] byteArray = Encoding.UTF8.GetBytes(controlMessage.ToString()); + Send(byteArray); + } + + /// + /// This method Clears the text buffer on Deepgram to be converted to audio + /// + public void Clear() + { + ControlMessage controlMessage = new ControlMessage(Constants.Clear); + byte[] byteArray = Encoding.UTF8.GetBytes(controlMessage.ToString()); + Send(byteArray); + } + + /// + /// This method tells Deepgram to initiate the close server-side. + /// + public void Close() + { + ControlMessage controlMessage = new ControlMessage(Constants.Close); + byte[] byteArray = Encoding.UTF8.GetBytes(controlMessage.ToString()); + Send(byteArray); + } + + /// + /// Sends a binary message over the WebSocket connection. + /// + /// The data to be sent over the WebSocket. + public void Send(byte[] data) => SendMessage(data); + + ///// + ///// This method sends a binary message over the WebSocket connection. + ///// Currently, this method has no use. + ///// + ///// + //public void SendBinary(byte[] data) => + // EnqueueSendMessage(new WebSocketMessage(data, WebSocketMessageType.Binary)); + + /// + /// This method sends a text message over the WebSocket connection. + /// + public void SendMessage(byte[] data) + { + // auto flush + if (_deepgramClientOptions.InspectSpeakMessage()) + { + string type = GetMessageType(data); + Log.Debug("SendMessage", $"Inspecting Message: Sending {type}"); + switch (type) + { + case Constants.Flush: + lock (_mutexLastDatagram) + { + _flushCount += 1; + Log.Debug("SendMessage", $"Increment Flush count: {_flushCount}"); + } + break; + case Constants.Speak: + InspectMessage(); + break; + } + } + + // send message + EnqueueSendMessage(new WebSocketMessage(data, WebSocketMessageType.Text)); + } + ///// + ///// This method sends a binary message over the WebSocket connection immediately without queueing. + ///// Currently, this method has no use. 
+ ///// + //public void SendBinaryImmediately(byte[] data) + //{ + // lock (_mutexSend) + // { + // Log.Verbose("SendBinaryImmediately", "Sending binary message immediately.."); // TODO: dump this message + // _clientWebSocket.SendAsync(new ArraySegment(data), WebSocketMessageType.Binary, true, _cancellationTokenSource.Token) + // .ConfigureAwait(false); + // } + //} + + /// + /// This method sends a text message over the WebSocket connection immediately without queueing. + /// + public void SendMessageImmediately(byte[] data) + { + // auto flush + if (_deepgramClientOptions.InspectSpeakMessage()) + { + string type = GetMessageType(data); + Log.Debug("SendMessage", $"Inspecting Message: Sending {type}"); + switch (type) + { + case Constants.Flush: + lock (_mutexLastDatagram) + { + _flushCount += 1; + Log.Debug("SendMessage", $"Increment Flush count: {_flushCount}"); + } + break; + case Constants.Speak: + InspectMessage(); + break; + } + } + + lock (_mutexSend) + { + Log.Verbose("SendBinaryImmediately", "Sending text message immediately.."); // TODO: dump this message + _clientWebSocket.SendAsync(new ArraySegment(data), WebSocketMessageType.Text, true, _cancellationTokenSource.Token) + .ConfigureAwait(false); + } + } + #endregion + + internal void EnqueueSendMessage(WebSocketMessage message) + { + try + { + _sendChannel.Writer.TryWrite(message); + } + catch (Exception ex) + { + Log.Error("EnqueueSendMessage", $"{ex.GetType()} thrown {ex.Message}"); + Log.Verbose("EnqueueSendMessage", $"Excepton: {ex}"); + } + } + + internal async Task ProcessSendQueue() + { + Log.Verbose("SpeakClient.ProcessSendQueue", "ENTER"); + + if (_clientWebSocket == null) + { + var exStr = "Attempting to start a sender queue when the WebSocket has been disposed is not allowed."; + Log.Error("EnqueueSendMessage", exStr); + Log.Verbose("ProcessSendQueue", "LEAVE"); + + throw new InvalidOperationException(exStr); + } + + try + { + while (await 
_sendChannel.Reader.WaitToReadAsync(_cancellationTokenSource.Token)) + { + if (_cancellationTokenSource.Token.IsCancellationRequested) + { + Log.Information("ProcessSendQueue", "ProcessSendQueue cancelled"); + break; + } + + Log.Verbose("ProcessSendQueue", "Reading message of queue..."); + while (_sendChannel.Reader.TryRead(out var message)) + { + // TODO: Add logging for message capturing for possible playback + Log.Verbose("ProcessSendQueue", "Sending message..."); + lock (_mutexSend) + { + _clientWebSocket.SendAsync(message.Message, message.MessageType, true, _cancellationTokenSource.Token) + .ConfigureAwait(false); + } + } + } + + Log.Verbose("ProcessSendQueue", "Exit"); + Log.Verbose("SpeakClient.ProcessSendQueue", "LEAVE"); + } + catch (OperationCanceledException ex) + { + Log.Debug("ProcessSendQueue", "SendThread cancelled."); + Log.Verbose("ProcessSendQueue", $"SendThread cancelled. Info: {ex}"); + Log.Verbose("SpeakClient.ProcessSendQueue", "LEAVE"); + } + catch (Exception ex) + { + Log.Error("ProcessSendQueue", $"{ex.GetType()} thrown {ex.Message}"); + Log.Verbose("ProcessSendQueue", $"Excepton: {ex}"); + Log.Verbose("SpeakClient.ProcessSendQueue", "LEAVE"); + } + } + + internal async void ProcessAutoFlush() + { + Log.Verbose("LiveClient.ProcessAutoFlush", "ENTER"); + + var diffTicks = TimeSpan.FromMilliseconds((double)_deepgramClientOptions.AutoFlushSpeakDelta); + + try + { + while (true) + { + Log.Verbose("ProcessAutoFlush", "Waiting for AutoFlush..."); + await Task.Delay(Constants.DefaultFlushPeriodInMs, _cancellationTokenSource.Token); + + if (_cancellationTokenSource.Token.IsCancellationRequested) + { + Log.Information("ProcessAutoFlush", "ProcessAutoFlush cancelled"); + break; + } + + lock (_mutexLastDatagram) + { + if (_lastReceived == null) + { + Log.Debug("ProcessAutoFlush", "No datagram received. 
Skipping..."); + continue; + } + + var deltaTicks = DateTime.Now - _lastReceived; + if (deltaTicks < diffTicks) + { + Log.Debug("ProcessAutoFlush", $"AutoFlush delta is less than threshold: {deltaTicks}. Skipping..."); + continue; + } + + Log.Debug("ProcessAutoFlush", $"AutoFlush delta exceeded threshold: {deltaTicks}. Skipping..."); + Flush(); + _lastReceived = null; + } + } + + Log.Verbose("ProcessAutoFlush", "Exit"); + Log.Verbose("LiveClient.ProcessAutoFlush", "LEAVE"); + } + catch (TaskCanceledException ex) + { + Log.Debug("ProcessAutoFlush", "KeepAliveThread cancelled."); + Log.Verbose("ProcessAutoFlush", $"KeepAliveThread cancelled. Info: {ex}"); + Log.Verbose("LiveClient.ProcessAutoFlush", "LEAVE"); + } + catch (Exception ex) + { + Log.Error("ProcessAutoFlush", $"{ex.GetType()} thrown {ex.Message}"); + Log.Verbose("ProcessAutoFlush", $"Excepton: {ex}"); + Log.Verbose("LiveClient.ProcessAutoFlush", "LEAVE"); + } + } + + internal async Task ProcessReceiveQueue() + { + Log.Verbose("SpeakClient.ProcessReceiveQueue", "ENTER"); + + while (_clientWebSocket?.State == WebSocketState.Open) + { + try + { + if (_cancellationTokenSource.Token.IsCancellationRequested) + { + Log.Information("ProcessReceiveQueue", "ReceiveThread cancelled"); + await Stop(); + Log.Verbose("ProcessReceiveQueue", "LEAVE"); + return; + } + + var buffer = new ArraySegment(new byte[Constants.BufferSize]); + WebSocketReceiveResult result; + + using (var ms = new MemoryStream()) + { + do + { + // get the result of the receive operation + result = await _clientWebSocket.ReceiveAsync(buffer, _cancellationTokenSource.Token); + + ms.Write( + buffer.Array ?? 
throw new InvalidOperationException("buffer cannot be null"), + buffer.Offset, + result.Count + ); + } while (!result.EndOfMessage); + + if (result.MessageType != WebSocketMessageType.Close) + { + Log.Verbose("ProcessReceiveQueue", $"Received message: {result} / {ms}"); + ProcessDataReceived(result, ms); + } + } + + if (result.MessageType == WebSocketMessageType.Close) + { + Log.Information("ProcessReceiveQueue", "Received WebSocket Close. Trigger cancel..."); + await Stop(); + Log.Verbose("ProcessReceiveQueue", "LEAVE"); + return; + } + } + catch (TaskCanceledException ex) + { + Log.Debug("ProcessReceiveQueue", "ReceiveThread cancelled."); + Log.Verbose("ProcessReceiveQueue", $"ReceiveThread cancelled. Info: {ex}"); + Log.Verbose("SpeakClient.ProcessReceiveQueue", "LEAVE"); + } + catch (Exception ex) + { + Log.Error("ProcessReceiveQueue", $"{ex.GetType()} thrown {ex.Message}"); + Log.Verbose("ProcessReceiveQueue", $"Excepton: {ex}"); + Log.Verbose("SpeakClient.ProcessReceiveQueue", "LEAVE"); + } + } + } + + internal void ProcessDataReceived(WebSocketReceiveResult result, MemoryStream ms) + { + Log.Verbose("SpeakClient.ProcessDataReceived", "ENTER"); + + ms.Seek(0, SeekOrigin.Begin); + + try + { + if (result.MessageType == WebSocketMessageType.Binary) + { + Log.Debug("ProcessDataReceived", "Received WebSocketMessageType.Binary"); + + if (_audioReceived == null) + { + Log.Debug("ProcessDataReceived", "_audioReceived has no listeners"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + + var audioResponse = new AudioResponse() + { + Stream = ms + }; + + Log.Debug("ProcessDataReceived", "Invoking AudioResponse"); + InvokeParallel(_audioReceived, audioResponse); + + } + else if (result.MessageType == WebSocketMessageType.Text) + { + Log.Debug("ProcessDataReceived", "Received WebSocketMessageType.Text"); + + var response = Encoding.UTF8.GetString(ms.ToArray()); + if (response == null) + { + Log.Warning("ProcessDataReceived", "Response is null"); + 
Log.Verbose("SpeakClient.ProcessDataReceived", "LEAVE"); + return; + } + + Log.Verbose("ProcessDataReceived", $"raw response: {response}"); + var data = JsonDocument.Parse(response); + var val = Enum.Parse(typeof(SpeakType), data.RootElement.GetProperty("type").GetString()!); + + Log.Verbose("ProcessDataReceived", $"Type: {val}"); + + switch (val) + { + case SpeakType.Open: + var openResponse = data.Deserialize(); + if (_openReceived == null) + { + Log.Debug("ProcessDataReceived", "_openReceived has no listeners"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + if (openResponse == null) + { + Log.Warning("ProcessDataReceived", "OpenResponse is invalid"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + + Log.Debug("ProcessDataReceived", $"Invoking OpenResponse. event: {openResponse}"); + InvokeParallel(_openReceived, openResponse); + break; + case SpeakType.Metadata: + var metadataResponse = data.Deserialize(); + if (_metadataReceived == null) + { + Log.Debug("ProcessDataReceived", "_metadataReceived has no listeners"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + if (metadataResponse == null) + { + Log.Warning("ProcessDataReceived", "MetadataResponse is invalid"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + + Log.Debug("ProcessDataReceived", $"Invoking MetadataResponse. 
event: {metadataResponse}"); + InvokeParallel(_metadataReceived, metadataResponse); + break; + case SpeakType.Flushed: + var flushedResponse = data.Deserialize(); + if (_flushedReceived == null) + { + Log.Debug("ProcessDataReceived", "_flushedReceived has no listeners"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + if (flushedResponse == null) + { + Log.Warning("ProcessDataReceived", "FlushedResponse is invalid"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + + // auto flush + if (_deepgramClientOptions.InspectSpeakMessage()) + { + lock (_mutexLastDatagram) + { + _flushCount -= 1; + Log.Debug("ProcessDataReceived", $"Decrement Flush count: {_flushCount}"); + } + } + + Log.Debug("ProcessDataReceived", $"Invoking FlushedResponse. event: {flushedResponse}"); + InvokeParallel(_flushedReceived, flushedResponse); + break; + case SpeakType.Cleared: + var clearResponse = data.Deserialize(); + if (_clearedReceived == null) + { + Log.Debug("ProcessDataReceived", "_clearedReceived has no listeners"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + if (clearResponse == null) + { + Log.Warning("ProcessDataReceived", "ClearedResponse is invalid"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + + Log.Debug("ProcessDataReceived", $"Invoking ClearedResponse. event: {clearResponse}"); + InvokeParallel(_clearedReceived, clearResponse); + break; + case SpeakType.Close: + var closeResponse = data.Deserialize(); + if (_closeReceived == null) + { + Log.Debug("ProcessDataReceived", "_closeReceived has no listeners"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + if (closeResponse == null) + { + Log.Warning("ProcessDataReceived", "CloseResponse is invalid"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + + Log.Debug("ProcessDataReceived", $"Invoking CloseResponse. 
event: {closeResponse}"); + InvokeParallel(_closeReceived, closeResponse); + break; + case SpeakType.Warning: + var warningResponse = data.Deserialize(); + if (_warningReceived == null) + { + Log.Debug("ProcessDataReceived", "_warningReceived has no listeners"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + if (warningResponse == null) + { + Log.Warning("ProcessDataReceived", "WarningResponse is invalid"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + + Log.Debug("ProcessDataReceived", $"Invoking WarningResponse. event: {warningResponse}"); + InvokeParallel(_warningReceived, warningResponse); + break; + case SpeakType.Error: + var errorResponse = data.Deserialize(); + if (_errorReceived == null) + { + Log.Debug("ProcessDataReceived", "_errorReceived has no listeners"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + if (errorResponse == null) + { + Log.Warning("ProcessDataReceived", "ErrorResponse is invalid"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + + Log.Debug("ProcessDataReceived", $"Invoking ErrorResponse. event: {errorResponse}"); + InvokeParallel(_errorReceived, errorResponse); + break; + default: + if (_unhandledReceived == null) + { + Log.Debug("ProcessDataReceived", "_unhandledReceived has no listeners"); + Log.Verbose("ProcessDataReceived", "LEAVE"); + return; + } + + var unhandledResponse = new UnhandledResponse(); + unhandledResponse.Type = SpeakType.Unhandled; + unhandledResponse.Raw = response; + + Log.Debug("ProcessDataReceived", $"Invoking UnhandledResponse. 
event: {unhandledResponse}");
+                        InvokeParallel(_unhandledReceived, unhandledResponse);
+                        break;
+                }
+            }
+            else
+            {
+                Log.Error("ProcessDataReceived", $"Received WebSocketMessageType.{result.MessageType.ToString()}");
+                Log.Error("ProcessDataReceived", $"Data: {ms.ToString()}");
+            }
+
+            Log.Debug("ProcessDataReceived", "Succeeded");
+            Log.Verbose("SpeakClient.ProcessDataReceived", "LEAVE");
+        }
+        catch (JsonException ex)
+        {
+            Log.Error("ProcessDataReceived", $"{ex.GetType()} thrown {ex.Message}");
+            Log.Verbose("ProcessDataReceived", $"Excepton: {ex}");
+            Log.Verbose("SpeakClient.ProcessDataReceived", "LEAVE");
+        }
+        catch (Exception ex)
+        {
+            Log.Error("ProcessDataReceived", $"{ex.GetType()} thrown {ex.Message}");
+            Log.Verbose("ProcessDataReceived", $"Excepton: {ex}");
+            Log.Verbose("SpeakClient.ProcessDataReceived", "LEAVE");
+        }
+    }
+
+    /// <summary>
+    /// Closes the Web Socket connection to the Deepgram API
+    /// </summary>
+    /// <param name="cancelToken">Optional token source bounding the shutdown; when null, one using Constants.DefaultDisconnectTimeout is created</param>
+    /// <returns>The task object representing the asynchronous operation.</returns>
+    public async Task Stop(CancellationTokenSource? cancelToken = null)
+    {
+        Log.Verbose("SpeakClient.Stop", "ENTER");
+
+        // client is already disposed
+        if (_clientWebSocket == null)
+        {
+            Log.Information("Stop", "Client has already been disposed");
+            Log.Verbose("SpeakClient.Stop", "LEAVE");
+            return;
+        }
+
+        if (cancelToken == null)
+        {
+            Log.Information("Stop", "Using default disconnect cancellation token");
+            cancelToken = new CancellationTokenSource(Constants.DefaultDisconnectTimeout);
+        }
+
+        try
+        {
+            // cancel the internal token to stop all threads
+            if (_cancellationTokenSource != null)
+            {
+                Log.Debug("Stop", "Cancelling native token...");
+                _cancellationTokenSource.Cancel();
+            }
+
+            // if websocket is open, send a close message
+            if (_clientWebSocket!.State == WebSocketState.Open)
+            {
+                Log.Debug("Stop", "Sending Close message...");
+                // Start the send under the send lock (await is not allowed inside a lock) and await it
+                // outside, so a failure is observed and the close frame below cannot overtake it.
+                Task closeMessage;
+                lock (_mutexSend)
+                {
+                    closeMessage = _clientWebSocket.SendAsync(new ArraySegment<byte>([0]), WebSocketMessageType.Binary, true, cancelToken.Token);
+                }
+                await closeMessage.ConfigureAwait(false);
+            }
+
+            // send a CloseResponse event
+            if (_closeReceived != null)
+            {
+                Log.Debug("Stop", "Sending CloseResponse event...");
+                var data = new CloseResponse();
+                data.Type = SpeakType.Close;
+                InvokeParallel(_closeReceived, data);
+            }
+
+            // attempt to stop the connection
+            if (_clientWebSocket!.State != WebSocketState.Closed && _clientWebSocket!.State != WebSocketState.Aborted)
+            {
+                Log.Debug("Stop", "Closing WebSocket connection...");
+                await _clientWebSocket.CloseOutputAsync(WebSocketCloseStatus.NormalClosure, string.Empty, cancelToken.Token)
+                    .ConfigureAwait(false);
+            }
+
+            // clean up internal token
+            if (_cancellationTokenSource != null)
+            {
+                Log.Debug("Stop", "Disposing internal token...");
+                _cancellationTokenSource.Dispose();
+                _cancellationTokenSource = null;
+            }
+
+            // release the socket: dispose it before dropping the reference, Dispose() cannot reach it afterwards
+            Log.Debug("Stop", "Disposing WebSocket socket...");
+            _clientWebSocket.Dispose();
+            _clientWebSocket = null;
+
+            Log.Debug("Stop", "Succeeded");
+            Log.Verbose("SpeakClient.Stop", "LEAVE");
+        }
+        catch (TaskCanceledException ex)
+        {
+            Log.Debug("Stop", "Stop cancelled.");
+            Log.Verbose("Stop", $"Stop cancelled. Info: {ex}");
+            Log.Verbose("SpeakClient.Stop", "LEAVE");
+        }
+        catch (Exception ex)
+        {
+            Log.Error("Stop", $"{ex.GetType()} thrown {ex.Message}");
+            Log.Verbose("Stop", $"Exception: {ex}");
+            Log.Verbose("SpeakClient.Stop", "LEAVE");
+            throw;
+        }
+    }
+
+    #region Helpers
+    /// <summary>
+    /// Retrieves the connection state of the WebSocket
+    /// </summary>
+    /// <returns>Returns the connection state of the WebSocket</returns>
+    public WebSocketState State()
+    {
+        if (_clientWebSocket == null)
+        {
+            return WebSocketState.None;
+        }
+        return _clientWebSocket.State;
+    }
+
+    /// <summary>
+    /// Indicates whether the WebSocket is connected
+    /// </summary>
+    /// <returns>Returns true if the WebSocket is connected</returns>
+    public bool IsConnected()
+    {
+        if (_clientWebSocket == null)
+        {
+            return false;
+        }
+
+        return _clientWebSocket.State == WebSocketState.Open;
+    }
+
+    /// <summary>
+    /// Handle channel options
+    /// </summary>
+    internal readonly Channel<WebSocketMessage> _sendChannel = System.Threading.Channels.Channel
+        .CreateUnbounded<WebSocketMessage>(new UnboundedChannelOptions { SingleReader = true, SingleWriter = true, });
+
+    /// <summary>
+    /// Get the URI for the WebSocket connection
+    /// </summary>
+    internal static Uri GetUri(IDeepgramClientOptions options, SpeakSchema parameter, Dictionary<string, string>? addons = null)
+    {
+        var propertyInfoList = parameter.GetType()
+            .GetProperties()
+            .Where(v => v.GetValue(parameter) is not null);
+
+        var queryString = QueryParameterUtil.UrlEncode(parameter, propertyInfoList, addons);
+
+        return new Uri($"{options.BaseAddress}/{UriSegments.SPEAK}?{queryString}");
+    }
+
+    /// <summary>
+    /// Invokes every subscriber of the event in parallel; handler exceptions are logged, not rethrown
+    /// </summary>
+    internal void InvokeParallel<T>(EventHandler<T> eventHandler, T e)
+    {
+        if (eventHandler != null)
+        {
+            try
+            {
+                Parallel.ForEach(
+                    eventHandler.GetInvocationList().Cast<EventHandler<T>>(),
+                    (handler) =>
+                        handler(null, e));
+            }
+            catch (AggregateException ae)
+            {
+                Log.Error("InvokeParallel", $"AggregateException occurred in one or more event handlers: {ae}");
+            }
+            catch (Exception ex)
+            {
+                Log.Error("InvokeParallel", $"Exception occurred in event handler: {ex}");
+            }
+        }
+    }
+
+    private void InspectMessage()
+    {
+        Log.Verbose("InspectMessage", "ENTER");
+
+        if (_deepgramClientOptions.AutoFlushSpeakDelta > 0)
+        {
+            var now = DateTime.Now;
+            Log.Debug("InspectMessage", $"AutoFlush last received. Time: {now}");
+            lock (_mutexLastDatagram)
+            {
+                _lastReceived = now;
+            }
+        }
+
+        Log.Debug("InspectMessage", "Succeeded");
+        Log.Verbose("InspectMessage", "LEAVE");
+    }
+    #endregion
+
+    #region Dispose
+    /// <summary>
+    /// Disposes of the resources used by the client
+    /// </summary>
+    public void Dispose()
+    {
+        // No early return when the socket is already null (e.g. after Stop()): the rest still needs releasing.
+        if (_cancellationTokenSource != null)
+        {
+            if (!_cancellationTokenSource.Token.IsCancellationRequested)
+            {
+                _cancellationTokenSource.Cancel();
+            }
+            _cancellationTokenSource.Dispose();
+            _cancellationTokenSource = null;
+        }
+
+        // TryComplete, unlike Complete, does not throw when the channel was already completed.
+        _sendChannel.Writer.TryComplete();
+
+        if (_clientWebSocket != null)
+        {
+            _clientWebSocket.Dispose();
+            _clientWebSocket = null;
+        }
+
+        GC.SuppressFinalize(this);
+    }
+
+    /// <summary>
+    /// Decodes a UTF-8 JSON message and returns the value of its "type" property ("" when that value is JSON null)
+    /// </summary>
+    internal string GetMessageType(byte[] msg)
+    {
+        // Convert the byte array to a string
+        string response = Encoding.UTF8.GetString(msg);
+
+        Log.Verbose("GetMessageType", $"raw response: {response}");
+        using var data = JsonDocument.Parse(response);
+
+        string val = data.RootElement.GetProperty("type").GetString() ?? "";
+        Log.Debug("GetMessageType", $"Type: {val}");
+
+        return val;
+    }
+}
+#endregion
diff --git a/Deepgram/Clients/Speak/v1/WebSocket/Constants.cs b/Deepgram/Clients/Speak/v1/WebSocket/Constants.cs
new file mode 100644
index 00000000..a65f309e
--- /dev/null
+++ b/Deepgram/Clients/Speak/v1/WebSocket/Constants.cs
@@ -0,0 +1,27 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Clients.Speak.v1.WebSocket;
+
+/// <summary>
+/// Constants used by the Speak WebSocket client: buffer size, default timeouts and user message type names.
+/// </summary>
+public static class Constants
+{
+    // WS buffer size
+    public const int BufferSize = 1024 * 16;
+
+    // Default timeout for connect/disconnect
+    public const int DefaultConnectTimeout = 5000;
+    public const int DefaultDisconnectTimeout = 5000;
+
+    public const int DefaultFlushPeriodInMs = 500;
+
+    // user message types
+    public const string Speak = "Speak";
+    public const string Flush = "Flush";
+    public const string Clear = "Clear";
+    public const string Close = "Close";
+}
+
diff --git a/Deepgram/Clients/Speak/v1/WebSocket/ResponseEvent.cs b/Deepgram/Clients/Speak/v1/WebSocket/ResponseEvent.cs
new file mode 100644
index 00000000..01dbcc7a
--- /dev/null
+++ b/Deepgram/Clients/Speak/v1/WebSocket/ResponseEvent.cs
@@ -0,0 +1,11 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Clients.Speak.v1.WebSocket;
+
+public class ResponseEvent<T>(T? response) : EventArgs
+{
+    public T? Response { get; set; } = response;
+}
+
diff --git a/Deepgram/Clients/Speak/v1/WebSocket/UriSegments.cs b/Deepgram/Clients/Speak/v1/WebSocket/UriSegments.cs
new file mode 100644
index 00000000..dd711a67
--- /dev/null
+++ b/Deepgram/Clients/Speak/v1/WebSocket/UriSegments.cs
@@ -0,0 +1,12 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Clients.Speak.v1.WebSocket;
+
+public static class UriSegments
+{
+    //using constants instead of inline value(magic strings) make consistence
+    //across SDK And Test Projects Simpler and Easier to change
+    public const string SPEAK = "speak";
+}
diff --git a/Deepgram/Clients/Speak/v1/WebSocket/WebSocketMessage.cs b/Deepgram/Clients/Speak/v1/WebSocket/WebSocketMessage.cs
new file mode 100644
index 00000000..51fa8fbe
--- /dev/null
+++ b/Deepgram/Clients/Speak/v1/WebSocket/WebSocketMessage.cs
@@ -0,0 +1,12 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Clients.Speak.v1.WebSocket;
+
+internal readonly struct WebSocketMessage(byte[] message, WebSocketMessageType type)
+{
+    public ArraySegment<byte> Message { get; } = new ArraySegment<byte>(message);
+
+    public WebSocketMessageType MessageType { get; } = type;
+}
diff --git a/Deepgram/Models/Authenticate/v1/Constants.cs b/Deepgram/Models/Authenticate/v1/Constants.cs
new file mode 100644
index 00000000..361a3c51
--- /dev/null
+++ b/Deepgram/Models/Authenticate/v1/Constants.cs
@@ -0,0 +1,15 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Models.Authenticate.v1;
+
+/// <summary>
+/// Addon keys that the client options look up to configure the auto flush deltas.
+/// </summary>
+public static class Constants
+{
+    public const string AutoFlushReplyDelta = "auto_flush_reply_delta";
+    public const string AutoFlushSpeakDelta = "auto_flush_speak_delta";
+}
+
diff --git a/Deepgram/Models/Authenticate/v1/DeepgramHttpClientOptions.cs b/Deepgram/Models/Authenticate/v1/DeepgramHttpClientOptions.cs
index 5389417a..00eff659 100644
--- a/Deepgram/Models/Authenticate/v1/DeepgramHttpClientOptions.cs
+++ b/Deepgram/Models/Authenticate/v1/DeepgramHttpClientOptions.cs
@@ -33,6 +33,11 @@ public class DeepgramHttpClientOptions : IDeepgramClientOptions
     /// </summary>
     public Dictionary<string, string> Headers { get; set; }
 
+    /// <summary>
+    /// Global addons to always be added to the request
+    /// </summary>
+    public Dictionary<string, string> Addons { get; set; }
+
     /*****************************/
     // Prerecorded
     /*****************************/
@@ -46,12 +51,20 @@ public class DeepgramHttpClientOptions : IDeepgramClientOptions
     /// <summary>
     /// Based on the options set, do we want to inspect the Messages. If yes, then return true.
     /// </summary>
-    public bool InspectMessage()
+    public bool InspectListenMessage()
     {
         // This is only a WebSocket capability
         return false;
     }
 
+    /// <summary>
+    /// Based on the options set, do we want to inspect the Speak Messages. If yes, then return true.
+    /// </summary>
+    public bool InspectSpeakMessage()
+    {
+        return false;
+    }
+
     /*****************************/
     // OnPrem
     /*****************************/
@@ -72,10 +85,12 @@ public bool InspectMessage()
     // Speak
     /*****************************/
 
+    public decimal AutoFlushSpeakDelta { get; }
+
     /*****************************/
     // Constructor
     /*****************************/
-    public DeepgramHttpClientOptions(string? apiKey = null, string? baseAddress = null, bool? onPrem = null, Dictionary<string, string>? headers = null)
+    public DeepgramHttpClientOptions(string? apiKey = null, string? baseAddress = null, bool? onPrem = null, Dictionary<string, string>? options = null, Dictionary<string, string>? headers = null)
     {
         Log.Verbose("DeepgramHttpClientOptions", "ENTER");
         Log.Debug("DeepgramHttpClientOptions", apiKey == null ? "API KEY is null" : "API KEY provided");
@@ -87,6 +102,8 @@ public DeepgramHttpClientOptions(string? apiKey = null, string? baseAddress = nu
         ApiKey = apiKey ?? "";
         BaseAddress = baseAddress ?? Defaults.DEFAULT_URI;
         OnPrem = onPrem ?? false;
+        // addons are supplied through the 'options' parameter, not through the headers
+        Addons = options ?? new Dictionary<string, string>();
         Headers = headers ?? new Dictionary<string, string>();
 
         Log.Information("DeepgramHttpClientOptions", $"OnPrem: {OnPrem}");
diff --git a/Deepgram/Models/Authenticate/v1/DeepgramOptionsFromEnv.cs b/Deepgram/Models/Authenticate/v1/DeepgramOptionsFromEnv.cs
index c0055541..77ad1072 100644
--- a/Deepgram/Models/Authenticate/v1/DeepgramOptionsFromEnv.cs
+++ b/Deepgram/Models/Authenticate/v1/DeepgramOptionsFromEnv.cs
@@ -30,6 +30,11 @@ public class DeepgramOptionsFromEnv : IDeepgramClientOptions
     /// </summary>
     public Dictionary<string, string> Headers { get; set; }
 
+    /// <summary>
+    /// Global addons to always be added to the request
+    /// </summary>
+    public Dictionary<string, string> Addons { get; set; }
+
     /*****************************/
     // Prerecorded
     /*****************************/
@@ -43,18 +48,26 @@ public class DeepgramOptionsFromEnv : IDeepgramClientOptions
     public bool KeepAlive { get; set; } = false;
 
     /// <summary>
-    /// Enable sending KeepAlives for Streaming
+    /// Auto flush reply delta for Listen Streaming; a value greater than 0 enables inspecting Listen messages
     /// </summary>
     public decimal AutoFlushReplyDelta { get; set; } = 0;
 
     /// <summary>
-    /// Based on the options set, do we want to inspect the Messages. If yes, then return true.
+    /// Based on the options set, do we want to inspect the Listen Messages. If yes, then return true.
     /// </summary>
-    public bool InspectMessage()
+    public bool InspectListenMessage()
     {
         return AutoFlushReplyDelta > 0;
     }
 
+    /// <summary>
+    /// Based on the options set, do we want to inspect the Speak Messages. If yes, then return true.
+    /// </summary>
+    public bool InspectSpeakMessage()
+    {
+        return AutoFlushSpeakDelta > 0;
+    }
+
     /*****************************/
     // OnPrem
     /*****************************/
@@ -75,6 +88,11 @@ public bool InspectMessage()
     // Speak
     /*****************************/
 
+    /// <summary>
+    /// Enable sending Flush for Speak Streaming
+    /// </summary>
+    public decimal AutoFlushSpeakDelta { get; set; } = 0;
+
     /*****************************/
     // Constructor
     /*****************************/
@@ -87,14 +105,31 @@ public DeepgramOptionsFromEnv()
         var onPrem = Environment.GetEnvironmentVariable("DEEPGRAM_ON_PREM") ?? "";
         var keepAlive = Environment.GetEnvironmentVariable("DEEPGRAM_KEEP_ALIVE") ?? "";
         var autoFlushReplyDelta = Environment.GetEnvironmentVariable("DEEPGRAM_WEBSOCKET_AUTO_FLUSH") ?? "";
+        // NOTE(review): this reads the same variable as the Listen delta above - confirm that sharing it is intended
+        var autoFlushSpeakDelta = Environment.GetEnvironmentVariable("DEEPGRAM_WEBSOCKET_AUTO_FLUSH") ?? "";
 
-        Headers = new Dictionary<string, string>();
+        Addons = new Dictionary<string, string>();
         for (int x = 0; x < 20; x++)
         {
             var param = Environment.GetEnvironmentVariable($"DEEPGRAM_PARAM_{x}");
             if (param != null)
             {
                 var value = Environment.GetEnvironmentVariable($"DEEPGRAM_PARAM_VALUE_{x}") ?? "";
+                Addons[param] = value;
+            }
+            else
+            {
+                break;
+            }
+        }
+
+        Headers = new Dictionary<string, string>();
+        for (int x = 0; x < 20; x++)
+        {
+            var param = Environment.GetEnvironmentVariable($"DEEPGRAM_HEADERS_{x}");
+            if (param != null)
+            {
+                var value = Environment.GetEnvironmentVariable($"DEEPGRAM_HEADERS_VALUE_{x}") ?? "";
                 Headers[param] = value;
             }
             else
@@ -106,6 +141,34 @@ public DeepgramOptionsFromEnv()
         OnPrem = onPrem.ToLower() == "true";
         KeepAlive = keepAlive.ToLower() == "true";
-        AutoFlushReplyDelta = Convert.ToDecimal(autoFlushReplyDelta);
-    }
+        // TryParse keeps the default of 0 when the variable is unset or not numeric;
+        // Convert.ToDecimal("") would throw a FormatException and fail the whole constructor.
+        if (decimal.TryParse(autoFlushReplyDelta, out var replyDelta))
+        {
+            AutoFlushReplyDelta = replyDelta;
+        }
+        if (decimal.TryParse(autoFlushSpeakDelta, out var speakDelta))
+        {
+            AutoFlushSpeakDelta = speakDelta;
+        }
+        // addons
+        if (Addons.ContainsKey(Constants.AutoFlushReplyDelta))
+        {
+            var addonValue = Addons[Constants.AutoFlushReplyDelta];
+            if (decimal.TryParse(addonValue, out var parsedValue))
+            {
+                Log.Verbose("DeepgramOptionsFromEnv", $"AutoFlushReplyDelta: {parsedValue}");
+                AutoFlushReplyDelta = parsedValue;
+            }
+        }
+        if (Addons.ContainsKey(Constants.AutoFlushSpeakDelta))
+        {
+            var addonValue = Addons[Constants.AutoFlushSpeakDelta];
+            if (decimal.TryParse(addonValue, out var parsedValue))
+            {
+                Log.Verbose("DeepgramOptionsFromEnv", $"AutoFlushSpeakDelta: {parsedValue}");
+                AutoFlushSpeakDelta = parsedValue;
+            }
+        }
+    }
 
 }
diff --git a/Deepgram/Models/Authenticate/v1/DeepgramWsClientOptions.cs b/Deepgram/Models/Authenticate/v1/DeepgramWsClientOptions.cs
index 064efdbe..08a62fe9 100644
--- a/Deepgram/Models/Authenticate/v1/DeepgramWsClientOptions.cs
+++ b/Deepgram/Models/Authenticate/v1/DeepgramWsClientOptions.cs
@@ -33,6 +33,11 @@ public class DeepgramWsClientOptions : IDeepgramClientOptions
     /// </summary>
     public Dictionary<string, string> Headers { get; set; }
 
+    /// <summary>
+    /// Global addons to always be added to the request
+    /// </summary>
+    public Dictionary<string, string> Addons { get; set; }
+
     /*****************************/
     // Live
     /*****************************/
@@ -42,18 +47,26 @@ public class DeepgramWsClientOptions : IDeepgramClientOptions
     public bool KeepAlive { get; set; } = false;
 
     /// <summary>
-    /// Enable sending KeepAlives for Streaming
+    /// Auto flush reply delta for Listen Streaming; a value greater than 0 enables inspecting Listen messages
     /// </summary>
     public decimal AutoFlushReplyDelta { get; set; } = 0;
 
     /// <summary>
-    /// Based on the options set, do we want to inspect the Messages. If yes, then return true.
+    /// Based on the options set, do we want to inspect the Listen Messages. If yes, then return true.
     /// </summary>
-    public bool InspectMessage()
+    public bool InspectListenMessage()
     {
         return AutoFlushReplyDelta > 0;
     }
 
+    /// <summary>
+    /// Based on the options set, do we want to inspect the Speak Messages. If yes, then return true.
+    /// </summary>
+    public bool InspectSpeakMessage()
+    {
+        return AutoFlushSpeakDelta > 0;
+    }
+
     /*****************************/
     // OnPrem
     /*****************************/
@@ -66,22 +79,29 @@ public bool InspectMessage()
     // Speak
     /*****************************/
 
+    /// <summary>
+    /// Enable sending Flush for Speak Streaming
+    /// </summary>
+    public decimal AutoFlushSpeakDelta { get; set; } = 0;
+
     /*****************************/
     // Constructor
     /*****************************/
-    public DeepgramWsClientOptions(string? apiKey = null, string? baseAddress = null, bool? keepAlive = null, bool? onPrem = null, Dictionary<string, string>? headers = null)
+    public DeepgramWsClientOptions(string? apiKey = null, string? baseAddress = null, bool? keepAlive = null, bool? onPrem = null, Dictionary<string, string>? addons = null, Dictionary<string, string>? headers = null)
     {
         Log.Verbose("DeepgramWsClientOptions", "ENTER");
         Log.Debug("DeepgramWsClientOptions", apiKey == null ? "API KEY is null" : "API KEY provided");
         Log.Debug("DeepgramWsClientOptions", baseAddress == null ? "BaseAddress is null" : "BaseAddress provided");
         Log.Debug("DeepgramWsClientOptions", keepAlive == null ? "KeepAlive is null" : "KeepAlive provided");
         Log.Debug("DeepgramWsClientOptions", onPrem == null ? "OnPrem is null" : "OnPrem provided");
-        Log.Debug("DeepgramWsClientOptions", headers == null ? "Headers is null" : "Headers provided");
+        Log.Debug("DeepgramWsClientOptions", addons == null ? "Addons is null" : "Addons provided");
+        Log.Debug("DeepgramWsClientOptions", headers == null ? "Headers is null" : "Headers provided");
 
         ApiKey = apiKey ?? "";
         BaseAddress = baseAddress ?? Defaults.DEFAULT_URI;
         KeepAlive = keepAlive ?? false;
         OnPrem = onPrem ?? false;
+        Addons = addons ?? new Dictionary<string, string>();
         Headers = headers ?? new Dictionary<string, string>();
 
         Log.Information("DeepgramWsClientOptions", $"KeepAlive: {KeepAlive}");
@@ -138,6 +158,26 @@ public DeepgramWsClientOptions(string? apiKey = null, string? baseAddress = null
         }
         BaseAddress = BaseAddress.TrimEnd('/');
 
+        // addons
+        if (Addons.ContainsKey(Constants.AutoFlushReplyDelta))
+        {
+            var addonValue = Addons[Constants.AutoFlushReplyDelta];
+            if (decimal.TryParse(addonValue, out var parsedValue))
+            {
+                Log.Verbose("DeepgramWsClientOptions", $"AutoFlushReplyDelta: {parsedValue}");
+                AutoFlushReplyDelta = parsedValue;
+            }
+        }
+        if (Addons.ContainsKey(Constants.AutoFlushSpeakDelta))
+        {
+            var addonValue = Addons[Constants.AutoFlushSpeakDelta];
+            if (decimal.TryParse(addonValue, out var parsedValue))
+            {
+                Log.Verbose("DeepgramWsClientOptions", $"AutoFlushSpeakDelta: {parsedValue}");
+                AutoFlushSpeakDelta = parsedValue;
+            }
+        }
+
         Log.Information("DeepgramWsClientOptions", $"BaseAddress: {BaseAddress}");
         Log.Verbose("DeepgramWsClientOptions", "LEAVE");
     }
diff --git a/Deepgram/Models/Authenticate/v1/IDeepgramOptions.cs b/Deepgram/Models/Authenticate/v1/IDeepgramOptions.cs
index 451257f1..2d7dc825 100644
--- a/Deepgram/Models/Authenticate/v1/IDeepgramOptions.cs
+++ b/Deepgram/Models/Authenticate/v1/IDeepgramOptions.cs
@@ -30,6 +30,11 @@ public interface IDeepgramClientOptions
     /// </summary>
     public Dictionary<string, string> Headers { get; }
 
+    /// <summary>
+    /// Global addons to always be added to the request
+    /// </summary>
+    public Dictionary<string, string> Addons { get; }
+
     /*****************************/
     // Prerecorded
     /*****************************/
@@ -43,14 +48,19 @@ public interface IDeepgramClientOptions
     public bool KeepAlive { get; }
 
     /// <summary>
-    /// Enable sending KeepAlives for Streaming
+    /// Auto flush reply delta for Listen Streaming; a value greater than 0 enables inspecting Listen messages
     /// </summary>
     public decimal AutoFlushReplyDelta { get; }
 
     /// <summary>
-    /// Based on the options set, do we want to inspect the Messages. If yes, then return true.
+    /// Based on the options set, do we want to inspect the Listen Messages. If yes, then return true.
+    /// </summary>
+    public bool InspectListenMessage();
+
+    /// <summary>
+    /// Based on the options set, do we want to inspect the Speak Messages. If yes, then return true.
     /// </summary>
-    public bool InspectMessage();
+    public bool InspectSpeakMessage();
 
     /*****************************/
     // OnPrem
@@ -71,4 +81,9 @@ public interface IDeepgramClientOptions
     /*****************************/
     // Speak
     /*****************************/
+
+    /// <summary>
+    /// Enable sending Flush for Speak Streaming
+    /// </summary>
+    public decimal AutoFlushSpeakDelta { get; }
 }
diff --git a/Deepgram/Models/Speak/v1/WebSocket/AudioResponse.cs b/Deepgram/Models/Speak/v1/WebSocket/AudioResponse.cs
new file mode 100644
index 00000000..f447050f
--- /dev/null
+++ b/Deepgram/Models/Speak/v1/WebSocket/AudioResponse.cs
@@ -0,0 +1,24 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Models.Speak.v1.WebSocket;
+
+public record AudioResponse
+{
+    /// <summary>
+    /// Audio event type.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("type")]
+    [JsonConverter(typeof(JsonStringEnumConverter))]
+    public SpeakType? Type { get; set; } = SpeakType.Audio;
+
+    /// <summary>
+    /// A stream of the audio file
+    /// </summary>
+    public MemoryStream? Stream { get; set; }
+
+    // NOTE: There isn't a ToString() function because this will cause an odd Exception to be thrown:
+    // InvalidOperationException: "Timeouts are not supported on this stream."
+}
diff --git a/Deepgram/Models/Speak/v1/WebSocket/ClearedResponse.cs b/Deepgram/Models/Speak/v1/WebSocket/ClearedResponse.cs
new file mode 100644
index 00000000..7f7bb982
--- /dev/null
+++ b/Deepgram/Models/Speak/v1/WebSocket/ClearedResponse.cs
@@ -0,0 +1,31 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Models.Speak.v1.WebSocket;
+
+public record ClearedResponse
+{
+    /// <summary>
+    /// Cleared event type.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("type")]
+    [JsonConverter(typeof(JsonStringEnumConverter))]
+    public SpeakType? Type { get; set; } = SpeakType.Cleared;
+
+    /// <summary>
+    /// Sequence ID
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("sequence_id")]
+    public int? SequenceId { get; set; }
+
+    /// <summary>
+    /// Override ToString method to serialize the object
+    /// </summary>
+    public override string ToString()
+    {
+        return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions));
+    }
+}
diff --git a/Deepgram/Models/Speak/v1/WebSocket/CloseResponse.cs b/Deepgram/Models/Speak/v1/WebSocket/CloseResponse.cs
new file mode 100644
index 00000000..c149f286
--- /dev/null
+++ b/Deepgram/Models/Speak/v1/WebSocket/CloseResponse.cs
@@ -0,0 +1,24 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Models.Speak.v1.WebSocket;
+
+public record CloseResponse
+{
+    /// <summary>
+    /// Close event type.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("type")]
+    [JsonConverter(typeof(JsonStringEnumConverter))]
+    public SpeakType? Type { get; set; } = SpeakType.Close;
+
+    /// <summary>
+    /// Override ToString method to serialize the object
+    /// </summary>
+    public override string ToString()
+    {
+        return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions));
+    }
+}
diff --git a/Deepgram/Models/Speak/v1/WebSocket/ControlMessage.cs b/Deepgram/Models/Speak/v1/WebSocket/ControlMessage.cs
new file mode 100644
index 00000000..7d67d049
--- /dev/null
+++ b/Deepgram/Models/Speak/v1/WebSocket/ControlMessage.cs
@@ -0,0 +1,24 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Models.Speak.v1.WebSocket;
+
+public class ControlMessage(string text)
+{
+    /// <summary>
+    /// Type of the control message (initialized from the constructor argument, serialized as "type")
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("type")]
+    public string? Type { get; set; } = text;
+
+    /// <summary>
+    /// Override ToString method to serialize the object
+    /// </summary>
+    public override string ToString()
+    {
+        return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions));
+    }
+}
+
diff --git a/Deepgram/Models/Speak/v1/WebSocket/ErrorResponse.cs b/Deepgram/Models/Speak/v1/WebSocket/ErrorResponse.cs
new file mode 100644
index 00000000..ee1b602f
--- /dev/null
+++ b/Deepgram/Models/Speak/v1/WebSocket/ErrorResponse.cs
@@ -0,0 +1,45 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Models.Speak.v1.WebSocket;
+
+public record ErrorResponse
+{
+    /// <summary>
+    /// Error Description
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("description")]
+    public string? Description { get; set; } = "";
+
+    /// <summary>
+    /// Error Message
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("message")]
+    public string? Message { get; set; } = "";
+
+    /// <summary>
+    /// Error Variant
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("variant")]
+    public string? Variant { get; set; } = "";
+
+    /// <summary>
+    /// Error event type.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("type")]
+    [JsonConverter(typeof(JsonStringEnumConverter))]
+    public SpeakType? Type { get; set; } = SpeakType.Error;
+
+    /// <summary>
+    /// Override ToString method to serialize the object
+    /// </summary>
+    public override string ToString()
+    {
+        return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions));
+    }
+}
diff --git a/Deepgram/Models/Speak/v1/WebSocket/FlushedResponse.cs b/Deepgram/Models/Speak/v1/WebSocket/FlushedResponse.cs
new file mode 100644
index 00000000..91424532
--- /dev/null
+++ b/Deepgram/Models/Speak/v1/WebSocket/FlushedResponse.cs
@@ -0,0 +1,31 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Models.Speak.v1.WebSocket;
+
+public record FlushedResponse
+{
+    /// <summary>
+    /// Flushed event type.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("type")]
+    [JsonConverter(typeof(JsonStringEnumConverter))]
+    public SpeakType? Type { get; set; } = SpeakType.Flushed;
+
+    /// <summary>
+    /// Sequence ID
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("sequence_id")]
+    public int? SequenceId { get; set; }
+
+    /// <summary>
+    /// Override ToString method to serialize the object
+    /// </summary>
+    public override string ToString()
+    {
+        return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions));
+    }
+}
diff --git a/Deepgram/Models/Speak/v1/WebSocket/MetadataResponse.cs b/Deepgram/Models/Speak/v1/WebSocket/MetadataResponse.cs
new file mode 100644
index 00000000..6e65cb64
--- /dev/null
+++ b/Deepgram/Models/Speak/v1/WebSocket/MetadataResponse.cs
@@ -0,0 +1,31 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+namespace Deepgram.Models.Speak.v1.WebSocket;
+
+public record MetadataResponse
+{
+    /// <summary>
+    /// Request ID is a unique identifier for the request. It is useful for troubleshooting and support.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("request_id")]
+    public string? RequestId { get; set; }
+
+    /// <summary>
+    /// Metadata event type.
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("type")]
+    [JsonConverter(typeof(JsonStringEnumConverter))]
+    public SpeakType? Type { get; set; } = SpeakType.Metadata;
+
+    /// <summary>
+    /// Override ToString method to serialize the object
+    /// </summary>
+    public override string ToString()
+    {
+        return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions));
+    }
+}
diff --git a/Deepgram/Models/Speak/v1/WebSocket/OpenResponse.cs b/Deepgram/Models/Speak/v1/WebSocket/OpenResponse.cs
new file mode 100644
index 00000000..0484f36d
--- /dev/null
+++ b/Deepgram/Models/Speak/v1/WebSocket/OpenResponse.cs
@@ -0,0 +1,24 @@
+// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT + +namespace Deepgram.Models.Speak.v1.WebSocket; + +public record OpenResponse +{ + /// + /// Open event type. + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("type")] + [JsonConverter(typeof(JsonStringEnumConverter))] + public SpeakType? Type { get; set; } = SpeakType.Open; + + /// + /// Override ToString method to serialize the object + /// + public override string ToString() + { + return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions)); + } +} diff --git a/Deepgram/Models/Speak/v1/WebSocket/SpeakSchema.cs b/Deepgram/Models/Speak/v1/WebSocket/SpeakSchema.cs new file mode 100644 index 00000000..53596393 --- /dev/null +++ b/Deepgram/Models/Speak/v1/WebSocket/SpeakSchema.cs @@ -0,0 +1,56 @@ +// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +namespace Deepgram.Models.Speak.v1.WebSocket; + +public class SpeakSchema +{ + /// + /// AI model used to process submitted audio + /// + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("model")] + public string? Model { get; set; } = "aura-asteria-en"; + + /// + /// Bit Rate allows you to specify the bit rate of your desired audio. + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("bit_rate")] + public string? BitRate { get; set; } + + ///// + ///// Audio container format + ///// + //[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + //[JsonPropertyName("container")] + //public string? Container { get; set; } + + /// + /// Encoding allows you to specify the expected encoding of your submitted audio. + /// + /// supported encodings + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("encoding")] + public string? 
Encoding { get; set; } + + /// + /// Sample Rate allows you to specify the sample rate of your audio output. + /// + /// only applies when Encoding has a value + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("sample_rate")] + public string? SampleRate { get; set; } + + /// + /// Override ToString method to serialize the object + /// + public override string ToString() + { + return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions)); + } +} diff --git a/Deepgram/Models/Speak/v1/WebSocket/SpeakType.cs b/Deepgram/Models/Speak/v1/WebSocket/SpeakType.cs new file mode 100644 index 00000000..671b55e0 --- /dev/null +++ b/Deepgram/Models/Speak/v1/WebSocket/SpeakType.cs @@ -0,0 +1,19 @@ +// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +namespace Deepgram.Models.Speak.v1.WebSocket; + +public enum SpeakType +{ + Open, + Metadata, + Flushed, + Cleared, + Reset, + Audio, + Close, + Unhandled, + Warning, + Error, +} diff --git a/Deepgram/Models/Speak/v1/WebSocket/TextSource.cs b/Deepgram/Models/Speak/v1/WebSocket/TextSource.cs new file mode 100644 index 00000000..2c46fc4d --- /dev/null +++ b/Deepgram/Models/Speak/v1/WebSocket/TextSource.cs @@ -0,0 +1,31 @@ +// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +namespace Deepgram.Models.Speak.v1.WebSocket; + +public class TextSource(string text) +{ + /// + /// Type of the message being sent (defaults to "Speak") + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("type")] + public string?
Type { get; set; } = "Speak"; + + /// + /// Text of the words to speak + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("text")] + public string? Text { get; set; } = text; + + /// + /// Override ToString method to serialize the object + /// + public override string ToString() + { + return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions)); + } +} + diff --git a/Deepgram/Models/Speak/v1/WebSocket/UnhandledResponse.cs b/Deepgram/Models/Speak/v1/WebSocket/UnhandledResponse.cs new file mode 100644 index 00000000..f0343863 --- /dev/null +++ b/Deepgram/Models/Speak/v1/WebSocket/UnhandledResponse.cs @@ -0,0 +1,31 @@ +// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +namespace Deepgram.Models.Speak.v1.WebSocket; + +public record UnhandledResponse +{ + /// + /// Raw JSON + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("raw")] + public string? Raw { get; set; } = ""; + + /// + /// Unhandled event type. + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("type")] + [JsonConverter(typeof(JsonStringEnumConverter))] + public SpeakType? Type { get; set; } = SpeakType.Unhandled; + + /// + /// Override ToString method to serialize the object + /// + public override string ToString() + { + return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions)); + } +} diff --git a/Deepgram/Models/Speak/v1/WebSocket/WarningResponse.cs b/Deepgram/Models/Speak/v1/WebSocket/WarningResponse.cs new file mode 100644 index 00000000..f7734222 --- /dev/null +++ b/Deepgram/Models/Speak/v1/WebSocket/WarningResponse.cs @@ -0,0 +1,45 @@ +// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved. 
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +namespace Deepgram.Models.Speak.v1.WebSocket; + +public record WarningResponse +{ + /// + /// Warning Code + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("warn_code")] + public string? WarnCode { get; set; } = ""; + + /// + /// Warning Message + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("warn_msg")] + public string? WarnMsg { get; set; } = ""; + + /// + /// Warning Variant + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("variant")] + public string? Variant { get; set; } = ""; + + /// + /// Warning event type. + /// + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("type")] + [JsonConverter(typeof(JsonStringEnumConverter))] + public SpeakType? Type { get; set; } = SpeakType.Warning; + + /// + /// Override ToString method to serialize the object + /// + public override string ToString() + { + return Regex.Unescape(JsonSerializer.Serialize(this, JsonSerializeOptions.DefaultOptions)); + } +} diff --git a/Deepgram/SpeakWebSocketClient.cs b/Deepgram/SpeakWebSocketClient.cs new file mode 100644 index 00000000..a6e184c5 --- /dev/null +++ b/Deepgram/SpeakWebSocketClient.cs @@ -0,0 +1,18 @@ +// Copyright 2024 Deepgram .NET SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +using Deepgram.Clients.Speak.v1.WebSocket; +using Deepgram.Models.Authenticate.v1; + +namespace Deepgram; + +/// +/// Implements the latest supported version of the Speak Client. +/// +public class SpeakWebSocketClient : Client +{ + public SpeakWebSocketClient(string apiKey = "", DeepgramWsClientOptions?
deepgramClientOptions = null) : base(apiKey, deepgramClientOptions) + { + } +} diff --git a/README.md b/README.md index 73e4a7ea..a7f49f8b 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,10 @@ These examples provide: - Hello World - [examples/text-to-speech/rest/file](https://github.com/deepgram/deepgram-dotnet-sdk/blob/main/examples/text-to-speech/rest/file/hello-world/Program.cs) +- Text to Speech - WebSocket: + + - Simple - [examples/text-to-speech/websocket/simple](https://github.com/deepgram/deepgram-dotnet-sdk/blob/main/examples/text-to-speech/websocket/simple/Program.cs) + - Analyze Text: - Intent Recognition - [examples/analyze/intent](https://github.com/deepgram/deepgram-dotnet-sdk/blob/main/examples/analyze/intent/Program.cs) diff --git a/examples/text-to-speech/websocket/simple/Program.cs b/examples/text-to-speech/websocket/simple/Program.cs index 2dc3486f..870dbf68 100644 --- a/examples/text-to-speech/websocket/simple/Program.cs +++ b/examples/text-to-speech/websocket/simple/Program.cs @@ -2,6 +2,8 @@ // Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT +using System.Text; + using Deepgram.Models.Authenticate.v1; using Deepgram.Models.Speak.v1.WebSocket; using Deepgram.Logger; @@ -15,14 +17,13 @@ static async Task Main(string[] args) { // Initialize Library with default logging // Normal logging is "Info" level - Library.Initialize(); + //Library.Initialize(); // OR very chatty logging - //Library.Initialize(LogLevel.Debug); // LogLevel.Default, LogLevel.Debug, LogLevel.Verbose - - Console.WriteLine("\n\nPress any key to stop and exit...\n\n\n"); + Library.Initialize(LogLevel.Verbose); // LogLevel.Default, LogLevel.Debug, LogLevel.Verbose //// use the client factory with a API Key set with the "DEEPGRAM_API_KEY" environment variable - //DeepgramWsClientOptions options = new DeepgramWsClientOptions("", ""); + //DeepgramWsClientOptions options = new DeepgramWsClientOptions(null, "ENTER URL HERE"); + //options.AutoFlushSpeakDelta = 1000; //var speakClient = ClientFactory.CreateSpeakWebSocketClient("", options); var speakClient = ClientFactory.CreateSpeakWebSocketClient(); @@ -40,8 +41,9 @@ static async Task Main(string[] args) { Console.WriteLine($"----> {e.Type} received"); - if (e.Stream != null) { - using (BinaryWriter writer = new BinaryWriter(File.Open("output.mp3", FileMode.Create))) + if (e.Stream != null) + { + using (BinaryWriter writer = new BinaryWriter(File.Open("output.mp3", FileMode.Append))) { writer.Write(e.Stream.ToArray()); } @@ -51,6 +53,10 @@ static async Task Main(string[] args) { Console.WriteLine($"----> {e.Type} received"); })); + speakClient.Subscribe(new EventHandler((sender, e) => + { + Console.WriteLine($"----> {e.Type} received"); + })); speakClient.Subscribe(new EventHandler((sender, e) => { Console.WriteLine($"----> {e.Type} received"); @@ -79,6 +85,7 @@ static async Task Main(string[] args) speakClient.Flush(); // Wait for the user to press a key + Console.WriteLine("\n\nPress any key to stop and exit...\n\n\n"); Console.ReadKey(); // Stop the connection