diff --git a/OpenAI-DotNet-Tests/TestFixture_04_Chat.cs b/OpenAI-DotNet-Tests/TestFixture_04_Chat.cs
index 7f2ce6ff..6b908a56 100644
--- a/OpenAI-DotNet-Tests/TestFixture_04_Chat.cs
+++ b/OpenAI-DotNet-Tests/TestFixture_04_Chat.cs
@@ -7,6 +7,7 @@
 using OpenAI.Tests.Weather;
 using System;
 using System.Collections.Generic;
+using System.IO;
 using System.Linq;
 using System.Threading.Tasks;
 
@@ -75,7 +76,7 @@ public async Task Test_01_02_GetChatStreamingCompletion()
         }
 
         [Test]
-        public async Task Test_01_03_GetChatCompletion_Modalities()
+        public async Task Test_01_03_01_GetChatCompletion_Modalities()
         {
             Assert.IsNotNull(OpenAIClient.ChatEndpoint);
 
@@ -123,6 +124,51 @@ public async Task Test_01_03_GetChatCompletion_Modalities()
             response.GetUsage();
         }
 
+        [Test]
+        public async Task Test_01_03_02_GetChatCompletion_Modalities_Streaming()
+        {
+            Assert.IsNotNull(OpenAIClient.ChatEndpoint);
+            var messages = new List<Message>
+            {
+                new(Role.System, "You are a helpful assistant."),
+                new(Role.User, "Is a golden retriever a good family dog?"),
+            };
+            var chatRequest = new ChatRequest(messages, Model.GPT4oAudio, audioConfig: Voice.Alloy);
+            Assert.IsNotNull(chatRequest);
+            Assert.IsNotNull(chatRequest.AudioConfig);
+            Assert.AreEqual(Model.GPT4oAudio.Id, chatRequest.Model);
+            Assert.AreEqual(Voice.Alloy.Id, chatRequest.AudioConfig.Voice);
+            Assert.AreEqual(AudioFormat.Pcm16, chatRequest.AudioConfig.Format);
+            Assert.AreEqual(Modality.Text | Modality.Audio, chatRequest.Modalities);
+            var response = await OpenAIClient.ChatEndpoint.StreamCompletionAsync(chatRequest, Assert.IsNotNull, true);
+            Assert.IsNotNull(response);
+            Assert.IsNotNull(response.Choices);
+            Assert.IsNotEmpty(response.Choices);
+            Assert.AreEqual(1, response.Choices.Count);
+            Assert.IsNotNull(response.FirstChoice);
+            Console.WriteLine($"{response.FirstChoice.Message.Role}: {response.FirstChoice} | Finish Reason: {response.FirstChoice.FinishReason}");
+            Assert.IsNotEmpty(response.FirstChoice.Message.AudioOutput.Transcript);
+            Assert.IsNotNull(response.FirstChoice.Message.AudioOutput.Data);
+            Assert.IsFalse(response.FirstChoice.Message.AudioOutput.Data.IsEmpty);
+            response.GetUsage();
+            messages.Add(response.FirstChoice.Message);
+            messages.Add(new(Role.User, "What are some other good family dog breeds?"));
+            chatRequest = new ChatRequest(messages, Model.GPT4oAudio, audioConfig: Voice.Alloy);
+            Assert.IsNotNull(chatRequest);
+            Assert.IsNotNull(messages[2]);
+            Assert.AreEqual(Role.Assistant, messages[2].Role);
+            Assert.IsNotNull(messages[2].AudioOutput);
+            response = await OpenAIClient.ChatEndpoint.StreamCompletionAsync(chatRequest, Assert.IsNotNull, true);
+            Assert.IsNotNull(response);
+            Assert.IsNotNull(response.Choices);
+            Assert.IsNotEmpty(response.Choices);
+            Assert.AreEqual(1, response.Choices.Count);
+            Assert.IsNotEmpty(response.FirstChoice.Message.AudioOutput.Transcript);
+            Assert.IsNotNull(response.FirstChoice.Message.AudioOutput.Data);
+            Assert.IsFalse(response.FirstChoice.Message.AudioOutput.Data.IsEmpty);
+            Assert.IsFalse(string.IsNullOrWhiteSpace(response.FirstChoice));
+        }
+
         [Test]
         public async Task Test_01_04_JsonMode()
         {
@@ -147,7 +193,7 @@ public async Task Test_01_04_JsonMode()
         }
 
         [Test]
-        public async Task Test_01_05_GetChatStreamingCompletionEnumerableAsync()
+        public async Task Test_01_05_01_GetChatStreamingCompletionEnumerableAsync()
         {
             Assert.IsNotNull(OpenAIClient.ChatEndpoint);
             var messages = new List<Message>
@@ -159,19 +205,78 @@ public async Task Test_01_05_GetChatStreamingCompletionEnumerableAsync()
             };
             var cumulativeDelta = string.Empty;
             var chatRequest = new ChatRequest(messages);
+            var didThrowException = false;
+
             await foreach (var partialResponse in OpenAIClient.ChatEndpoint.StreamCompletionEnumerableAsync(chatRequest, true))
             {
-                Assert.IsNotNull(partialResponse);
-                if (partialResponse.Usage != null) { return; }
-                Assert.NotNull(partialResponse.Choices);
-                Assert.NotZero(partialResponse.Choices.Count);
+                try
+                {
+                    Assert.IsNotNull(partialResponse);
+                    if (partialResponse.Usage != null) { continue; }
+                    Assert.NotNull(partialResponse.Choices);
+                    Assert.NotZero(partialResponse.Choices.Count);
 
-                foreach (var choice in partialResponse.Choices.Where(choice => choice.Delta?.Content != null))
+                    if (partialResponse.FirstChoice?.Delta?.Content is not null)
+                    {
+                        cumulativeDelta += partialResponse.FirstChoice.Delta.Content;
+                    }
+                }
+                catch (Exception e)
                 {
-                    cumulativeDelta += choice.Delta.Content;
+                    Console.WriteLine(e);
+                    didThrowException = true;
+                }
+            }
+
+            Assert.IsFalse(didThrowException);
+            Assert.IsNotEmpty(cumulativeDelta);
+            Console.WriteLine(cumulativeDelta);
+        }
+
+        [Test]
+        public async Task Test_01_05_02_GetChatStreamingModalitiesEnumerableAsync()
+        {
+            Assert.IsNotNull(OpenAIClient.ChatEndpoint);
+
+            var messages = new List<Message>
+            {
+                new(Role.System, "You are a helpful assistant."),
+                new(Role.User, "Count from 1 to 10. Whisper please.")
+            };
+
+            var cumulativeDelta = string.Empty;
+            using var audioStream = new MemoryStream();
+            var chatRequest = new ChatRequest(messages, audioConfig: new AudioConfig(Voice.Nova), model: Model.GPT4oAudio);
+            Assert.IsNotNull(chatRequest);
+            Assert.IsNotNull(chatRequest.AudioConfig);
+            Assert.AreEqual(Model.GPT4oAudio.Id, chatRequest.Model);
+            Assert.AreEqual(Voice.Nova.Id, chatRequest.AudioConfig.Voice);
+            Assert.AreEqual(AudioFormat.Pcm16, chatRequest.AudioConfig.Format);
+            Assert.AreEqual(Modality.Text | Modality.Audio, chatRequest.Modalities);
+            var didThrowException = false;
+
+            await foreach (var partialResponse in OpenAIClient.ChatEndpoint.StreamCompletionEnumerableAsync(chatRequest, true))
+            {
+                try
+                {
+                    Assert.IsNotNull(partialResponse);
+                    if (partialResponse.Usage != null || partialResponse.Choices == null) { continue; }
+
+                    if (partialResponse.FirstChoice?.Delta?.AudioOutput is not null)
+                    {
+                        await audioStream.WriteAsync(partialResponse.FirstChoice.Delta.AudioOutput.Data);
+                        cumulativeDelta += partialResponse.FirstChoice.Delta.AudioOutput.Transcript;
+                    }
+                }
+                catch (Exception e)
+                {
+                    Console.WriteLine(e);
+                    didThrowException = true;
                 }
             }
 
+            Assert.IsFalse(didThrowException);
+            Assert.IsTrue(audioStream.Length > 0);
             Console.WriteLine(cumulativeDelta);
         }
 
diff --git a/OpenAI-DotNet/Chat/AudioOutput.cs b/OpenAI-DotNet/Chat/AudioOutput.cs
index 49701bd3..4cf15f9d 100644
--- a/OpenAI-DotNet/Chat/AudioOutput.cs
+++ b/OpenAI-DotNet/Chat/AudioOutput.cs
@@ -1,6 +1,7 @@
 // Licensed under the MIT License. See LICENSE in the project root for license information.
 
 using System;
+using System.Linq;
 using System.Text.Json.Serialization;
 
 namespace OpenAI.Chat
@@ -8,24 +9,57 @@ namespace OpenAI.Chat
     [JsonConverter(typeof(AudioOutputConverter))]
     public sealed class AudioOutput
     {
-        internal AudioOutput(string id, int expiresAtUnixSeconds, ReadOnlyMemory<byte> data, string transcript)
+        internal AudioOutput(string id, int? expiresAtUnixSeconds, Memory<byte> data, string transcript)
         {
             Id = id;
-            ExpiresAtUnixSeconds = expiresAtUnixSeconds;
-            Data = data;
+            this.data = data;
             Transcript = transcript;
+            ExpiresAtUnixSeconds = expiresAtUnixSeconds;
         }
 
-        public string Id { get; }
+        public string Id { get; private set; }
+
+        public string Transcript { get; private set; }
 
-        public int ExpiresAtUnixSeconds { get; }
+        private Memory<byte> data;
 
-        public DateTime ExpiresAt => DateTimeOffset.FromUnixTimeSeconds(ExpiresAtUnixSeconds).DateTime;
+        public ReadOnlyMemory<byte> Data => data;
 
-        public ReadOnlyMemory<byte> Data { get; }
+        public int? ExpiresAtUnixSeconds { get; private set; }
 
-        public string Transcript { get; }
+        public DateTime? ExpiresAt => ExpiresAtUnixSeconds.HasValue
+            ? DateTimeOffset.FromUnixTimeSeconds(ExpiresAtUnixSeconds.Value).DateTime
+            : null;
 
         public override string ToString() => Transcript ?? string.Empty;
+
+        /// <summary>
+        /// Merges <paramref name="other"/> into this instance: id and expiry are replaced when present, transcript and data are appended.
+        /// </summary>
+        internal void AppendFrom(AudioOutput other)
+        {
+            if (other == null) { return; }
+
+            if (!string.IsNullOrWhiteSpace(other.Id))
+            {
+                Id = other.Id;
+            }
+
+            if (other.ExpiresAtUnixSeconds.HasValue)
+            {
+                ExpiresAtUnixSeconds = other.ExpiresAtUnixSeconds;
+            }
+
+            // Whitespace-only fragments are real transcript text, so skip only null/empty ones.
+            if (!string.IsNullOrEmpty(other.Transcript))
+            {
+                Transcript += other.Transcript;
+            }
+
+            if (other.Data.Length > 0)
+            {
+                data = data.ToArray().Concat(other.Data.ToArray()).ToArray();
+            }
+        }
     }
 }
diff --git a/OpenAI-DotNet/Chat/ChatEndpoint.cs b/OpenAI-DotNet/Chat/ChatEndpoint.cs
index 770c0632..c3556dee 100644
--- a/OpenAI-DotNet/Chat/ChatEndpoint.cs
+++ b/OpenAI-DotNet/Chat/ChatEndpoint.cs
@@ -61,7 +61,7 @@ public async Task GetCompletionAsync(ChatRequest chatRequest, Canc
         /// Created a completion for the chat message and stream the results to the as they come in.
         ///
         /// The chat request which contains the message content.
-        /// An to be invoked as each new result arrives.
+        /// A to be invoked as each new result arrives.
         ///
         /// Optional, If set, an additional chunk will be streamed before the 'data: [DONE]' message.
         /// The 'usage' field on this chunk shows the token usage statistics for the entire request,
@@ -82,7 +82,7 @@ public async Task StreamCompletionAsync(ChatRequest chatRequest, A
         ///
         /// to use for structured outputs.
         /// The chat request which contains the message content.
-        /// An to be invoked as each new result arrives.
+        /// A to be invoked as each new result arrives.
         ///
         /// Optional, If set, an additional chunk will be streamed before the 'data: [DONE]' message.
         /// The 'usage' field on this chunk shows the token usage statistics for the entire request,
@@ -196,7 +196,7 @@ public async IAsyncEnumerable StreamCompletionEnumerableAsync(Chat
                 await responseStream.WriteAsync("["u8.ToArray(), cancellationToken);
             }
 
-            while (await reader.ReadLineAsync() is { } streamData)
+            while (await reader.ReadLineAsync(cancellationToken) is { } streamData)
             {
                 cancellationToken.ThrowIfCancellationRequested();
 
@@ -207,7 +207,10 @@ public async IAsyncEnumerable StreamCompletionEnumerableAsync(Chat
                     continue;
                 }
 
-                if (string.IsNullOrWhiteSpace(eventData)) { continue; }
+                if (string.IsNullOrWhiteSpace(eventData))
+                {
+                    continue;
+                }
 
                 if (responseStream != null)
                 {
diff --git a/OpenAI-DotNet/Chat/Delta.cs b/OpenAI-DotNet/Chat/Delta.cs
index 3cb5d4f7..c8e22926 100644
--- a/OpenAI-DotNet/Chat/Delta.cs
+++ b/OpenAI-DotNet/Chat/Delta.cs
@@ -35,6 +35,13 @@ public sealed class Delta
         [JsonPropertyName("tool_calls")]
         public IReadOnlyList ToolCalls { get; private set; }
 
+        ///
+        /// If the audio output modality is requested, this object contains data about the audio response from the model.
+        ///
+        [JsonInclude]
+        [JsonPropertyName("audio")]
+        public AudioOutput AudioOutput { get; private set; }
+
         ///
         /// Optional, The name of the author of this message.
         /// May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters.
@@ -43,7 +50,16 @@ public sealed class Delta
         [JsonPropertyName("name")]
         public string Name { get; private set; }
 
-        public override string ToString() => Content ?? string.Empty;
+        public override string ToString()
+        {
+            // Whitespace-only content is real text; fall back to the transcript only when Content is null/empty.
+            if (string.IsNullOrEmpty(Content))
+            {
+                return AudioOutput?.ToString() ?? string.Empty;
+            }
+
+            return Content;
+        }
         public static implicit operator string(Delta delta) => delta?.ToString();
     }
 }
diff --git a/OpenAI-DotNet/Chat/Message.cs b/OpenAI-DotNet/Chat/Message.cs
index c4e481ce..05cec6fc 100644
--- a/OpenAI-DotNet/Chat/Message.cs
+++ b/OpenAI-DotNet/Chat/Message.cs
@@ -190,6 +190,23 @@ internal void AppendFrom(Delta other)
                 toolCalls ??= new List();
                 toolCalls.AppendFrom(other.ToolCalls);
             }
+
+            if (other is { AudioOutput: not null })
+            {
+                if (AudioOutput == null)
+                {
+                    // Copy instead of aliasing so later appends never mutate the delta's own audio output.
+                    AudioOutput = new AudioOutput(
+                        other.AudioOutput.Id,
+                        other.AudioOutput.ExpiresAtUnixSeconds,
+                        other.AudioOutput.Data.ToArray(),
+                        other.AudioOutput.Transcript);
+                }
+                else
+                {
+                    AudioOutput.AppendFrom(other.AudioOutput);
+                }
+            }
         }
     }
 }
diff --git a/OpenAI-DotNet/Extensions/AudioOutputConverter.cs b/OpenAI-DotNet/Extensions/AudioOutputConverter.cs
index 7eb42992..e70b491d 100644
--- a/OpenAI-DotNet/Extensions/AudioOutputConverter.cs
+++ b/OpenAI-DotNet/Extensions/AudioOutputConverter.cs
@@ -12,10 +12,10 @@ internal class AudioOutputConverter : JsonConverter
         public override AudioOutput Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
         {
             string id = null;
-            var expiresAt = 0;
+            int? expiresAt = null;
             string b64Data = null;
             string transcript = null;
-            ReadOnlyMemory<byte> data = null;
+            Memory<byte> data = null;
 
             while (reader.Read())
             {