diff --git a/ElevenLabs-DotNet-Tests/ElevenLabs-DotNet-Tests.csproj b/ElevenLabs-DotNet-Tests/ElevenLabs-DotNet-Tests.csproj
index d96b7fe..4369072 100644
--- a/ElevenLabs-DotNet-Tests/ElevenLabs-DotNet-Tests.csproj
+++ b/ElevenLabs-DotNet-Tests/ElevenLabs-DotNet-Tests.csproj
@@ -13,6 +13,7 @@
+
diff --git a/ElevenLabs-DotNet-Tests/TestFixture_04_TextToSpeechEndpoint.cs b/ElevenLabs-DotNet-Tests/TestFixture_04_TextToSpeechEndpoint.cs
index 545f487..d855b39 100644
--- a/ElevenLabs-DotNet-Tests/TestFixture_04_TextToSpeechEndpoint.cs
+++ b/ElevenLabs-DotNet-Tests/TestFixture_04_TextToSpeechEndpoint.cs
@@ -1,5 +1,6 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.
+using ElevenLabs.TextToSpeech;
using NUnit.Framework;
using System;
using System.Collections.Generic;
@@ -16,8 +17,8 @@ public async Task Test_01_TextToSpeech()
Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
var voice = Voices.Voice.Adam;
Assert.NotNull(voice);
- var defaultVoiceSettings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync();
- var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync("The quick brown fox jumps over the lazy dog.", voice, defaultVoiceSettings);
+ var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.");
+ var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
Assert.NotNull(voiceClip);
Console.WriteLine(voiceClip.Id);
}
@@ -28,20 +29,86 @@ public async Task Test_02_StreamTextToSpeech()
Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
var voice = (await ElevenLabsClient.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault();
Assert.NotNull(voice);
- var defaultVoiceSettings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync();
var partialClips = new Queue<VoiceClip>();
- var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync("The quick brown fox jumps over the lazy dog.", voice, defaultVoiceSettings,
- partialClipCallback: async partialClip =>
+ var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", outputFormat: OutputFormat.PCM_24000);
+ var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request, async partialClip =>
{
Assert.IsNotNull(partialClip);
partialClips.Enqueue(partialClip);
await Task.CompletedTask;
});
+ Assert.NotNull(partialClips);
+ Assert.IsNotEmpty(partialClips);
+ Assert.NotNull(voiceClip);
+ Console.WriteLine(voiceClip.Id);
+ }
+
+ [Test]
+ public async Task Test_03_TextToSpeech_Transcription()
+ {
+ Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
+ var voice = Voices.Voice.Adam;
+ Assert.NotNull(voice);
+ var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", withTimestamps: true);
+ var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
+ Assert.NotNull(voiceClip);
+ Console.WriteLine(voiceClip.Id);
+ Assert.NotNull(voiceClip.TimestampedTranscriptCharacters);
+ Assert.IsNotEmpty(voiceClip.TimestampedTranscriptCharacters);
+ Console.WriteLine("| Character | Start Time | End Time |");
+ Console.WriteLine("| --------- | ---------- | -------- |");
+ foreach (var character in voiceClip.TimestampedTranscriptCharacters)
+ {
+ Console.WriteLine($"| {character.Character} | {character.StartTime} | {character.EndTime} |");
+ }
+ }
+
+ [Test]
+ public async Task Test_04_StreamTextToSpeech_Transcription()
+ {
+ Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
+ var voice = Voices.Voice.Adam;
+ Assert.NotNull(voice);
+ var partialClips = new Queue<VoiceClip>();
+ var characters = new Queue<TimestampedTranscriptCharacter>();
+ Console.WriteLine("| Character | Start Time | End Time |");
+ Console.WriteLine("| --------- | ---------- | -------- |");
+ var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", outputFormat: OutputFormat.PCM_24000, withTimestamps: true);
+ var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request, async partialClip =>
+ {
+ await Task.CompletedTask;
+ partialClips.Enqueue(partialClip);
+ foreach (var character in partialClip.TimestampedTranscriptCharacters)
+ {
+ characters.Enqueue(character);
+ Console.WriteLine($"| {character.Character} | {character.StartTime} | {character.EndTime} |");
+ }
+ });
Assert.NotNull(partialClips);
Assert.IsNotEmpty(partialClips);
Assert.NotNull(voiceClip);
Console.WriteLine(voiceClip.Id);
+ Assert.AreEqual(characters.ToArray(), voiceClip.TimestampedTranscriptCharacters);
+ }
+
+ [Test]
+ public async Task Test_05_TurboV2_5_LanguageEnforced_TextToSpeech()
+ {
+ Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
+ var voice = Voices.Voice.Adam;
+ Assert.NotNull(voice);
+ var defaultVoiceSettings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync();
+ var request = new TextToSpeechRequest(
+ voice: voice,
+ text: "Příliš žluťoučký kůň úpěl ďábelské ódy",
+ voiceSettings: defaultVoiceSettings,
+ model: Models.Model.TurboV2_5,
+ outputFormat: OutputFormat.MP3_44100_192,
+ languageCode: "cs");
+ var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
+ Assert.NotNull(voiceClip);
+ Console.WriteLine(voiceClip.Id);
}
}
}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/Common/GeneratedClip.cs b/ElevenLabs-DotNet/Common/GeneratedClip.cs
index d450ee9..d681c8d 100644
--- a/ElevenLabs-DotNet/Common/GeneratedClip.cs
+++ b/ElevenLabs-DotNet/Common/GeneratedClip.cs
@@ -7,12 +7,13 @@ namespace ElevenLabs
{
public class GeneratedClip
{
- internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData)
+ internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData, int sampleRate = 44100)
{
Id = id;
Text = text;
TextHash = $"{id}{text}".GenerateGuid().ToString();
ClipData = clipData;
+ SampleRate = sampleRate;
}
/// <summary>
@@ -34,5 +35,7 @@ internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData)
/// The raw clip data.
/// </summary>
public ReadOnlyMemory<byte> ClipData { get; }
+
+ public int SampleRate { get; }
}
}
\ No newline at end of file
diff --git a/ElevenLabs-DotNet/Common/TimestampedTranscriptCharacter.cs b/ElevenLabs-DotNet/Common/TimestampedTranscriptCharacter.cs
new file mode 100644
index 0000000..ff4b85a
--- /dev/null
+++ b/ElevenLabs-DotNet/Common/TimestampedTranscriptCharacter.cs
@@ -0,0 +1,42 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs
+{
+ /// <summary>
+ /// Represents timing information for a single character in the transcript
+ /// </summary>
+ public class TimestampedTranscriptCharacter
+ {
+ public TimestampedTranscriptCharacter() { }
+
+ internal TimestampedTranscriptCharacter(string character, double startTime, double endTime)
+ {
+ Character = character;
+ StartTime = startTime;
+ EndTime = endTime;
+ }
+
+ /// <summary>
+ /// The character being spoken
+ /// </summary>
+ [JsonInclude]
+ [JsonPropertyName("character")]
+ public string Character { get; private set; }
+
+ /// <summary>
+ /// The time in seconds when this character starts being spoken
+ /// </summary>
+ [JsonInclude]
+ [JsonPropertyName("character_start_times_seconds")]
+ public double StartTime { get; private set; }
+
+ /// <summary>
+ /// The time in seconds when this character finishes being spoken
+ /// </summary>
+ [JsonInclude]
+ [JsonPropertyName("character_end_times_seconds")]
+ public double EndTime { get; private set; }
+ }
+}
diff --git a/ElevenLabs-DotNet/Common/VoiceClip.cs b/ElevenLabs-DotNet/Common/VoiceClip.cs
index 8f75dbd..72986a1 100644
--- a/ElevenLabs-DotNet/Common/VoiceClip.cs
+++ b/ElevenLabs-DotNet/Common/VoiceClip.cs
@@ -7,11 +7,14 @@ namespace ElevenLabs
{
public sealed class VoiceClip : GeneratedClip
{
- internal VoiceClip(string id, string text, Voice voice, ReadOnlyMemory<byte> clipData) : base(id, text, clipData)
+ internal VoiceClip(string id, string text, Voice voice, ReadOnlyMemory<byte> clipData, int sampleRate = 44100)
+ : base(id, text, clipData, sampleRate)
{
Voice = voice;
}
public Voice Voice { get; }
+
+ public TimestampedTranscriptCharacter[] TimestampedTranscriptCharacters { get; internal init; }
}
}
diff --git a/ElevenLabs-DotNet/ElevenLabs-DotNet.csproj b/ElevenLabs-DotNet/ElevenLabs-DotNet.csproj
index 08a3542..197d6db 100644
--- a/ElevenLabs-DotNet/ElevenLabs-DotNet.csproj
+++ b/ElevenLabs-DotNet/ElevenLabs-DotNet.csproj
@@ -25,8 +25,13 @@ All copyrights, trademarks, logos, and assets are the property of their respecti
false
true
true
- <Version>3.1.0</Version>
+ <Version>3.4.0</Version>
+Version 3.4.0
+- Added additional request properties for TextToSpeechRequest
+ - previousText, nextText, previousRequestIds, nextRequestIds, languageCode, withTimestamps
+- Added support for transcription timestamps in VoiceClip
+- Added support for language code in TextToSpeechRequest
Version 3.1.0
- Refactored TextToSpeechEndpoint endpoint to accept TextToSpeechRequest object
- Added text encoding options to TextToSpeechRequest
diff --git a/ElevenLabs-DotNet/Extensions/Extensions.cs b/ElevenLabs-DotNet/Extensions/Extensions.cs
new file mode 100644
index 0000000..c6bf1a1
--- /dev/null
+++ b/ElevenLabs-DotNet/Extensions/Extensions.cs
@@ -0,0 +1,16 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+namespace ElevenLabs.Extensions
+{
+ public static class Extensions
+ {
+ public static int GetSampleRate(this OutputFormat format) => format switch
+ {
+ OutputFormat.PCM_16000 => 16000,
+ OutputFormat.PCM_22050 => 22050,
+ OutputFormat.PCM_24000 => 24000,
+ OutputFormat.PCM_44100 => 44100,
+ _ => 44100
+ };
+ }
+}
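Note on the mapping above: the PCM output formats return raw, headerless samples, so a consumer needs the clip's sample rate (now carried through as `GeneratedClip.SampleRate`) to play or persist the audio. A minimal sketch of such a consumer, assuming 16-bit mono PCM (the sample width and channel count are assumptions, not part of this change):

```csharp
using System;
using ElevenLabs;

public static class PcmClipExtensions
{
    // Hypothetical helper: estimate the playback duration of a raw PCM clip.
    // Assumes 16-bit samples (2 bytes each) and a single channel.
    public static TimeSpan GetPcmDuration(this GeneratedClip clip)
    {
        const int bytesPerSample = 2;
        var sampleCount = clip.ClipData.Length / bytesPerSample;
        return TimeSpan.FromSeconds(sampleCount / (double)clip.SampleRate);
    }
}
```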
diff --git a/ElevenLabs-DotNet/TextToSpeech/Alignment.cs b/ElevenLabs-DotNet/TextToSpeech/Alignment.cs
new file mode 100644
index 0000000..d645415
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/Alignment.cs
@@ -0,0 +1,37 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech
+{
+ internal sealed class Alignment
+ {
+ [JsonInclude]
+ [JsonPropertyName("characters")]
+ public string[] Characters { get; private set; }
+
+ [JsonInclude]
+ [JsonPropertyName("character_start_times_seconds")]
+ public double[] StartTimes { get; private set; }
+
+ [JsonInclude]
+ [JsonPropertyName("character_end_times_seconds")]
+ public double[] EndTimes { get; private set; }
+
+ public static implicit operator TimestampedTranscriptCharacter[](Alignment alignment)
+ {
+ if (alignment == null) { return null; }
+ var characters = alignment.Characters;
+ var startTimes = alignment.StartTimes;
+ var endTimes = alignment.EndTimes;
+ var timestampedTranscriptCharacters = new TimestampedTranscriptCharacter[characters.Length];
+
+ for (var i = 0; i < characters.Length; i++)
+ {
+ timestampedTranscriptCharacters[i] = new TimestampedTranscriptCharacter(characters[i], startTimes[i], endTimes[i]);
+ }
+
+ return timestampedTranscriptCharacters;
+ }
+ }
+}
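For context, each `data:` line of the with-timestamps stream carries base64 audio plus three parallel arrays, which the implicit operator above zips into one `TimestampedTranscriptCharacter` per index. A sketch of that conversion on a fabricated payload (the JSON literal is illustrative only, and since `TranscriptionResponse` and `Alignment` are internal, code shaped like this can only run inside the library or its tests):

```csharp
using System.Text.Json;
using ElevenLabs;
using ElevenLabs.TextToSpeech;

// Fabricated sample of one streamed line's JSON body (not captured API output).
const string sampleLine = """
{
    "audio_base64": "AAAA",
    "alignment": {
        "characters": ["H", "i"],
        "character_start_times_seconds": [0.0, 0.1],
        "character_end_times_seconds": [0.1, 0.25]
    }
}
""";

var response = JsonSerializer.Deserialize<TranscriptionResponse>(sampleLine);

// The implicit operator pairs characters[i] with startTimes[i] and endTimes[i].
TimestampedTranscriptCharacter[] characters = response.Alignment;
```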
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs
index bfe7098..6b387b3 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs
@@ -28,42 +28,7 @@ public TextToSpeechEndpoint(ElevenLabsClient client) : base(client) { }
protected override string Root => "text-to-speech";
- /// <summary>
- /// Converts text into speech using a voice of your choice and returns audio.
- /// </summary>
- /// <param name="text">
- /// Text input to synthesize speech for. Maximum 5000 characters.
- /// </param>
- /// <param name="voice">
- /// <see cref="Voice"/> to use.
- /// </param>
- /// <param name="voiceSettings">
- /// Optional, <see cref="VoiceSettings"/> that will override the default settings in <see cref="Voice.Settings"/>.
- /// </param>
- /// <param name="model">
- /// Optional, <see cref="Model"/> to use. Defaults to <see cref="Models.Model.MultiLingualV2"/>.
- /// </param>
- /// <param name="outputFormat">
- /// Output format of the generated audio.
- /// Defaults to <see cref="OutputFormat.MP3_44100_128"/>
- /// </param>
- /// <param name="optimizeStreamingLatency">
- /// Optional, You can turn on latency optimizations at some cost of quality.
- /// The best possible final latency varies by model.
- /// Possible values:
- /// 0 - default mode (no latency optimizations)
- /// 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
- /// 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
- /// 3 - max latency optimizations
- /// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings
- /// (best latency, but can mispronounce e.g. numbers and dates).
- /// </param>
- /// <param name="partialClipCallback">
- /// Optional, Callback to enable streaming audio as it comes in.
- /// Returns partial <see cref="VoiceClip"/>.
- /// </param>
- /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
- /// <returns><see cref="VoiceClip"/>.</returns>
+ [Obsolete("use overload with TextToSpeechRequest")]
public async Task<VoiceClip> TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, Func<VoiceClip, Task> partialClipCallback = null, CancellationToken cancellationToken = default)
{
var defaultVoiceSettings = voiceSettings ?? voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken);
@@ -82,6 +47,7 @@ public async Task<VoiceClip> TextToSpeechAsync(string text, Voice voice, VoiceSe
/// <returns><see cref="VoiceClip"/>.</returns>
public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Func<VoiceClip, Task> partialClipCallback = null, CancellationToken cancellationToken = default)
{
+ request.VoiceSettings ??= await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken);
using var payload = JsonSerializer.Serialize(request, ElevenLabsClient.JsonSerializationOptions).ToJsonStringContent();
var parameters = new Dictionary<string, string>
{
@@ -93,7 +59,19 @@ public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Func
parameters.Add(OptimizeStreamingLatencyParameter, request.OptimizeStreamingLatency.Value.ToString());
}
- using var postRequest = new HttpRequestMessage(HttpMethod.Post, GetUrl($"/{request.Voice.Id}{(partialClipCallback == null ? string.Empty : "/stream")}", parameters));
+ var endpoint = $"/{request.Voice.Id}";
+
+ if (partialClipCallback != null)
+ {
+ endpoint += "/stream";
+ }
+
+ if (request.WithTimestamps)
+ {
+ endpoint += "/with-timestamps";
+ }
+
+ using var postRequest = new HttpRequestMessage(HttpMethod.Post, GetUrl(endpoint, parameters));
postRequest.Content = payload;
var requestOption = partialClipCallback == null
? HttpCompletionOption.ResponseContentRead
@@ -107,32 +85,85 @@ public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Func
throw new ArgumentException("Failed to parse clip id!");
}
- await using var responseStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
- await using var memoryStream = new MemoryStream();
- int bytesRead;
- var totalBytesRead = 0;
- var buffer = new byte[8192];
+ return request.WithTimestamps
+ ? await StreamWithTimeStampsAsync(response).ConfigureAwait(false)
+ : await StreamAsync(response).ConfigureAwait(false);
- while ((bytesRead = await responseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
+ async Task<VoiceClip> StreamWithTimeStampsAsync(HttpResponseMessage messageResponse)
{
- await memoryStream.WriteAsync(new ReadOnlyMemory<byte>(buffer, 0, bytesRead), cancellationToken).ConfigureAwait(false);
+ await using var audioDataStream = new MemoryStream();
+ var accumulatedTranscriptData = new List<TimestampedTranscriptCharacter>();
+ await using var stream = await messageResponse.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
+ using var reader = new StreamReader(stream);
- if (partialClipCallback != null)
+ while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
{
- try
+ const string data = "data: ";
+ const string done = "[DONE]";
+
+ if (line.StartsWith(data)) { line = line[data.Length..]; }
+ if (line == done) { break; }
+ if (string.IsNullOrWhiteSpace(line)) { continue; }
+
+ var transcriptData = JsonSerializer.Deserialize<TranscriptionResponse>(line, ElevenLabsClient.JsonSerializationOptions);
+ var timestampedTranscriptCharacters = (TimestampedTranscriptCharacter[])transcriptData.Alignment ?? [];
+
+ if (partialClipCallback != null)
{
- await partialClipCallback(new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(memoryStream.GetBuffer(), totalBytesRead, bytesRead))).ConfigureAwait(false);
+ try
+ {
+ var partialClip = new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(transcriptData.AudioBytes), request.OutputFormat.GetSampleRate())
+ {
+ TimestampedTranscriptCharacters = timestampedTranscriptCharacters
+ };
+ await partialClipCallback(partialClip).ConfigureAwait(false);
+ }
+ catch (Exception e)
+ {
+ Console.WriteLine(e);
+ }
}
- catch (Exception e)
+
+ accumulatedTranscriptData.AddRange(timestampedTranscriptCharacters);
+ await audioDataStream.WriteAsync(transcriptData.AudioBytes, 0, transcriptData.AudioBytes.Length, cancellationToken).ConfigureAwait(false);
+ }
+
+ return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioDataStream.GetBuffer(), 0, (int)audioDataStream.Length), request.OutputFormat.GetSampleRate())
+ {
+ TimestampedTranscriptCharacters = accumulatedTranscriptData.ToArray()
+ };
+ }
+
+ async Task<VoiceClip> StreamAsync(HttpResponseMessage messageResponse)
+ {
+ int bytesRead;
+ var totalBytesRead = 0;
+ var buffer = new byte[8192];
+ await using var audioDataStream = new MemoryStream();
+ await using var responseStream = await messageResponse.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
+
+ while ((bytesRead = await responseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
+ {
+ await audioDataStream.WriteAsync(new ReadOnlyMemory<byte>(buffer, 0, bytesRead), cancellationToken).ConfigureAwait(false);
+
+ if (partialClipCallback != null)
{
- Console.WriteLine(e);
+ try
+ {
+ var partialClip = new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioDataStream.GetBuffer(), totalBytesRead, bytesRead), request.OutputFormat.GetSampleRate());
+ await partialClipCallback(partialClip).ConfigureAwait(false);
+ }
+ catch (Exception e)
+ {
+ Console.WriteLine(e);
+ }
}
+
+ totalBytesRead += bytesRead;
}
- totalBytesRead += bytesRead;
+ return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioDataStream.GetBuffer(), 0, totalBytesRead), request.OutputFormat.GetSampleRate());
}
-
- return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(memoryStream.GetBuffer(), 0, totalBytesRead));
}
}
}
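End to end, the timestamp path is opt-in per request. A minimal caller sketch against the new surface, mirroring the tests above (the voice choice and console output are incidental):

```csharp
using System;
using System.Linq;
using System.Threading.Tasks;
using ElevenLabs;
using ElevenLabs.TextToSpeech;

var api = new ElevenLabsClient();
var voice = (await api.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault();
var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.",
    outputFormat: OutputFormat.PCM_24000, withTimestamps: true);
var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request, async partialClip =>
{
    // Each partial clip carries only the characters aligned with its audio chunk.
    foreach (var character in partialClip.TimestampedTranscriptCharacters)
    {
        Console.WriteLine($"{character.Character}: {character.StartTime:F2}s -> {character.EndTime:F2}s");
    }
    await Task.CompletedTask;
});

// The returned clip accumulates the full audio and the full transcript.
Console.WriteLine(voiceClip.TimestampedTranscriptCharacters.Length);
```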
diff --git a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs
index 1ca2a29..ca77782 100644
--- a/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs
+++ b/ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs
@@ -10,8 +10,9 @@ namespace ElevenLabs.TextToSpeech
{
public sealed class TextToSpeechRequest
{
- public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings) :
- this(null, text, voiceSettings: voiceSettings, model: model)
+ [Obsolete]
+ public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings)
+ : this(null, text, voiceSettings: voiceSettings, model: model)
{
}
@@ -29,7 +30,7 @@ public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings
/// Optional, <see cref="VoiceSettings"/> that will override the default settings in <see cref="Voice.Settings"/>.
/// </param>
/// <param name="model">
- /// Optional, <see cref="Model"/> to use. Defaults to <see cref="Models.Model.MultiLingualV2"/>.
+ /// Optional, <see cref="Model"/> to use. Defaults to <see cref="Models.Model.TurboV2_5"/>.
/// </param>
/// <param name="outputFormat">
/// Output format of the generated audio.
@@ -47,8 +48,14 @@ public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings
/// (best latency, but can mispronounce e.g. numbers and dates).
/// </param>
/// <param name="previousText"></param>
- ///
- ///
+ /// <param name="nextText"></param>
+ /// <param name="previousRequestIds"></param>
+ /// <param name="nextRequestIds"></param>
+ /// <param name="languageCode">
+ /// Optional, Language code (ISO 639-1) used to enforce a language for the model. Currently only <see cref="Models.Model.TurboV2_5"/> supports language enforcement.
+ /// For other models, an error will be returned if language code is provided.
+ /// </param>
+ /// <param name="withTimestamps"></param>
public TextToSpeechRequest(
Voice voice,
string text,
@@ -57,7 +64,12 @@ public TextToSpeechRequest(
OutputFormat outputFormat = OutputFormat.MP3_44100_128,
int? optimizeStreamingLatency = null,
Model model = null,
- string previousText = null)
+ string previousText = null,
+ string nextText = null,
+ string[] previousRequestIds = null,
+ string[] nextRequestIds = null,
+ string languageCode = null,
+ bool withTimestamps = false)
{
if (string.IsNullOrWhiteSpace(text))
{
@@ -81,12 +93,25 @@ public TextToSpeechRequest(
}
Text = text;
- Model = model ?? Models.Model.MultiLingualV2;
+ Model = model ?? Models.Model.TurboV2_5;
Voice = voice;
- VoiceSettings = voiceSettings ?? voice.Settings ?? throw new ArgumentNullException(nameof(voiceSettings));
- PreviousText = previousText;
+ VoiceSettings = voiceSettings ?? voice.Settings;
OutputFormat = outputFormat;
OptimizeStreamingLatency = optimizeStreamingLatency;
+ PreviousText = previousText;
+ NextText = nextText;
+ if (previousRequestIds?.Length > 3)
+ {
+ previousRequestIds = previousRequestIds[..3];
+ }
+ PreviousRequestIds = previousRequestIds;
+ if (nextRequestIds?.Length > 3)
+ {
+ nextRequestIds = nextRequestIds[..3];
+ }
+ NextRequestIds = nextRequestIds;
+ LanguageCode = languageCode;
+ WithTimestamps = withTimestamps;
}
[JsonPropertyName("text")]
@@ -99,7 +124,7 @@ public TextToSpeechRequest(
public Voice Voice { get; }
[JsonPropertyName("voice_settings")]
- public VoiceSettings VoiceSettings { get; }
+ public VoiceSettings VoiceSettings { get; internal set; }
[JsonPropertyName("previous_text")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
@@ -110,5 +135,27 @@ public TextToSpeechRequest(
[JsonIgnore]
public int? OptimizeStreamingLatency { get; }
+
+ [JsonPropertyName("next_text")]
+ public string NextText { get; }
+
+ /// <summary>
+ /// A maximum of three next or previous history item ids can be sent
+ /// </summary>
+ [JsonPropertyName("previous_request_ids")]
+ public string[] PreviousRequestIds { get; }
+
+ /// <summary>
+ /// A maximum of three next or previous history item ids can be sent
+ /// </summary>
+ [JsonPropertyName("next_request_ids")]
+ public string[] NextRequestIds { get; }
+
+ [JsonPropertyName("language_code")]
+ public string LanguageCode { get; }
+
+ [JsonIgnore]
+ public bool WithTimestamps { get; }
}
}
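The stitching fields (`previousText`/`nextText` and the request-id lists, which the constructor caps at three entries) exist to keep prosody continuous across consecutive generations. A hedged sketch of chaining two requests; "req_123" is a placeholder, since this sketch never performs the first request to obtain a real id:

```csharp
using ElevenLabs.TextToSpeech;
using ElevenLabs.Voices;

var voice = Voice.Adam;

// First chunk: tell the model what text will follow it.
var first = new TextToSpeechRequest(
    voice: voice,
    text: "Chapter one.",
    nextText: "It was a dark and stormy night.");

// Second chunk: pass the surrounding text and up to three earlier request ids.
// "req_123" is a placeholder id, not output from a real request.
var second = new TextToSpeechRequest(
    voice: voice,
    text: "It was a dark and stormy night.",
    previousText: "Chapter one.",
    previousRequestIds: new[] { "req_123" });
```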
diff --git a/ElevenLabs-DotNet/TextToSpeech/TranscriptionResponse.cs b/ElevenLabs-DotNet/TextToSpeech/TranscriptionResponse.cs
new file mode 100644
index 0000000..0f8cc48
--- /dev/null
+++ b/ElevenLabs-DotNet/TextToSpeech/TranscriptionResponse.cs
@@ -0,0 +1,21 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Text.Json.Serialization;
+
+namespace ElevenLabs.TextToSpeech
+{
+ internal sealed class TranscriptionResponse
+ {
+ [JsonInclude]
+ [JsonPropertyName("audio_base64")]
+ public string AudioBase64 { get; private set; }
+
+ [JsonIgnore]
+ public byte[] AudioBytes => Convert.FromBase64String(AudioBase64);
+
+ [JsonInclude]
+ [JsonPropertyName("alignment")]
+ public Alignment Alignment { get; private set; }
+ }
+}
diff --git a/README.md b/README.md
index de5f34c..d1741b3 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ dotnet add package ElevenLabs-DotNet
- [Text to Speech](#text-to-speech)
- [Stream Text To Speech](#stream-text-to-speech)
- [Voices](#voices)
- - [Get Shared Voices](#get-shared-voices) :new:
+ - [Get Shared Voices](#get-shared-voices)
- [Get All Voices](#get-all-voices)
- [Get Default Voice Settings](#get-default-voice-settings)
- [Get Voice](#get-voice)
@@ -58,13 +58,13 @@ dotnet add package ElevenLabs-DotNet
- [Samples](#samples)
- [Download Voice Sample](#download-voice-sample)
- [Delete Voice Sample](#delete-voice-sample)
-- [Dubbing](#dubbing) :new:
- - [Dub](#dub) :new:
- - [Get Dubbing Metadata](#get-dubbing-metadata) :new:
- - [Get Transcript for Dub](#get-transcript-for-dub) :new:
- - [Get dubbed file](#get-dubbed-file) :new:
- - [Delete Dubbing Project](#delete-dubbing-project) :new:
-- [SFX Generation](#sfx-generation) :new:
+- [Dubbing](#dubbing)
+ - [Dub](#dub)
+ - [Get Dubbing Metadata](#get-dubbing-metadata)
+ - [Get Transcript for Dub](#get-transcript-for-dub)
+ - [Get dubbed file](#get-dubbed-file)
+ - [Delete Dubbing Project](#delete-dubbing-project)
+- [SFX Generation](#sfx-generation)
- [History](#history)
- [Get History](#get-history)
- [Get History Item](#get-history-item)
@@ -204,8 +204,8 @@ Convert text to speech.
var api = new ElevenLabsClient();
var text = "The quick brown fox jumps over the lazy dog.";
var voice = (await api.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault();
-var defaultVoiceSettings = await api.VoicesEndpoint.GetDefaultVoiceSettingsAsync();
-var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(text, voice, defaultVoiceSettings);
+var request = new TextToSpeechRequest(voice, text);
+var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request);
await File.WriteAllBytesAsync($"{voiceClip.Id}.mp3", voiceClip.ClipData.ToArray());
```
@@ -219,7 +219,8 @@ var text = "The quick brown fox jumps over the lazy dog.";
var voice = (await api.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault();
string fileName = "myfile.mp3";
using var outputFileStream = File.OpenWrite(fileName);
-var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(text, voice,
+var request = new TextToSpeechRequest(voice, text);
+var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request,
partialClipCallback: async (partialClip) =>
{
// Write the incoming data to the output file stream.