Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ElevenLabs-DotNet 3.4.0 #69

Merged
merged 3 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ElevenLabs-DotNet-Tests/ElevenLabs-DotNet-Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<PackageReference Include="NUnit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="4.4.2" />
<PackageReference Include="coverlet.collector" Version="1.0.1" />
<PackageReference Include="System.Text.Json" Version="8.0.5" />
<ProjectReference Include="..\ElevenLabs-DotNet\ElevenLabs-DotNet.csproj" />
<ProjectReference Include="..\ElevenLabs-DotNet-Tests-Proxy\ElevenLabs-DotNet-Tests-Proxy.csproj" />
</ItemGroup>
Expand Down
77 changes: 72 additions & 5 deletions ElevenLabs-DotNet-Tests/TestFixture_04_TextToSpeechEndpoint.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using ElevenLabs.TextToSpeech;
using NUnit.Framework;
using System;
using System.Collections.Generic;
Expand All @@ -16,8 +17,8 @@ public async Task Test_01_TextToSpeech()
Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
var voice = Voices.Voice.Adam;
Assert.NotNull(voice);
var defaultVoiceSettings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync();
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync("The quick brown fox jumps over the lazy dog.", voice, defaultVoiceSettings);
var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.");
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
Assert.NotNull(voiceClip);
Console.WriteLine(voiceClip.Id);
}
Expand All @@ -28,20 +29,86 @@ public async Task Test_02_StreamTextToSpeech()
Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
var voice = (await ElevenLabsClient.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault();
Assert.NotNull(voice);
var defaultVoiceSettings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync();
var partialClips = new Queue<VoiceClip>();
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync("The quick brown fox jumps over the lazy dog.", voice, defaultVoiceSettings,
partialClipCallback: async partialClip =>
var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", outputFormat: OutputFormat.PCM_24000);
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request, async partialClip =>
{
Assert.IsNotNull(partialClip);
partialClips.Enqueue(partialClip);
await Task.CompletedTask;
});
Assert.NotNull(partialClips);
Assert.IsNotEmpty(partialClips);
Assert.NotNull(voiceClip);
Console.WriteLine(voiceClip.Id);
}

// Requests speech with timestamps enabled (non-streaming) and checks that the returned
// clip carries a non-empty per-character transcript, then prints it as a markdown table.
[Test]
public async Task Test_03_TextToSpeech_Transcription()
{
    const string text = "The quick brown fox jumps over the lazy dog.";

    Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
    var adam = Voices.Voice.Adam;
    Assert.NotNull(adam);

    var timestampedRequest = new TextToSpeechRequest(adam, text, withTimestamps: true);
    var clip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(timestampedRequest);
    Assert.NotNull(clip);
    Console.WriteLine(clip.Id);

    // The transcript must be present and populated because withTimestamps was requested.
    Assert.NotNull(clip.TimestampedTranscriptCharacters);
    Assert.IsNotEmpty(clip.TimestampedTranscriptCharacters);

    Console.WriteLine("| Character | Start Time | End Time |");
    Console.WriteLine("| --------- | ---------- | -------- |");

    var transcript = clip.TimestampedTranscriptCharacters;

    for (var index = 0; index < transcript.Length; index++)
    {
        var entry = transcript[index];
        Console.WriteLine($"| {entry.Character} | {entry.StartTime} | {entry.EndTime} |");
    }
}

// Streams speech with per-character timestamps enabled and verifies that the characters
// delivered through the partial-clip callback match the transcript on the final clip.
// NOTE(review): despite the "LanguageEnforced" name, no languageCode is passed to the request
// here; the name is kept unchanged so existing test filters and reports keep working.
[Test]
public async Task Test_05_LanguageEnforced_TextToSpeech()
{
    Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
    var voice = Voices.Voice.Adam;
    Assert.NotNull(voice);
    var partialClips = new Queue<VoiceClip>();
    var characters = new Queue<TimestampedTranscriptCharacter>();
    Console.WriteLine("| Character | Start Time | End Time |");
    Console.WriteLine("| --------- | ---------- | -------- |");
    var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", outputFormat: OutputFormat.PCM_24000, withTimestamps: true);
    var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request, async partialClip =>
    {
        // Fail with a clear assertion (as Test_02 does) rather than a NullReferenceException below.
        Assert.IsNotNull(partialClip);
        partialClips.Enqueue(partialClip);

        foreach (var character in partialClip.TimestampedTranscriptCharacters)
        {
            characters.Enqueue(character);
            Console.WriteLine($"| {character.Character} | {character.StartTime} | {character.EndTime} |");
        }

        await Task.CompletedTask;
    });
    Assert.NotNull(partialClips);
    Assert.IsNotEmpty(partialClips);
    Assert.NotNull(voiceClip);
    Console.WriteLine(voiceClip.Id);
    // NOTE(review): element comparison presumably relies on the final clip exposing the same
    // character instances that were streamed — confirm against the endpoint implementation.
    Assert.AreEqual(characters.ToArray(), voiceClip.TimestampedTranscriptCharacters);
}

// Generates speech for a Czech sentence using the TurboV2_5 model with the language code
// explicitly set to "cs", and checks that a clip comes back.
[Test]
public async Task Test_TurboV2_5_LanguageEnforced_TextToSpeech()
{
    const string czechText = "Příliš žluťoučký kůň úpěl ďábelské ódy";
    const string czechLanguageCode = "cs";

    Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);

    var adam = Voices.Voice.Adam;
    Assert.NotNull(adam);

    var settings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync();

    var clip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(
        new TextToSpeechRequest(
            voice: adam,
            text: czechText,
            voiceSettings: settings,
            model: Models.Model.TurboV2_5,
            outputFormat: OutputFormat.MP3_44100_192,
            languageCode: czechLanguageCode));

    Assert.NotNull(clip);
    Console.WriteLine(clip.Id);
}
}
}
5 changes: 4 additions & 1 deletion ElevenLabs-DotNet/Common/GeneratedClip.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@ namespace ElevenLabs
{
public class GeneratedClip
{
internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData)
internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData, int sampleRate = 44100)
{
Id = id;
Text = text;
TextHash = $"{id}{text}".GenerateGuid().ToString();
ClipData = clipData;
SampleRate = sampleRate;
}

/// <summary>
Expand All @@ -34,5 +35,7 @@ internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData)
/// The raw clip data.
/// </summary>
public ReadOnlyMemory<byte> ClipData { get; }

public int SampleRate { get; }
}
}
42 changes: 42 additions & 0 deletions ElevenLabs-DotNet/Common/TimestampedTranscriptCharacter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System.Text.Json.Serialization;

namespace ElevenLabs
{
    /// <summary>
    /// Timing entry for one character of a transcript: the character itself plus
    /// the start and end time at which it is spoken.
    /// </summary>
    public class TimestampedTranscriptCharacter
    {
        /// <summary>
        /// Creates an instance with all properties left at their default values.
        /// </summary>
        public TimestampedTranscriptCharacter()
        {
        }

        /// <summary>
        /// Creates a fully populated entry.
        /// </summary>
        /// <param name="character">The character being spoken.</param>
        /// <param name="startTime">Time in seconds at which the character starts.</param>
        /// <param name="endTime">Time in seconds at which the character ends.</param>
        internal TimestampedTranscriptCharacter(string character, double startTime, double endTime)
            => (Character, StartTime, EndTime) = (character, startTime, endTime);

        /// <summary>
        /// The spoken character.
        /// </summary>
        [JsonInclude, JsonPropertyName("character")]
        public string Character { get; private set; }

        /// <summary>
        /// Time, in seconds, at which this character begins to be spoken.
        /// </summary>
        [JsonInclude, JsonPropertyName("character_start_times_seconds")]
        public double StartTime { get; private set; }

        /// <summary>
        /// Time, in seconds, at which this character has finished being spoken.
        /// </summary>
        [JsonInclude, JsonPropertyName("character_end_times_seconds")]
        public double EndTime { get; private set; }
    }
}
5 changes: 4 additions & 1 deletion ElevenLabs-DotNet/Common/VoiceClip.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@ namespace ElevenLabs
{
public sealed class VoiceClip : GeneratedClip
{
internal VoiceClip(string id, string text, Voice voice, ReadOnlyMemory<byte> clipData) : base(id, text, clipData)
internal VoiceClip(string id, string text, Voice voice, ReadOnlyMemory<byte> clipData, int sampleRate = 44100)
: base(id, text, clipData, sampleRate)
{
Voice = voice;
}

public Voice Voice { get; }

public TimestampedTranscriptCharacter[] TimestampedTranscriptCharacters { get; internal init; }
}
}
7 changes: 6 additions & 1 deletion ElevenLabs-DotNet/ElevenLabs-DotNet.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ All copyrights, trademarks, logos, and assets are the property of their respecti
<SignAssembly>false</SignAssembly>
<IncludeSymbols>true</IncludeSymbols>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Version>3.1.0</Version>
<Version>3.4.0</Version>
<PackageReleaseNotes>
Version 3.4.0
- Added additional request properties for TextToSpeechRequest
- previous_text, next_text, previous_request_ids, next_request_ids, languageCode, withTimestamps
- Added support for transcription timestamps in TextToSpeechResponse
- Added support for language code in TextToSpeechRequest
Version 3.1.0
- Refactored TextToSpeechEndpoint endpoint to accept TextToSpeechRequest object
- Added text encoding options to TextToSpeechRequest
Expand Down
16 changes: 16 additions & 0 deletions ElevenLabs-DotNet/Extensions/Extensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

namespace ElevenLabs.Extensions
{
    /// <summary>
    /// Extension helpers for ElevenLabs types.
    /// </summary>
    public static class Extensions
    {
        /// <summary>
        /// Maps an <see cref="OutputFormat"/> value to the numeric sample rate used for it.
        /// </summary>
        /// <param name="format">The output format to inspect.</param>
        /// <returns>
        /// 16000, 22050 or 24000 for the matching PCM formats; 44100 for
        /// <c>PCM_44100</c> and for every other format value.
        /// </returns>
        public static int GetSampleRate(this OutputFormat format)
        {
            switch (format)
            {
                case OutputFormat.PCM_16000:
                    return 16000;
                case OutputFormat.PCM_22050:
                    return 22050;
                case OutputFormat.PCM_24000:
                    return 24000;
                case OutputFormat.PCM_44100:
                default:
                    // Anything not listed above falls back to 44100.
                    return 44100;
            }
        }
    }
}
37 changes: 37 additions & 0 deletions ElevenLabs-DotNet/TextToSpeech/Alignment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System.Text.Json.Serialization;

namespace ElevenLabs.TextToSpeech
{
    /// <summary>
    /// JSON model holding three parallel arrays: the characters and, for each
    /// character, its start and end time in seconds.
    /// </summary>
    internal sealed class Alignment
    {
        /// <summary>
        /// The characters, one entry per position.
        /// </summary>
        [JsonInclude]
        [JsonPropertyName("characters")]
        public string[] Characters { get; private set; }

        /// <summary>
        /// Start time in seconds for the character at the same index in <see cref="Characters"/>.
        /// </summary>
        [JsonInclude]
        [JsonPropertyName("character_start_times_seconds")]
        public double[] StartTimes { get; private set; }

        /// <summary>
        /// End time in seconds for the character at the same index in <see cref="Characters"/>.
        /// </summary>
        [JsonInclude]
        [JsonPropertyName("character_end_times_seconds")]
        public double[] EndTimes { get; private set; }

        /// <summary>
        /// Zips the three parallel arrays into one <see cref="TimestampedTranscriptCharacter"/> per index.
        /// </summary>
        /// <param name="alignment">The alignment to convert; may be null.</param>
        /// <returns>
        /// Null when <paramref name="alignment"/> is null; an empty array when any of the
        /// three arrays is missing; otherwise one entry per index, limited to the length of
        /// the shortest array.
        /// </returns>
        public static implicit operator TimestampedTranscriptCharacter[](Alignment alignment)
        {
            if (alignment is null) { return null; }

            var characters = alignment.Characters;
            var startTimes = alignment.StartTimes;
            var endTimes = alignment.EndTimes;

            // An implicit conversion should not throw: a payload with a missing array
            // yields an empty result instead of a NullReferenceException.
            if (characters is null || startTimes is null || endTimes is null)
            {
                return System.Array.Empty<TimestampedTranscriptCharacter>();
            }

            // Clamp to the shortest array so mismatched lengths cannot index out of range.
            var count = System.Math.Min(characters.Length, System.Math.Min(startTimes.Length, endTimes.Length));
            var timestampedTranscriptCharacters = new TimestampedTranscriptCharacter[count];

            for (var i = 0; i < count; i++)
            {
                timestampedTranscriptCharacters[i] = new TimestampedTranscriptCharacter(characters[i], startTimes[i], endTimes[i]);
            }

            return timestampedTranscriptCharacters;
        }
    }
}
Loading
Loading