Skip to content

Commit

Permalink
Add CMake option to disable/enable TTS support
Browse files Browse the repository at this point in the history
  • Loading branch information
rkjaran committed Dec 21, 2023
1 parent 7634f5f commit b0b444a
Show file tree
Hide file tree
Showing 7 changed files with 188 additions and 119 deletions.
13 changes: 9 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ option(SHERPA_ONNX_ENABLE_C_API "Whether to build C API" ON)
# Optional features. Each is a boolean cache option that can be overridden at
# configure time with -D<NAME>=ON|OFF.
option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build websocket server/client" ON)
option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON)
# When OFF, the espeak-ng / piper-phonemize dependencies are not included and
# the TTS part of the C API (c-api-tts.cc) is not compiled.
option(SHERPA_ONNX_ENABLE_TTS "Whether to build with TTS capability" ON)

set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
Expand Down Expand Up @@ -99,6 +100,8 @@ message(STATUS "SHERPA_ONNX_ENABLE_JNI ${SHERPA_ONNX_ENABLE_JNI}")
# Print the effective value of each feature option during configuration so the
# chosen build setup is visible in the configure log.
message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}")
message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}")
message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")


if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
Expand Down Expand Up @@ -193,10 +196,12 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET)
include(asio)
endif()

include(espeak-ng-for-piper)
set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR})
message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}")
include(piper-phonemize)
# Third-party dependencies that are only pulled in when TTS support is enabled;
# with SHERPA_ONNX_ENABLE_TTS=OFF none of the modules below are included.
if(SHERPA_ONNX_ENABLE_TTS)
include(espeak-ng-for-piper)
# NOTE(review): espeak_ng_SOURCE_DIR is presumably defined by the
# espeak-ng-for-piper module included above — confirm.
set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR})
message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}")
# NOTE(review): included only after ESPEAK_NG_DIR is set; piper-phonemize
# presumably reads that variable, so keep this order — verify.
include(piper-phonemize)
endif()

add_subdirectory(sherpa-onnx)

Expand Down
10 changes: 9 additions & 1 deletion sherpa-onnx/c-api/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
# Builds the sherpa-onnx C API library and installs it together with its
# public header.
add_library(sherpa-onnx-c-api c-api.cc)

# Sources include project headers relative to the top of the source tree
# (e.g. "sherpa-onnx/c-api/c-api.h"). The path is attached to this target
# only, instead of to every target defined in this directory.
target_include_directories(sherpa-onnx-c-api PRIVATE "${CMAKE_SOURCE_DIR}")

# PUBLIC keeps the transitive linking that the keyword-less signature provided.
target_link_libraries(sherpa-onnx-c-api PUBLIC sherpa-onnx-core)

if(BUILD_SHARED_LIBS)
  # Only needed while compiling the library itself, hence PRIVATE.
  target_compile_definitions(sherpa-onnx-c-api
    PRIVATE
      SHERPA_ONNX_BUILD_SHARED_LIBS=1
      SHERPA_ONNX_BUILD_MAIN_LIB=1
  )
endif()

if(SHERPA_ONNX_ENABLE_TTS)
  # The TTS entry points live in their own translation unit so that they can
  # be left out of the build entirely.
  target_sources(sherpa-onnx-c-api PRIVATE c-api-tts.cc)

  # PUBLIC: c-api.h only declares the TTS functions when this macro is defined,
  # so targets linking against sherpa-onnx-c-api need it as well.
  # NOTE(review): the install rules below do not export the target, so users of
  # the installed header have to define SHERPA_ONNX_ENABLE_TTS themselves.
  target_compile_definitions(sherpa-onnx-c-api PUBLIC SHERPA_ONNX_ENABLE_TTS=1)
endif()

install(TARGETS sherpa-onnx-c-api DESTINATION lib)

install(FILES c-api.h
  DESTINATION include/sherpa-onnx/c-api
)

98 changes: 98 additions & 0 deletions sherpa-onnx/c-api/c-api-tts.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// sherpa-onnx/c-api/c-api-tts.cc
//
// Copyright (c) 2023 Xiaomi Corporation
#include "sherpa-onnx/c-api/c-api.h"

#include <algorithm>
#include <cstdio>
#include <memory>

#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/wave-writer.h"

// Evaluates to `x` when `x` is truthy (non-null pointer, non-zero number) and
// to the fallback `y` otherwise. Used to substitute defaults for config fields
// the caller left unset.
//
// Every argument is parenthesized so that expressions containing low-precedence
// operators (for example a conditional expression) are grouped as intended.
// NOTE: `x` is expanded twice, so it must not have side effects.
#define SHERPA_ONNX_OR(x, y) ((x) ? (x) : (y))

// Handle type handed out by SherpaOnnxCreateOfflineTts(). It owns the C++
// sherpa_onnx::OfflineTts instance through a unique_ptr, so deleting the
// handle also destroys the wrapped object.
struct SherpaOnnxOfflineTts {
std::unique_ptr<sherpa_onnx::OfflineTts> impl;
};

// Creates an offline TTS handle from a C configuration struct.
//
// Null string fields and zero-valued numeric fields in `config` count as
// "not set" and are replaced by defaults:
//   vits.model / vits.lexicon / vits.tokens / vits.data_dir / rule_fsts -> ""
//   vits.noise_scale -> 0.667, vits.noise_scale_w -> 0.8,
//   vits.length_scale -> 1.0, num_threads -> 1, provider -> "cpu",
//   max_num_sentences -> 2
// `model.debug` is copied as is; when it is set, the resulting configuration
// is printed to stderr.
//
// @param config  Configuration to read. A null pointer is rejected.
// @return A newly allocated handle that must be released with
//         SherpaOnnxDestroyOfflineTts(), or nullptr when `config` is null.
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
    const SherpaOnnxOfflineTtsConfig *config) {
  if (!config) {
    fprintf(stderr, "SherpaOnnxCreateOfflineTts: config must not be null\n");
    return nullptr;
  }

  sherpa_onnx::OfflineTtsConfig tts_config;

  tts_config.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, "");
  tts_config.model.vits.lexicon =
      SHERPA_ONNX_OR(config->model.vits.lexicon, "");
  tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
  tts_config.model.vits.data_dir =
      SHERPA_ONNX_OR(config->model.vits.data_dir, "");
  tts_config.model.vits.noise_scale =
      SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
  tts_config.model.vits.noise_scale_w =
      SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
  tts_config.model.vits.length_scale =
      SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);

  tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  tts_config.model.debug = config->model.debug;
  tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
  tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);

  if (tts_config.model.debug) {
    fprintf(stderr, "%s\n", tts_config.ToString().c_str());
  }

  // Hold the handle in a unique_ptr until it is fully constructed so that it
  // is not leaked if constructing the OfflineTts object throws.
  auto tts = std::make_unique<SherpaOnnxOfflineTts>();
  tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);

  return tts.release();
}

// Frees a handle returned by SherpaOnnxCreateOfflineTts(), including the
// OfflineTts object it owns. Passing nullptr is a no-op.
void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) {
  delete tts;
}

// Returns the value reported by SampleRate() of the OfflineTts object wrapped
// by `tts`. `tts` must be a valid, non-null handle.
int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) {
  auto &engine = *tts->impl;
  return engine.SampleRate();
}

// Generates audio for `text` without a callback. This is a convenience
// wrapper that forwards to SherpaOnnxOfflineTtsGenerateWithCallback() with a
// null callback and returns its result unchanged.
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
    float speed) {
  SherpaOnnxGeneratedAudioCallback no_callback = nullptr;
  return SherpaOnnxOfflineTtsGenerateWithCallback(tts, text, sid, speed,
                                                  no_callback);
}

// Runs the wrapped OfflineTts object on `text` and returns the result as a
// heap-allocated C struct.
//
// @param tts       Handle created by SherpaOnnxCreateOfflineTts().
// @param text      Text to synthesize.
// @param sid       Forwarded unchanged to OfflineTts::Generate().
// @param speed     Forwarded unchanged to OfflineTts::Generate().
// @param callback  Forwarded unchanged to OfflineTts::Generate(); may be null.
// @return A newly allocated SherpaOnnxGeneratedAudio holding a copy of the
//         samples; release it with SherpaOnnxDestroyOfflineTtsGeneratedAudio().
//         Returns nullptr when `tts` or `text` is null, or when no samples
//         were generated.
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioCallback callback) {
  // Null arguments cannot be processed; report "no audio" instead of
  // dereferencing them.
  if (!tts || !text) {
    return nullptr;
  }

  sherpa_onnx::GeneratedAudio audio =
      tts->impl->Generate(text, sid, speed, callback);

  if (audio.samples.empty()) {
    return nullptr;
  }

  // Both allocations are held by unique_ptr until everything has succeeded, so
  // a failure in the second allocation cannot leak the first one.
  const auto num_samples = audio.samples.size();
  std::unique_ptr<float[]> samples(new float[num_samples]);
  std::copy(audio.samples.begin(), audio.samples.end(), samples.get());

  auto ans = std::make_unique<SherpaOnnxGeneratedAudio>();
  ans->n = num_samples;
  ans->sample_rate = audio.sample_rate;
  // Ownership of the buffer passes to the returned struct.
  ans->samples = samples.release();

  return ans.release();
}

// Frees an audio object returned by one of the SherpaOnnxOfflineTtsGenerate*
// functions: first the sample buffer, then the struct itself. Passing nullptr
// is a no-op.
void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
    const SherpaOnnxGeneratedAudio *p) {
  if (!p) {
    return;
  }

  delete[] p->samples;
  delete p;
}

// C wrapper around sherpa_onnx::WriteWave().
//
// The C API takes (samples, n, sample_rate, filename), whereas WriteWave()
// takes the filename and sample rate first, so the arguments are reordered
// here. The value returned by WriteWave() is passed back to the caller,
// converted to int32_t.
int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
                            int32_t sample_rate, const char *filename) {
  const auto status =
      sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
  return status;
}
88 changes: 0 additions & 88 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-writer.h"

struct SherpaOnnxOnlineRecognizer {
std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
Expand Down Expand Up @@ -534,89 +532,3 @@ void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) {
void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
p->impl->Reset();
}

struct SherpaOnnxOfflineTts {
std::unique_ptr<sherpa_onnx::OfflineTts> impl;
};

SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
const SherpaOnnxOfflineTtsConfig *config) {
sherpa_onnx::OfflineTtsConfig tts_config;

tts_config.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, "");
tts_config.model.vits.lexicon =
SHERPA_ONNX_OR(config->model.vits.lexicon, "");
tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
tts_config.model.vits.data_dir =
SHERPA_ONNX_OR(config->model.vits.data_dir, "");
tts_config.model.vits.noise_scale =
SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
tts_config.model.vits.noise_scale_w =
SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
tts_config.model.vits.length_scale =
SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);

tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
tts_config.model.debug = config->model.debug;
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);

if (tts_config.model.debug) {
fprintf(stderr, "%s\n", tts_config.ToString().c_str());
}

SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;

tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);

return tts;
}

void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) { delete tts; }

int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) {
return tts->impl->SampleRate();
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
float speed) {
return SherpaOnnxOfflineTtsGenerateWithCallback(tts, text, sid, speed,
nullptr);
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
SherpaOnnxGeneratedAudioCallback callback) {
sherpa_onnx::GeneratedAudio audio =
tts->impl->Generate(text, sid, speed, callback);

if (audio.samples.empty()) {
return nullptr;
}

SherpaOnnxGeneratedAudio *ans = new SherpaOnnxGeneratedAudio;

float *samples = new float[audio.samples.size()];
std::copy(audio.samples.begin(), audio.samples.end(), samples);

ans->samples = samples;
ans->n = audio.samples.size();
ans->sample_rate = audio.sample_rate;

return ans;
}

void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
const SherpaOnnxGeneratedAudio *p) {
if (p) {
delete[] p->samples;
delete p;
}
}

int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
int32_t sample_rate, const char *filename) {
return sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
}
4 changes: 4 additions & 0 deletions sherpa-onnx/c-api/c-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,8 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
SherpaOnnxVoiceActivityDetector *p);

#if defined(SHERPA_ONNX_ENABLE_TTS)

// ============================================================
// For offline Text-to-Speech (i.e., non-streaming TTS)
// ============================================================
Expand Down Expand Up @@ -677,6 +679,8 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
int32_t sample_rate,
const char *filename);

#endif // SHERPA_ONNX_ENABLE_TTS

#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
Expand Down
Loading

0 comments on commit b0b444a

Please sign in to comment.