From 0fdb2044e15c71725b41e568dad875bd4d1efaa0 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 23 Oct 2023 12:31:54 +0800 Subject: [PATCH] Add jni interface and kotlin API examples for TTS. (#381) --- kotlin-api-examples/.gitignore | 3 + kotlin-api-examples/Main.kt | 22 +++ kotlin-api-examples/Tts.kt | 112 +++++++++++++ kotlin-api-examples/run.sh | 27 +-- sherpa-onnx/csrc/lexicon.cc | 56 +++++-- sherpa-onnx/csrc/lexicon.h | 16 +- sherpa-onnx/csrc/offline-tts-impl.cc | 8 + sherpa-onnx/csrc/offline-tts-impl.h | 10 ++ sherpa-onnx/csrc/offline-tts-vits-impl.h | 13 ++ sherpa-onnx/csrc/offline-tts-vits-model.cc | 17 ++ sherpa-onnx/csrc/offline-tts-vits-model.h | 8 + sherpa-onnx/csrc/offline-tts.cc | 5 + sherpa-onnx/csrc/offline-tts.h | 10 ++ sherpa-onnx/csrc/symbol-table.cc | 3 +- sherpa-onnx/jni/jni.cc | 181 +++++++++++++++++++-- 15 files changed, 454 insertions(+), 37 deletions(-) create mode 100644 kotlin-api-examples/.gitignore create mode 100644 kotlin-api-examples/Tts.kt diff --git a/kotlin-api-examples/.gitignore b/kotlin-api-examples/.gitignore new file mode 100644 index 000000000..196e33f05 --- /dev/null +++ b/kotlin-api-examples/.gitignore @@ -0,0 +1,3 @@ +hs_err* +main.jar +vits-zh-aishell3 diff --git a/kotlin-api-examples/Main.kt b/kotlin-api-examples/Main.kt index 811b3d68d..69c41dfd1 100644 --- a/kotlin-api-examples/Main.kt +++ b/kotlin-api-examples/Main.kt @@ -3,6 +3,28 @@ package com.k2fsa.sherpa.onnx import android.content.res.AssetManager fun main() { + testTts() + testAsr() +} + +fun testTts() { + var config = OfflineTtsConfig( + model=OfflineTtsModelConfig( + vits=OfflineTtsVitsModelConfig( + model="./vits-zh-aishell3/vits-aishell3.onnx", + lexicon="./vits-zh-aishell3/lexicon.txt", + tokens="./vits-zh-aishell3/tokens.txt", + ), + numThreads=1, + debug=true, + ) + ) + val tts = OfflineTts(config=config) + val audio = tts.generate(text="林美丽最美丽!", sid=99, speed=1.2f) + audio.save(filename="99.wav") +} + +fun testAsr() { var featConfig = FeatureConfig( sampleRate = 16000, featureDim = 80, diff --git a/kotlin-api-examples/Tts.kt b/kotlin-api-examples/Tts.kt new file mode 100644 index 000000000..09f3eb901 --- /dev/null +++ b/kotlin-api-examples/Tts.kt @@ -0,0 +1,112 @@ +// Copyright (c) 2023 Xiaomi Corporation +package com.k2fsa.sherpa.onnx + +import android.content.res.AssetManager + +data class OfflineTtsVitsModelConfig( + var model: String, + var lexicon: String, + var tokens: String, + var noiseScale: Float = 0.667f, + var noiseScaleW: Float = 0.8f, + var lengthScale: Float = 1.0f, +) + +data class OfflineTtsModelConfig( + var vits: OfflineTtsVitsModelConfig, + var numThreads: Int = 1, + var debug: Boolean = false, + var provider: String = "cpu", +) + +data class OfflineTtsConfig( + var model: OfflineTtsModelConfig, +) + +class GeneratedAudio( + val samples : FloatArray, + val sampleRate: Int, +) { + fun save(filename: String) = saveImpl(filename=filename, samples=samples, sampleRate=sampleRate) + + private external fun saveImpl( + filename: String, + samples: FloatArray, + sampleRate: Int + ): Boolean +} + +class OfflineTts( + assetManager: AssetManager? = null, + var config: OfflineTtsConfig, +) { + private var ptr: Long + + init { + if (assetManager != null) { + ptr = new(assetManager, config) + } else { + ptr = newFromFile(config) + } + } + + fun generate( + text: String, + sid: Int = 0, + speed: Float = 1.0f + ): GeneratedAudio { + var objArray = generateImpl(ptr, text=text, sid=sid, speed=speed) + return GeneratedAudio(samples=objArray[0] as FloatArray, + sampleRate=objArray[1] as Int) + } + + fun allocate(assetManager: AssetManager? = null) { + if (ptr == 0L) { + if (assetManager != null) { + ptr = new(assetManager, config) + } else { + ptr = newFromFile(config) + } + } + } + + fun free() { + if (ptr != 0L) { + delete(ptr) + ptr = 0 + } + } + + protected fun finalize() { + delete(ptr) + } + + private external fun new( + assetManager: AssetManager, + config: OfflineTtsConfig, + ): Long + + private external fun newFromFile( + config: OfflineTtsConfig, + ): Long + + private external fun delete(ptr: Long) + + // The returned array has two entries: + // - the first entry is an 1-D float array containing audio samples. + // Each sample is normalized to the range [-1, 1] + // - the second entry is the sample rate + external fun generateImpl( + ptr: Long, + text: String, + sid: Int = 0, + speed: Float = 1.0f + ): Array + + companion object { + init { + System.loadLibrary("sherpa-onnx-jni") + } + } + +} diff --git a/kotlin-api-examples/run.sh b/kotlin-api-examples/run.sh index 6b8ba65ce..a4c368b18 100755 --- a/kotlin-api-examples/run.sh +++ b/kotlin-api-examples/run.sh @@ -6,21 +6,24 @@ set -e + cd .. mkdir -p build cd build -cmake \ - -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ - -DSHERPA_ONNX_ENABLE_TESTS=OFF \ - -DSHERPA_ONNX_ENABLE_CHECK=OFF \ - -DBUILD_SHARED_LIBS=ON \ - -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_ONNX_ENABLE_JNI=ON \ - .. - -make -j4 -ls -lh lib +if [ ! -f ../build/lib/libsherpa-onnx-jni.dylib ]; then + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib +fi export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH @@ -31,7 +34,7 @@ if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 fi -kotlinc-jvm -include-runtime -d main.jar Main.kt WaveReader.kt SherpaOnnx.kt faked-asset-manager.kt +kotlinc-jvm -include-runtime -d main.jar Main.kt WaveReader.kt SherpaOnnx.kt faked-asset-manager.kt Tts.kt ls -lh main.jar diff --git a/sherpa-onnx/csrc/lexicon.cc b/sherpa-onnx/csrc/lexicon.cc index 8dafe7bfe..717d426d9 100644 --- a/sherpa-onnx/csrc/lexicon.cc +++ b/sherpa-onnx/csrc/lexicon.cc @@ -10,7 +10,15 @@ #include #include +#if __ANDROID_API__ >= 9 +#include + +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + #include "sherpa-onnx/csrc/macros.h" +#include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/text-utils.h" namespace sherpa_onnx { @@ -22,11 +30,9 @@ static void ToLowerCase(std::string *in_out) { // Note: We don't use SymbolTable here since tokens may contain a blank // in the first column -static std::unordered_map ReadTokens( - const std::string &tokens) { +static std::unordered_map ReadTokens(std::istream &is) { std::unordered_map token2id; - std::ifstream is(tokens); std::string line; std::string sym; @@ -80,11 +86,43 @@ Lexicon::Lexicon(const std::string &lexicon, const std::string &tokens, bool debug /*= false*/) : debug_(debug) { InitLanguage(language); - InitTokens(tokens); - InitLexicon(lexicon); + + { + std::ifstream is(tokens); + InitTokens(is); + } + + { + std::ifstream is(lexicon); + InitLexicon(is); + } + InitPunctuations(punctuations); } +#if __ANDROID_API__ >= 9 +Lexicon::Lexicon(AAssetManager *mgr, const std::string &lexicon, + const std::string &tokens, const std::string &punctuations, + const std::string &language, bool debug /*= false*/) + : debug_(debug) { + InitLanguage(language); + + { + auto buf = ReadFile(mgr, tokens); + std::istrstream is(buf.data(), buf.size()); + InitTokens(is); + } + + { + auto buf = ReadFile(mgr, lexicon); + std::istrstream is(buf.data(), buf.size()); + InitLexicon(is); + } + + InitPunctuations(punctuations); +} +#endif + std::vector Lexicon::ConvertTextToTokenIds( const std::string &text) const { switch (language_) { @@ -192,9 +230,7 @@ std::vector Lexicon::ConvertTextToTokenIdsEnglish( return ans; } -void Lexicon::InitTokens(const std::string &tokens) { - token2id_ = ReadTokens(tokens); -} +void Lexicon::InitTokens(std::istream &is) { token2id_ = ReadTokens(is); } void Lexicon::InitLanguage(const std::string &_lang) { std::string lang(_lang); @@ -209,9 +245,7 @@ void Lexicon::InitLanguage(const std::string &_lang) { } } -void Lexicon::InitLexicon(const std::string &lexicon) { - std::ifstream is(lexicon); - +void Lexicon::InitLexicon(std::istream &is) { std::string word; std::vector token_list; std::string line; diff --git a/sherpa-onnx/csrc/lexicon.h b/sherpa-onnx/csrc/lexicon.h index 211eb607f..a01004f24 100644 --- a/sherpa-onnx/csrc/lexicon.h +++ b/sherpa-onnx/csrc/lexicon.h @@ -6,11 +6,17 @@ #define SHERPA_ONNX_CSRC_LEXICON_H_ #include +#include #include #include #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + namespace sherpa_onnx { // TODO(fangjun): Refactor it to an abstract class @@ -20,6 +26,12 @@ class Lexicon { const std::string &punctuations, const std::string &language, bool debug = false); +#if __ANDROID_API__ >= 9 + Lexicon(AAssetManager *mgr, const std::string &lexicon, + const std::string &tokens, const std::string &punctuations, + const std::string &language, bool debug = false); +#endif + std::vector ConvertTextToTokenIds(const std::string &text) const; private: @@ -30,8 +42,8 @@ class Lexicon { const std::string &text) const; void InitLanguage(const std::string &lang); - void InitTokens(const std::string &tokens); - void InitLexicon(const std::string &lexicon); + void InitTokens(std::istream &is); + void InitLexicon(std::istream &is); void InitPunctuations(const std::string &punctuations); private: diff --git a/sherpa-onnx/csrc/offline-tts-impl.cc b/sherpa-onnx/csrc/offline-tts-impl.cc index f260499b6..063730db8 100644 --- a/sherpa-onnx/csrc/offline-tts-impl.cc +++ b/sherpa-onnx/csrc/offline-tts-impl.cc @@ -16,4 +16,12 @@ std::unique_ptr OfflineTtsImpl::Create( return std::make_unique(config); } +#if __ANDROID_API__ >= 9 +std::unique_ptr OfflineTtsImpl::Create( + AAssetManager *mgr, const OfflineTtsConfig &config) { + // TODO(fangjun): Support other types + return std::make_unique(mgr, config); +} +#endif + } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/offline-tts-impl.h b/sherpa-onnx/csrc/offline-tts-impl.h index 41835a99d..d064018da 100644 --- a/sherpa-onnx/csrc/offline-tts-impl.h +++ b/sherpa-onnx/csrc/offline-tts-impl.h @@ -8,6 +8,11 @@ #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + #include "sherpa-onnx/csrc/offline-tts.h" namespace sherpa_onnx { @@ -18,6 +23,11 @@ class OfflineTtsImpl { static std::unique_ptr Create(const OfflineTtsConfig &config); +#if __ANDROID_API__ >= 9 + static std::unique_ptr Create(AAssetManager *mgr, + const OfflineTtsConfig &config); +#endif + virtual GeneratedAudio Generate(const std::string &text, int64_t sid = 0, float speed = 1.0) const = 0; }; diff --git a/sherpa-onnx/csrc/offline-tts-vits-impl.h b/sherpa-onnx/csrc/offline-tts-vits-impl.h index 142035689..847fb305b 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-impl.h +++ b/sherpa-onnx/csrc/offline-tts-vits-impl.h @@ -9,6 +9,11 @@ #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + #include "sherpa-onnx/csrc/lexicon.h" #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-tts-impl.h" @@ -24,6 +29,14 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { model_->Punctuations(), model_->Language(), config.model.debug) {} +#if __ANDROID_API__ >= 9 + OfflineTtsVitsImpl(AAssetManager *mgr, const OfflineTtsConfig &config) + : model_(std::make_unique(mgr, config.model)), + lexicon_(mgr, config.model.vits.lexicon, config.model.vits.tokens, + model_->Punctuations(), model_->Language(), + config.model.debug) {} +#endif + GeneratedAudio Generate(const std::string &text, int64_t sid = 0, float speed = 1.0) const override { int32_t num_speakers = model_->NumSpeakers(); diff --git a/sherpa-onnx/csrc/offline-tts-vits-model.cc b/sherpa-onnx/csrc/offline-tts-vits-model.cc index 060aa8b94..53d8449e0 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-model.cc +++ b/sherpa-onnx/csrc/offline-tts-vits-model.cc @@ -26,6 +26,17 @@ class OfflineTtsVitsModel::Impl { Init(buf.data(), buf.size()); } +#if __ANDROID_API__ >= 9 + Impl(AAssetManager *mgr, const OfflineTtsModelConfig &config) + : config_(config), + env_(ORT_LOGGING_LEVEL_WARNING), + sess_opts_(GetSessionOptions(config)), + allocator_{} { + auto buf = ReadFile(mgr, config.vits.model); + Init(buf.data(), buf.size()); + } +#endif + Ort::Value Run(Ort::Value x, int64_t sid, float speed) { auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); @@ -141,6 +152,12 @@ class OfflineTtsVitsModel::Impl { OfflineTtsVitsModel::OfflineTtsVitsModel(const OfflineTtsModelConfig &config) : impl_(std::make_unique(config)) {} +#if __ANDROID_API__ >= 9 +OfflineTtsVitsModel::OfflineTtsVitsModel(AAssetManager *mgr, + const OfflineTtsModelConfig &config) + : impl_(std::make_unique(mgr, config)) {} +#endif + OfflineTtsVitsModel::~OfflineTtsVitsModel() = default; Ort::Value OfflineTtsVitsModel::Run(Ort::Value x, int64_t sid /*=0*/, diff --git a/sherpa-onnx/csrc/offline-tts-vits-model.h b/sherpa-onnx/csrc/offline-tts-vits-model.h index 31addfdfe..dfe743cab 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-model.h +++ b/sherpa-onnx/csrc/offline-tts-vits-model.h @@ -8,6 +8,11 @@ #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + #include "onnxruntime_cxx_api.h" // NOLINT #include "sherpa-onnx/csrc/offline-tts-model-config.h" @@ -18,6 +23,9 @@ class OfflineTtsVitsModel { ~OfflineTtsVitsModel(); explicit OfflineTtsVitsModel(const OfflineTtsModelConfig &config); +#if __ANDROID_API__ >= 9 + OfflineTtsVitsModel(AAssetManager *mgr, const OfflineTtsModelConfig &config); +#endif /** Run the model. * diff --git a/sherpa-onnx/csrc/offline-tts.cc b/sherpa-onnx/csrc/offline-tts.cc index 94a288354..b4d19476e 100644 --- a/sherpa-onnx/csrc/offline-tts.cc +++ b/sherpa-onnx/csrc/offline-tts.cc @@ -26,6 +26,11 @@ std::string OfflineTtsConfig::ToString() const { OfflineTts::OfflineTts(const OfflineTtsConfig &config) : impl_(OfflineTtsImpl::Create(config)) {} +#if __ANDROID_API__ >= 9 +OfflineTts::OfflineTts(AAssetManager *mgr, const OfflineTtsConfig &config) + : impl_(OfflineTtsImpl::Create(mgr, config)) {} +#endif + OfflineTts::~OfflineTts() = default; GeneratedAudio OfflineTts::Generate(const std::string &text, int64_t sid /*=0*/, diff --git a/sherpa-onnx/csrc/offline-tts.h b/sherpa-onnx/csrc/offline-tts.h index c2d87461b..0b6427aa5 100644 --- a/sherpa-onnx/csrc/offline-tts.h +++ b/sherpa-onnx/csrc/offline-tts.h @@ -9,6 +9,11 @@ #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + #include "sherpa-onnx/csrc/offline-tts-model-config.h" #include "sherpa-onnx/csrc/parse-options.h" @@ -38,6 +43,11 @@ class OfflineTts { public: ~OfflineTts(); explicit OfflineTts(const OfflineTtsConfig &config); + +#if __ANDROID_API__ >= 9 + OfflineTts(AAssetManager *mgr, const OfflineTtsConfig &config); +#endif + // @param text A string containing words separated by spaces // @param sid Speaker ID. Used only for multi-speaker models, e.g., models // trained using the VCTK dataset. It is not used for diff --git a/sherpa-onnx/csrc/symbol-table.cc b/sherpa-onnx/csrc/symbol-table.cc index 6898d8987..e18f0bab3 100644 --- a/sherpa-onnx/csrc/symbol-table.cc +++ b/sherpa-onnx/csrc/symbol-table.cc @@ -7,12 +7,13 @@ #include #include #include -#include #include "sherpa-onnx/csrc/base64-decode.h" #include "sherpa-onnx/csrc/onnx-utils.h" #if __ANDROID_API__ >= 9 +#include + #include "android/asset_manager.h" #include "android/asset_manager_jni.h" #endif diff --git a/sherpa-onnx/jni/jni.cc b/sherpa-onnx/jni/jni.cc index 92e6e7203..299f7d751 100644 --- a/sherpa-onnx/jni/jni.cc +++ b/sherpa-onnx/jni/jni.cc @@ -21,10 +21,12 @@ #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-recognizer.h" +#include "sherpa-onnx/csrc/offline-tts.h" #include "sherpa-onnx/csrc/online-recognizer.h" #include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/voice-activity-detector.h" #include "sherpa-onnx/csrc/wave-reader.h" +#include "sherpa-onnx/csrc/wave-writer.h" #define SHERPA_ONNX_EXTERN_C extern "C" @@ -124,7 +126,7 @@ class SherpaOnnxVad { void Pop() { vad_.Pop(); } - void Clear() { vad_.Clear();} + void Clear() { vad_.Clear(); } const SpeechSegment &Front() const { return vad_.Front(); } @@ -491,8 +493,172 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) { return ans; } +class SherpaOnnxOfflineTts { + public: +#if __ANDROID_API__ >= 9 + SherpaOnnxOfflineTts(AAssetManager *mgr, const OfflineTtsConfig &config) + : tts_(mgr, config) {} +#endif + explicit SherpaOnnxOfflineTts(const OfflineTtsConfig &config) + : tts_(config) {} + + GeneratedAudio Generate(const std::string &text, int64_t sid = 0, + float speed = 1.0) const { + return tts_.Generate(text, sid, speed); + } + + private: + OfflineTts tts_; +}; + +static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { + OfflineTtsConfig ans; + + jclass cls = env->GetObjectClass(config); + jfieldID fid; + + fid = env->GetFieldID(cls, "model", + "Lcom/k2fsa/sherpa/onnx/OfflineTtsModelConfig;"); + jobject model = env->GetObjectField(config, fid); + jclass model_config_cls = env->GetObjectClass(model); + + fid = env->GetFieldID(model_config_cls, "vits", + "Lcom/k2fsa/sherpa/onnx/OfflineTtsVitsModelConfig;"); + jobject vits = env->GetObjectField(model, fid); + jclass vits_cls = env->GetObjectClass(vits); + + fid = env->GetFieldID(vits_cls, "model", "Ljava/lang/String;"); + jstring s = (jstring)env->GetObjectField(vits, fid); + const char *p = env->GetStringUTFChars(s, nullptr); + ans.model.vits.model = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(vits_cls, "lexicon", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(vits, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.model.vits.lexicon = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(vits_cls, "tokens", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(vits, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.model.vits.tokens = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(vits_cls, "noiseScale", "F"); + ans.model.vits.noise_scale = env->GetFloatField(vits, fid); + + fid = env->GetFieldID(vits_cls, "noiseScaleW", "F"); + ans.model.vits.noise_scale_w = env->GetFloatField(vits, fid); + + fid = env->GetFieldID(vits_cls, "lengthScale", "F"); + ans.model.vits.length_scale = env->GetFloatField(vits, fid); + + fid = env->GetFieldID(model_config_cls, "numThreads", "I"); + ans.model.num_threads = env->GetIntField(model, fid); + + fid = env->GetFieldID(model_config_cls, "debug", "Z"); + ans.model.debug = env->GetBooleanField(model, fid); + + fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(model, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.model.provider = p; + env->ReleaseStringUTFChars(s, p); + + return ans; +} + } // namespace sherpa_onnx +SHERPA_ONNX_EXTERN_C +JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( + JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { +#if __ANDROID_API__ >= 9 + AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); + if (!mgr) { + SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + } +#endif + auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); + auto tts = new sherpa_onnx::SherpaOnnxOfflineTts( +#if __ANDROID_API__ >= 9 + mgr, +#endif + config); + + return (jlong)tts; +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile( + JNIEnv *env, jobject /*obj*/, jobject _config) { + auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); + auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(config); + + return (jlong)tts; +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_delete( + JNIEnv *env, jobject /*obj*/, jlong ptr) { + delete reinterpret_cast(ptr); +} + +// see +// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables +static jobject NewInteger(JNIEnv *env, int32_t value) { + jclass cls = env->FindClass("java/lang/Integer"); + jmethodID constructor = env->GetMethodID(cls, "", "(I)V"); + return env->NewObject(cls, constructor, value); +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT jobjectArray JNICALL +Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl(JNIEnv *env, jobject /*obj*/, + jlong ptr, jstring text, + jint sid, jfloat speed) { + const char *p_text = env->GetStringUTFChars(text, nullptr); + SHERPA_ONNX_LOGE("string is: %s", p_text); + + auto audio = + reinterpret_cast(ptr)->Generate( + p_text, sid, speed); + + jfloatArray samples_arr = env->NewFloatArray(audio.samples.size()); + env->SetFloatArrayRegion(samples_arr, 0, audio.samples.size(), + audio.samples.data()); + + jobjectArray obj_arr = (jobjectArray)env->NewObjectArray( + 2, env->FindClass("java/lang/Object"), nullptr); + + env->SetObjectArrayElement(obj_arr, 0, samples_arr); + env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, audio.sample_rate)); + + env->ReleaseStringUTFChars(text, p_text); + + return obj_arr; +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl( + JNIEnv *env, jobject /*obj*/, jstring filename, jfloatArray samples, + jint sample_rate) { + const char *p_filename = env->GetStringUTFChars(filename, nullptr); + + jfloat *p = env->GetFloatArrayElements(samples, nullptr); + jsize n = env->GetArrayLength(samples); + + bool ok = sherpa_onnx::WriteWave(p_filename, sample_rate, p, n); + + env->ReleaseStringUTFChars(filename, p_filename); + env->ReleaseFloatArrayElements(samples, p, JNI_ABORT); + + return ok; +} + SHERPA_ONNX_EXTERN_C JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_new( JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { @@ -513,6 +679,7 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_new( return (jlong)model; } +SHERPA_ONNX_EXTERN_C JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_newFromFile( JNIEnv *env, jobject /*obj*/, jobject _config) { auto config = sherpa_onnx::GetVadModelConfig(env, _config); @@ -560,20 +727,12 @@ JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_pop(JNIEnv *env, SHERPA_ONNX_EXTERN_C JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_clear(JNIEnv *env, - jobject /*obj*/, - jlong ptr) { + jobject /*obj*/, + jlong ptr) { auto model = reinterpret_cast(ptr); model->Clear(); } -// see -// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables -static jobject NewInteger(JNIEnv *env, int32_t value) { - jclass cls = env->FindClass("java/lang/Integer"); - jmethodID constructor = env->GetMethodID(cls, "", "(I)V"); - return env->NewObject(cls, constructor, value); -} - SHERPA_ONNX_EXTERN_C JNIEXPORT jobjectArray JNICALL Java_com_k2fsa_sherpa_onnx_Vad_front(JNIEnv *env, jobject /*obj*/, jlong ptr) {