diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh index 0da23eb24..f333bc0a9 100755 --- a/.github/scripts/test-swift.sh +++ b/.github/scripts/test-swift.sh @@ -7,6 +7,18 @@ echo "pwd: $PWD" cd swift-api-examples ls -lh +./run-tts-vits.sh +ls -lh +rm -rf vits-piper-* + +./run-tts-matcha-zh.sh +ls -lh +rm -rf matcha-icefall-* + +./run-tts-matcha-en.sh +ls -lh +rm -rf matcha-icefall-* + ./run-speaker-diarization.sh rm -rf *.onnx rm -rf sherpa-onnx-pyannote-segmentation-3-0 @@ -38,8 +50,9 @@ popd ls -lh /Users/fangjun/Desktop cat /Users/fangjun/Desktop/Obama.srt -./run-tts.sh -ls -lh +rm -rf sherpa-onnx-whisper* +rm -f *.onnx +rm /Users/fangjun/Desktop/Obama.wav ./run-decode-file.sh rm decode-file @@ -48,5 +61,4 @@ sed -i.bak '20d' ./decode-file.swift ./run-decode-file-non-streaming.sh - ls -lh diff --git a/java-api-examples/run-non-streaming-tts-matcha-en.sh b/java-api-examples/run-non-streaming-tts-matcha-en.sh index ce0289fc9..ba03beaf2 100755 --- a/java-api-examples/run-non-streaming-tts-matcha-en.sh +++ b/java-api-examples/run-non-streaming-tts-matcha-en.sh @@ -31,7 +31,7 @@ fi # to download more models if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 - tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 + tar xf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 fi diff --git a/nodejs-addon-examples/README.md b/nodejs-addon-examples/README.md index ec2f23da2..2de8a2143 100644 --- a/nodejs-addon-examples/README.md +++ b/nodejs-addon-examples/README.md @@ -350,7 +350,7 @@ node ./test_vad_asr_non_streaming_sense_voice_microphone.js ### Text-to-speech with MatchaTTS models (English TTS) ```bash wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 -tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 +tar xf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx diff --git a/nodejs-examples/README.md b/nodejs-examples/README.md index 0c59b7bcc..3db3a2952 100644 --- a/nodejs-examples/README.md +++ b/nodejs-examples/README.md @@ -70,7 +70,7 @@ You can use the following command to run it: ```bash wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 -tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 +tar xf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx diff --git a/swift-api-examples/.gitignore b/swift-api-examples/.gitignore index 97b559df4..1a90488aa 100644 --- a/swift-api-examples/.gitignore +++ b/swift-api-examples/.gitignore @@ -2,7 +2,7 @@ decode-file decode-file-non-streaming generate-subtitles spoken-language-identification -tts +tts-vits vits-vctk sherpa-onnx-paraformer-zh-2023-09-14 !*.sh @@ -10,3 +10,5 @@ sherpa-onnx-paraformer-zh-2023-09-14 streaming-hlg-decode-file keyword-spotting-from-file add-punctuations +tts-matcha-zh +tts-matcha-en diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift index b100ef408..6d11a0011 100644 --- a/swift-api-examples/SherpaOnnx.swift +++ b/swift-api-examples/SherpaOnnx.swift @@ -719,9 +719,9 @@ class SherpaOnnxVoiceActivityDetectorWrapper { // offline tts func sherpaOnnxOfflineTtsVitsModelConfig( - model: String, - lexicon: String, - tokens: String, + model: String = "", + lexicon: String = "", + tokens: String = "", dataDir: String = "", noiseScale: Float = 0.667, noiseScaleW: Float = 0.8, @@ -739,8 +739,30 @@ func sherpaOnnxOfflineTtsVitsModelConfig( dict_dir: toCPointer(dictDir)) } +func sherpaOnnxOfflineTtsMatchaModelConfig( + acousticModel: String = "", + vocoder: String = "", + lexicon: String = "", + tokens: String = "", + dataDir: String = "", + noiseScale: Float = 0.667, + lengthScale: Float = 1.0, + dictDir: String = "" +) -> SherpaOnnxOfflineTtsMatchaModelConfig { + return SherpaOnnxOfflineTtsMatchaModelConfig( + acoustic_model: toCPointer(acousticModel), + vocoder: toCPointer(vocoder), + lexicon: toCPointer(lexicon), + tokens: toCPointer(tokens), + data_dir: toCPointer(dataDir), + noise_scale: noiseScale, + length_scale: lengthScale, + dict_dir: toCPointer(dictDir)) +} + func sherpaOnnxOfflineTtsModelConfig( - vits: SherpaOnnxOfflineTtsVitsModelConfig, + vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(), + matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(), numThreads: Int = 1, debug: Int = 0, provider: String = "cpu" @@ -749,7 +771,8 @@ func sherpaOnnxOfflineTtsModelConfig( vits: vits, num_threads: Int32(numThreads), debug: Int32(debug), - provider: toCPointer(provider) + provider: toCPointer(provider), + matcha: matcha ) } diff --git a/swift-api-examples/run-tts-matcha-en.sh b/swift-api-examples/run-tts-matcha-en.sh new file mode 100755 index 000000000..1f23f56ec --- /dev/null +++ b/swift-api-examples/run-tts-matcha-en.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -d ../build-swift-macos ]; then + echo "Please run ../build-swift-macos.sh first!" + exit 1 +fi + +# please visit +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker +# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker +# to download more models +if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 + tar xf matcha-icefall-en_US-ljspeech.tar.bz2 + rm matcha-icefall-en_US-ljspeech.tar.bz2 +fi + +if [ ! -f ./hifigan_v2.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +fi + +if [ ! -e ./tts ]; then + # Note: We use -lc++ to link against libc++ instead of libstdc++ + swiftc \ + -lc++ \ + -I ../build-swift-macos/install/include \ + -import-objc-header ./SherpaOnnx-Bridging-Header.h \ + ./tts-matcha-en.swift ./SherpaOnnx.swift \ + -L ../build-swift-macos/install/lib/ \ + -l sherpa-onnx \ + -l onnxruntime \ + -o tts-matcha-en + + strip tts-matcha-en +else + echo "./tts-matcha-en exists - skip building" +fi + +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH +./tts-matcha-en diff --git a/swift-api-examples/run-tts-matcha-zh.sh b/swift-api-examples/run-tts-matcha-zh.sh new file mode 100755 index 000000000..decbbde4a --- /dev/null +++ b/swift-api-examples/run-tts-matcha-zh.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -d ../build-swift-macos ]; then + echo "Please run ../build-swift-macos.sh first!" + exit 1 +fi + +# please visit +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker +# to download more models +if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 + tar xvf matcha-icefall-zh-baker.tar.bz2 + rm matcha-icefall-zh-baker.tar.bz2 +fi + +if [ ! -f ./hifigan_v2.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +fi + +if [ ! -e ./tts ]; then + # Note: We use -lc++ to link against libc++ instead of libstdc++ + swiftc \ + -lc++ \ + -I ../build-swift-macos/install/include \ + -import-objc-header ./SherpaOnnx-Bridging-Header.h \ + ./tts-matcha-zh.swift ./SherpaOnnx.swift \ + -L ../build-swift-macos/install/lib/ \ + -l sherpa-onnx \ + -l onnxruntime \ + -o tts-matcha-zh + + strip tts-matcha-zh +else + echo "./tts-matcha-zh exists - skip building" +fi + +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH +./tts-matcha-zh diff --git a/swift-api-examples/run-tts.sh b/swift-api-examples/run-tts-vits.sh similarity index 86% rename from swift-api-examples/run-tts.sh rename to swift-api-examples/run-tts-vits.sh index 5604a43a8..4f385bd72 100755 --- a/swift-api-examples/run-tts.sh +++ b/swift-api-examples/run-tts-vits.sh @@ -21,16 +21,16 @@ if [ ! -e ./tts ]; then -lc++ \ -I ../build-swift-macos/install/include \ -import-objc-header ./SherpaOnnx-Bridging-Header.h \ - ./tts.swift ./SherpaOnnx.swift \ + ./tts-vits.swift ./SherpaOnnx.swift \ -L ../build-swift-macos/install/lib/ \ -l sherpa-onnx \ -l onnxruntime \ - -o tts + -o tts-vits - strip tts + strip tts-vits else - echo "./tts exists - skip building" + echo "./tts-vits exists - skip building" fi export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH -./tts +./tts-vits diff --git a/swift-api-examples/tts-matcha-en.swift b/swift-api-examples/tts-matcha-en.swift new file mode 100644 index 000000000..ec55f72d2 --- /dev/null +++ b/swift-api-examples/tts-matcha-en.swift @@ -0,0 +1,65 @@ +class MyClass { + func playSamples(samples: [Float]) { + print("Play \(samples.count) samples") + } +} + +func run() { + let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx" + let vocoder = "./hifigan_v2.onnx" + let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt" + let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data" + let matcha = sherpaOnnxOfflineTtsMatchaModelConfig( + acousticModel: acousticModel, + vocoder: vocoder, + tokens: tokens, + dataDir: dataDir + ) + let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0) + var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig) + + let myClass = MyClass() + + // We use Unretained here so myClass must be kept alive as the callback is invoked + // + // See also + // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6 + let arg = Unmanaged.passUnretained(myClass).toOpaque() + + let callback: TtsCallbackWithArg = { samples, n, arg in + let o = Unmanaged.fromOpaque(arg!).takeUnretainedValue() + var savedSamples: [Float] = [] + for index in 0...passUnretained(myClass).toOpaque() + + let callback: TtsCallbackWithArg = { samples, n, arg in + let o = Unmanaged.fromOpaque(arg!).takeUnretainedValue() + var savedSamples: [Float] = [] + for index in 0..