Skip to content

Commit

Permalink
Add Swift API for MatchaTTS models. (#1684)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jan 5, 2025
1 parent 1fe5fe4 commit 6f085ba
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 18 deletions.
18 changes: 15 additions & 3 deletions .github/scripts/test-swift.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh

./run-tts-vits.sh
ls -lh
rm -rf vits-piper-*

./run-tts-matcha-zh.sh
ls -lh
rm -rf matcha-icefall-*

./run-tts-matcha-en.sh
ls -lh
rm -rf matcha-icefall-*

./run-speaker-diarization.sh
rm -rf *.onnx
rm -rf sherpa-onnx-pyannote-segmentation-3-0
Expand Down Expand Up @@ -38,8 +50,9 @@ popd
ls -lh /Users/fangjun/Desktop
cat /Users/fangjun/Desktop/Obama.srt

./run-tts.sh
ls -lh
rm -rf sherpa-onnx-whisper*
rm -f *.onnx
rm /Users/fangjun/Desktop/Obama.wav

./run-decode-file.sh
rm decode-file
Expand All @@ -48,5 +61,4 @@ sed -i.bak '20d' ./decode-file.swift

./run-decode-file-non-streaming.sh


ls -lh
2 changes: 1 addition & 1 deletion java-api-examples/run-non-streaming-tts-matcha-en.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ fi
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

Expand Down
2 changes: 1 addition & 1 deletion nodejs-addon-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ node ./test_vad_asr_non_streaming_sense_voice_microphone.js
### Text-to-speech with MatchaTTS models (English TTS)
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
Expand Down
2 changes: 1 addition & 1 deletion nodejs-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
Expand Down
4 changes: 3 additions & 1 deletion swift-api-examples/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ decode-file
decode-file-non-streaming
generate-subtitles
spoken-language-identification
tts
tts-vits
vits-vctk
sherpa-onnx-paraformer-zh-2023-09-14
!*.sh
*.bak
streaming-hlg-decode-file
keyword-spotting-from-file
add-punctuations
tts-matcha-zh
tts-matcha-en
33 changes: 28 additions & 5 deletions swift-api-examples/SherpaOnnx.swift
Original file line number Diff line number Diff line change
Expand Up @@ -719,9 +719,9 @@ class SherpaOnnxVoiceActivityDetectorWrapper {

// offline tts
func sherpaOnnxOfflineTtsVitsModelConfig(
model: String,
lexicon: String,
tokens: String,
model: String = "",
lexicon: String = "",
tokens: String = "",
dataDir: String = "",
noiseScale: Float = 0.667,
noiseScaleW: Float = 0.8,
Expand All @@ -739,8 +739,30 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
dict_dir: toCPointer(dictDir))
}

/// Builds a `SherpaOnnxOfflineTtsMatchaModelConfig` from Swift values.
///
/// Every parameter has a default, so callers only pass the fields their model
/// needs. Each `String` argument is converted with `toCPointer` before being
/// stored in the returned struct; the two `Float` arguments are stored as-is.
///
/// - Parameters:
///   - acousticModel: Stored in `acoustic_model`.
///   - vocoder: Stored in `vocoder`.
///   - lexicon: Stored in `lexicon`.
///   - tokens: Stored in `tokens`.
///   - dataDir: Stored in `data_dir`.
///   - noiseScale: Stored in `noise_scale`.
///   - lengthScale: Stored in `length_scale`.
///   - dictDir: Stored in `dict_dir`.
/// - Returns: The populated configuration struct.
func sherpaOnnxOfflineTtsMatchaModelConfig(
  acousticModel: String = "",
  vocoder: String = "",
  lexicon: String = "",
  tokens: String = "",
  dataDir: String = "",
  noiseScale: Float = 0.667,
  lengthScale: Float = 1.0,
  dictDir: String = ""
) -> SherpaOnnxOfflineTtsMatchaModelConfig {
  // Convert the string fields first, in the same order in which the struct
  // initializer consumes them.
  let acousticModelPointer = toCPointer(acousticModel)
  let vocoderPointer = toCPointer(vocoder)
  let lexiconPointer = toCPointer(lexicon)
  let tokensPointer = toCPointer(tokens)
  let dataDirPointer = toCPointer(dataDir)
  let dictDirPointer = toCPointer(dictDir)

  let config = SherpaOnnxOfflineTtsMatchaModelConfig(
    acoustic_model: acousticModelPointer,
    vocoder: vocoderPointer,
    lexicon: lexiconPointer,
    tokens: tokensPointer,
    data_dir: dataDirPointer,
    noise_scale: noiseScale,
    length_scale: lengthScale,
    dict_dir: dictDirPointer)
  return config
}

func sherpaOnnxOfflineTtsModelConfig(
vits: SherpaOnnxOfflineTtsVitsModelConfig,
vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(),
matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(),
numThreads: Int = 1,
debug: Int = 0,
provider: String = "cpu"
Expand All @@ -749,7 +771,8 @@ func sherpaOnnxOfflineTtsModelConfig(
vits: vits,
num_threads: Int32(numThreads),
debug: Int32(debug),
provider: toCPointer(provider)
provider: toCPointer(provider),
matcha: matcha
)
}

Expand Down
42 changes: 42 additions & 0 deletions swift-api-examples/run-tts-matcha-en.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash
#
# Downloads the English MatchaTTS model and the HiFi-GAN vocoder if they are
# missing, builds the tts-matcha-en Swift example if it has not been built yet,
# and runs it.
#
# Requires ../build-swift-macos to exist (see ../build-swift-macos.sh).

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# Check for the binary this script actually produces (tts-matcha-en), so that
# an unrelated ./tts file cannot cause the build to be skipped.
if [ ! -e ./tts-matcha-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-en.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-en

  strip tts-matcha-en
else
  echo "./tts-matcha-en exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-matcha-en
41 changes: 41 additions & 0 deletions swift-api-examples/run-tts-matcha-zh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env bash
#
# Downloads the Chinese MatchaTTS model and the HiFi-GAN vocoder if they are
# missing, builds the tts-matcha-zh Swift example if it has not been built yet,
# and runs it.
#
# Requires ../build-swift-macos to exist (see ../build-swift-macos.sh).

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  # Non-verbose extraction, matching run-tts-matcha-en.sh.
  tar xf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# Check for the binary this script actually produces (tts-matcha-zh), so that
# an unrelated ./tts file cannot cause the build to be skipped.
if [ ! -e ./tts-matcha-zh ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-zh.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-zh

  strip tts-matcha-zh
else
  echo "./tts-matcha-zh exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-matcha-zh
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ if [ ! -e ./tts ]; then
-lc++ \
-I ../build-swift-macos/install/include \
-import-objc-header ./SherpaOnnx-Bridging-Header.h \
./tts.swift ./SherpaOnnx.swift \
./tts-vits.swift ./SherpaOnnx.swift \
-L ../build-swift-macos/install/lib/ \
-l sherpa-onnx \
-l onnxruntime \
-o tts
-o tts-vits

strip tts
strip tts-vits
else
echo "./tts exists - skip building"
echo "./tts-vits exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts
./tts-vits
65 changes: 65 additions & 0 deletions swift-api-examples/tts-matcha-en.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/// Receiver object handed to the TTS callback through an opaque pointer.
class MyClass {
  /// Reports how many samples were delivered.
  ///
  /// This example only prints the count; it does not play any audio.
  ///
  /// - Parameter samples: The samples passed in by the caller.
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}

/// Synthesizes one English sentence with a MatchaTTS model and writes the
/// result to `test-matcha-en.wav`.
///
/// The model, vocoder, token and espeak-ng data paths are hard-coded relative
/// paths, so the example must be run from the directory that contains them.
/// Each batch of samples delivered to the callback is forwarded to
/// `MyClass.playSamples(samples:)`.
func run() {
  let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"
  let vocoder = "./hifigan_v2.onnx"
  let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"
  let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    tokens: tokens,
    dataDir: dataDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // passUnretained does not retain myClass, so it must be kept alive for as
  // long as the callback can be invoked. The generate call below is wrapped in
  // withExtendedLifetime to guarantee that.
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsCallbackWithArg = { samples, n, arg in
    // arg is the pointer created above; nil here would be a programming error.
    let receiver = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()

    // Copy the n samples into a Swift array in one step.
    let savedSamples = Array(UnsafeBufferPointer(start: samples, count: Int(n)))

    receiver.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  let sid = 0
  let speed: Float = 1.0

  // Without this, myClass has no use after passUnretained and could be
  // released before generation finishes, leaving the callback with a dangling
  // pointer.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }
  let filename = "test-matcha-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

/// Program entry point for the English MatchaTTS example.
@main
struct App {
  /// Runs the example once.
  static func main() {
    run()
  }
}
68 changes: 68 additions & 0 deletions swift-api-examples/tts-matcha-zh.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/// Receiver object handed to the TTS callback through an opaque pointer.
class MyClass {
  /// Reports how many samples were delivered.
  ///
  /// This example only prints the count; it does not play any audio.
  ///
  /// - Parameter samples: The samples passed in by the caller.
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}

/// Synthesizes one Chinese sentence with a MatchaTTS model and writes the
/// result to `test-matcha-zh.wav`.
///
/// The model, vocoder, lexicon, token, dictionary and rule-FST paths are
/// hard-coded relative paths, so the example must be run from the directory
/// that contains them. Each batch of samples delivered to the callback is
/// forwarded to `MyClass.playSamples(samples:)`.
func run() {
  let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"
  let vocoder = "./hifigan_v2.onnx"
  let lexicon = "./matcha-icefall-zh-baker/lexicon.txt"
  let tokens = "./matcha-icefall-zh-baker/tokens.txt"
  let dictDir = "./matcha-icefall-zh-baker/dict"
  // Comma-separated list of rule FST files.
  let ruleFsts =
    "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    lexicon: lexicon,
    tokens: tokens,
    dictDir: dictDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig, ruleFsts: ruleFsts)

  let myClass = MyClass()

  // passUnretained does not retain myClass, so it must be kept alive for as
  // long as the callback can be invoked. The generate call below is wrapped in
  // withExtendedLifetime to guarantee that.
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsCallbackWithArg = { samples, n, arg in
    // arg is the pointer created above; nil here would be a programming error.
    let receiver = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()

    // Copy the n samples into a Swift array in one step.
    let savedSamples = Array(UnsafeBufferPointer(start: samples, count: Int(n)))

    receiver.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text = "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
  let sid = 0
  let speed: Float = 1.0

  // Without this, myClass has no use after passUnretained and could be
  // released before generation finishes, leaving the callback with a dangling
  // pointer.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }
  let filename = "test-matcha-zh.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

/// Program entry point for the Chinese MatchaTTS example.
@main
struct App {
  /// Runs the example once.
  static func main() {
    run()
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func run() {

let audio = tts.generateWithCallbackWithArg(
text: text, callback: callback, arg: arg, sid: sid, speed: speed)
let filename = "test.wav"
let filename = "test-vits-en.wav"
let ok = audio.save(filename: filename)
if ok == 1 {
print("\nSaved to:\(filename)")
Expand Down

0 comments on commit 6f085ba

Please sign in to comment.