-
Notifications
You must be signed in to change notification settings - Fork 477
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Swift API for MatchaTTS models. (#1684)
- Loading branch information
1 parent
1fe5fe4
commit 6f085ba
Showing
12 changed files
with
271 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/usr/bin/env bash
#
# Downloads the English Matcha TTS model and the HiFi-GAN vocoder (if they are
# not already present), builds the Swift example tts-matcha-en (if the binary
# does not exist yet), and runs it.
#
# Requires ../build-swift-macos to exist.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# The guard must test the binary that is actually produced below; testing a
# different name would make the "skip building" branch unreachable.
if [ ! -e ./tts-matcha-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-en.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-en

  strip tts-matcha-en
else
  echo "./tts-matcha-en exists - skip building"
fi

# Prepend the freshly built libraries; only append the previous value when it
# is non-empty so that no empty path entry is left at the end.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
./tts-matcha-en
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#!/usr/bin/env bash
#
# Downloads the Chinese Matcha TTS model and the HiFi-GAN vocoder (if they are
# not already present), builds the Swift example tts-matcha-zh (if the binary
# does not exist yet), and runs it.
#
# Requires ../build-swift-macos to exist.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# The guard must test the binary that is actually produced below; testing a
# different name would make the "skip building" branch unreachable.
if [ ! -e ./tts-matcha-zh ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-zh.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-zh

  strip tts-matcha-zh
else
  echo "./tts-matcha-zh exists - skip building"
fi

# Prepend the freshly built libraries; only append the previous value when it
# is non-empty so that no empty path entry is left at the end.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
./tts-matcha-zh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/// Example consumer of generated audio.
///
/// It does not play anything; it only reports how many samples it was handed.
class MyClass {
  /// Prints the number of samples in `samples`.
  ///
  /// - Parameter samples: The audio samples to report on.
  func playSamples(samples: [Float]) {
    let message = "Play \(samples.count) samples"
    print(message)
  }
}
|
||
/// Synthesizes a fixed English sentence with a Matcha TTS model and writes the
/// result to `test-matcha-en.wav`.
///
/// All model paths are relative to the current working directory. While audio
/// is being generated, each chunk of samples is forwarded to a `MyClass`
/// instance through a callback.
func run() {
  let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"
  let vocoder = "./hifigan_v2.onnx"
  let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"
  let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    tokens: tokens,
    dataDir: dataDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsCallbackWithArg = { samples, n, arg in
    // Without the context pointer there is nobody to hand the samples to;
    // keep generating so the audio can still be saved afterwards.
    guard let arg = arg else { return 1 }
    let o = Unmanaged<MyClass>.fromOpaque(arg).takeUnretainedValue()

    // Copy the samples into a Swift array in one step instead of appending
    // element by element.
    let savedSamples = Array(UnsafeBufferPointer(start: samples, count: Int(n)))

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  let sid = 0
  let speed: Float = 1.0

  // `arg` is an unretained pointer and `myClass` has no further uses, so
  // explicitly extend its lifetime across the call that invokes the callback.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }

  let filename = "test-matcha-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}
|
||
/// Program entry point.
@main
struct App {
  /// Runs the English Matcha TTS example once.
  static func main() {
    run()
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/// Example consumer of generated audio.
///
/// It does not play anything; it only reports how many samples it was handed.
class MyClass {
  /// Prints the number of samples in `samples`.
  ///
  /// - Parameter samples: The audio samples to report on.
  func playSamples(samples: [Float]) {
    let message = "Play \(samples.count) samples"
    print(message)
  }
}
|
||
/// Synthesizes a fixed Chinese sentence with a Matcha TTS model and writes the
/// result to `test-matcha-zh.wav`.
///
/// All model paths are relative to the current working directory. While audio
/// is being generated, each chunk of samples is forwarded to a `MyClass`
/// instance through a callback.
func run() {
  let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"
  let vocoder = "./hifigan_v2.onnx"
  let lexicon = "./matcha-icefall-zh-baker/lexicon.txt"
  let tokens = "./matcha-icefall-zh-baker/tokens.txt"
  let dictDir = "./matcha-icefall-zh-baker/dict"
  let ruleFsts =
    "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    lexicon: lexicon,
    tokens: tokens,
    dictDir: dictDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig, ruleFsts: ruleFsts)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsCallbackWithArg = { samples, n, arg in
    // Without the context pointer there is nobody to hand the samples to;
    // keep generating so the audio can still be saved afterwards.
    guard let arg = arg else { return 1 }
    let o = Unmanaged<MyClass>.fromOpaque(arg).takeUnretainedValue()

    // Copy the samples into a Swift array in one step instead of appending
    // element by element.
    let savedSamples = Array(UnsafeBufferPointer(start: samples, count: Int(n)))

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text = "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
  let sid = 0
  let speed: Float = 1.0

  // `arg` is an unretained pointer and `myClass` has no further uses, so
  // explicitly extend its lifetime across the call that invokes the callback.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }

  let filename = "test-matcha-zh.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}
|
||
/// Program entry point.
@main
struct App {
  /// Runs the Chinese Matcha TTS example once.
  static func main() {
    run()
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters