From 1544a577e0b3b2a0cf49c501f1b62c73760441c1 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 29 Oct 2023 18:30:43 +0800 Subject: [PATCH] Upload TTS APKs to huggingface (#400) --- .github/workflows/apk-tts.yaml | 103 ++++++++++++++ .github/workflows/apk.yaml | 1 - .../com/k2fsa/sherpa/onnx/MainActivity.kt | 24 +++- .../main/java/com/k2fsa/sherpa/onnx/Tts.kt | 54 ++------ scripts/apk/.gitignore | 1 + scripts/apk/README.md | 3 + .../apk/build-apk-tts.sh.in | 65 +++------ scripts/apk/generate-tts-apk-script.py | 127 ++++++++++++++++++ 8 files changed, 286 insertions(+), 92 deletions(-) create mode 100644 .github/workflows/apk-tts.yaml create mode 100644 scripts/apk/.gitignore create mode 100644 scripts/apk/README.md rename build-apk-tts.sh => scripts/apk/build-apk-tts.sh.in (51%) mode change 100755 => 100644 create mode 100755 scripts/apk/generate-tts-apk-script.py diff --git a/.github/workflows/apk-tts.yaml b/.github/workflows/apk-tts.yaml new file mode 100644 index 000000000..68ca8d76d --- /dev/null +++ b/.github/workflows/apk-tts.yaml @@ -0,0 +1,103 @@ +name: apk-tts + +on: + push: + branches: + - apk-tts + - apk + tags: + - '*' + + workflow_dispatch: + +concurrency: + group: apk-tts-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + apk_tts: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + runs-on: ${{ matrix.os }} + name: apk for tts ${{ matrix.index }}/${{ matrix.total }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["12"] + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Generate build script +
shell: bash + run: | + cd scripts/apk + + total=${{ matrix.total }} + index=${{ matrix.index }} + + ./generate-tts-apk-script.py --total $total --index $index + + chmod +x build-apk-tts.sh + mv -v ./build-apk-tts.sh ../.. + + - name: build APK + shell: bash + run: | + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-apk-tts.sh + + - name: Display APK + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + # - uses: actions/upload-artifact@v3 + # with: + # name: tts-apk + # path: ./apks/*.apk + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v2 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + cd huggingface + git pull + + mkdir -p tts + cp -v ../apks/*.apk ./tts/ + git status + git lfs track "*.apk" + git add .
+ git commit -m "add more apks" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main diff --git a/.github/workflows/apk.yaml b/.github/workflows/apk.yaml index 4f01bd626..a9d01a107 100644 --- a/.github/workflows/apk.yaml +++ b/.github/workflows/apk.yaml @@ -38,7 +38,6 @@ jobs: shell: bash run: | export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME - ./build-apk-tts.sh ./build-apk-vad.sh ./build-apk-two-pass.sh ./build-apk.sh diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index 5a5b4e077..e88500214 100644 --- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -84,6 +84,8 @@ class MainActivity : AppCompatActivity() { val ok = audio.samples.size > 0 && audio.save(filename) if (ok) { play.isEnabled = true + // Play automatically after generation + onClickPlay() } } @@ -97,10 +99,24 @@ class MainActivity : AppCompatActivity() { } fun initTts() { - // 0 - vits-vctk (multi-speaker, English) - // 1 - vits-zh-aishell3 (multi-speaker, Chinese) - val type = 0 - val config = getOfflineTtsConfig(type = type, debug = true)!! + var modelDir :String? + var modelName :String? + + // Both variables are null on purpose: the CI build script uses sed to + // replace the two assignments below with a real model. Please see + // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py + modelDir = null + modelName = null + + // Example 1: + // modelDir = "vits-vctk" + // modelName = "vits-vctk.onnx" + + // Example 2: + // modelDir = "vits-piper-en_US-lessac-medium" + // modelName = "en_US-lessac-medium.onnx" + + val config = getOfflineTtsConfig(modelDir = modelDir!!, modelName = modelName!!)!!
tts = OfflineTts(assetManager = application.assets, config = config) } } diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt index 748e7b73b..eed9a5934 100644 --- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt +++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt @@ -116,45 +116,17 @@ class OfflineTts( // please refer to // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html // to download models -// -// You can change the type as you wish -fun getOfflineTtsConfig(type: Int, debug: Boolean = false): OfflineTtsConfig? { - when (type) { - 0 -> { - val modelDir = "vits-vctk" - return OfflineTtsConfig( - model = OfflineTtsModelConfig( - vits = OfflineTtsVitsModelConfig( - model = "$modelDir/vits-vctk.onnx", - lexicon = "$modelDir/lexicon.txt", - tokens = "$modelDir/tokens.txt" - ), - numThreads = 2, - debug = debug, - provider = "cpu", - ) - ) - } - - 1 -> { - val modelDir = "vits-zh-aishell3" - return OfflineTtsConfig( - model = OfflineTtsModelConfig( - vits = OfflineTtsVitsModelConfig( - model = "$modelDir/vits-aishell3.onnx", - lexicon = "$modelDir/lexicon.txt", - tokens = "$modelDir/tokens.txt" - ), - numThreads = 2, - debug = debug, - provider = "cpu", - ) - ) - } - } - - println("Unsupported type $type") - - return null - +fun getOfflineTtsConfig(modelDir: String, modelName: String): OfflineTtsConfig?
{ + return OfflineTtsConfig( + model = OfflineTtsModelConfig( + vits = OfflineTtsVitsModelConfig( + model = "$modelDir/$modelName", + lexicon = "$modelDir/lexicon.txt", + tokens = "$modelDir/tokens.txt" + ), + numThreads = 2, + debug = false, + provider = "cpu", + ) + ) } diff --git a/scripts/apk/.gitignore b/scripts/apk/.gitignore new file mode 100644 index 000000000..3b7d2994a --- /dev/null +++ b/scripts/apk/.gitignore @@ -0,0 +1 @@ +build-apk-tts.sh diff --git a/scripts/apk/README.md b/scripts/apk/README.md new file mode 100644 index 000000000..9e21ef340 --- /dev/null +++ b/scripts/apk/README.md @@ -0,0 +1,3 @@ +# Introduction + +This folder contains scripts for building Android APKs. diff --git a/build-apk-tts.sh b/scripts/apk/build-apk-tts.sh.in old mode 100755 new mode 100644 similarity index 51% rename from build-apk-tts.sh rename to scripts/apk/build-apk-tts.sh.in index 394be163c..05bfa2fd7 --- a/build-apk-tts.sh +++ b/scripts/apk/build-apk-tts.sh.in @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# +# Auto generated from build-apk-tts.sh.in! Please DO NOT EDIT!
# Please set the environment variable ANDROID_NDK # before running this script @@ -27,61 +29,31 @@ log "====================x86-64====================" log "====================x86====================" ./build-android-x86.sh - mkdir -p apks -# Download the model +{% for tts_model in tts_model_list %} pushd ./android/SherpaOnnxTts/app/src/main/assets/ -mkdir vits-vctk - -cd vits-vctk -wget -qq https://huggingface.co/csukuangfj/vits-vctk/resolve/main/vits-vctk.onnx -wget -qq https://huggingface.co/csukuangfj/vits-vctk/resolve/main/lexicon.txt -wget -qq https://huggingface.co/csukuangfj/vits-vctk/resolve/main/tokens.txt -popd - -for arch in arm64-v8a armeabi-v7a x86_64 x86; do - log "------------------------------------------------------------" - log "build tts apk for $arch" - log "------------------------------------------------------------" - src_arch=$arch - if [ $arch == "armeabi-v7a" ]; then - src_arch=armv7-eabi - elif [ $arch == "x86_64" ]; then - src_arch=x86-64 - fi +model_dir={{ tts_model.model_dir }} +model_name={{ tts_model.model_name }} +lang={{ tts_model.lang }} - ls -lh ./build-android-$src_arch/install/lib/*.so +mkdir $model_dir +cd $model_dir +wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/$model_name +wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/lexicon.txt +wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/tokens.txt +wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/MODEL_CARD 2>/dev/null || true - cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxTts/app/src/main/jniLibs/$arch/ - - pushd ./android/SherpaOnnxTts - ./gradlew build - popd - - mv android/SherpaOnnxTts/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-en-tts-multi-speaker-vctk.apk - ls -lh apks - rm -v ./android/SherpaOnnxTts/app/src/main/jniLibs/$arch/*.so -done - -rm -rf ./android/SherpaOnnxTts/app/src/main/assets/vits-vctk +popd +# Now we are at the
project root directory git checkout . pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx -sed -i.bak s/"type = 0"/"type = 1"/ ./MainActivity.kt +sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt +sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt git diff popd -pushd ./android/SherpaOnnxTts/app/src/main/assets/ -mkdir vits-zh-aishell3 -cd vits-zh-aishell3 - -wget -qq https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/vits-aishell3.onnx -wget -qq https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/lexicon.txt -wget -qq https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/tokens.txt - -popd - for arch in arm64-v8a armeabi-v7a x86_64 x86; do log "------------------------------------------------------------" log "build tts apk for $arch" @@ -101,12 +73,13 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do ./gradlew build popd - mv android/SherpaOnnxTts/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-zh-tts-multi-speaker-aishell3.apk + mv android/SherpaOnnxTts/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-$lang-tts-$model_dir.apk ls -lh apks rm -v ./android/SherpaOnnxTts/app/src/main/jniLibs/$arch/*.so done -rm -rf ./android/SherpaOnnxTts/app/src/main/assets/vits-vctk +rm -rf ./android/SherpaOnnxTts/app/src/main/assets/$model_dir +{% endfor %} git checkout .
diff --git a/scripts/apk/generate-tts-apk-script.py b/scripts/apk/generate-tts-apk-script.py
new file mode 100755
index 000000000..073f16187
--- /dev/null
+++ b/scripts/apk/generate-tts-apk-script.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""Generate build-apk-tts.sh for one CI runner.
+
+The TTS models listed in get_all_models() are split into --total contiguous
+shards and the models of shard --index are rendered into ./build-apk-tts.sh
+using the jinja2 template ./build-apk-tts.sh.in. Both paths are relative to
+the current working directory.
+"""
+
+import argparse
+from dataclasses import dataclass
+from typing import List, Tuple
+
+import jinja2
+
+
+def get_args():
+    """Parse the command line options --total and --index."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--total",
+        type=int,
+        default=1,
+        help="Number of runners",
+    )
+    parser.add_argument(
+        "--index",
+        type=int,
+        default=0,
+        help="Index of the current runner",
+    )
+    return parser.parse_args()
+
+
+@dataclass
+class TtsModel:
+    """A TTS model; the fields are substituted into build-apk-tts.sh.in."""
+
+    model_dir: str
+    model_name: str
+    lang: str  # en, zh, fr, de, etc.
+
+
+def get_all_models() -> List[TtsModel]:
+    """Return all models for which APKs are generated, in a fixed order."""
+    return [
+        TtsModel(
+            model_dir="vits-zh-aishell3", model_name="vits-aishell3.onnx", lang="zh"
+        ),
+        # English (US)
+        # fmt: off
+        TtsModel(model_dir="vits-vctk", model_name="vits-vctk.onnx", lang="en"),
+        TtsModel(model_dir="vits-ljs", model_name="vits-ljs.onnx", lang="en"),
+        TtsModel(model_dir="vits-piper-en_US-amy-low", model_name="en_US-amy-low.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-amy-medium", model_name="en_US-amy-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-arctic-medium", model_name="en_US-arctic-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-danny-low", model_name="en_US-danny-low.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-hfc_male-medium", model_name="en_US-hfc_male-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-joe-medium", model_name="en_US-joe-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-kathleen-low", model_name="en_US-kathleen-low.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-kusal-medium", model_name="en_US-kusal-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-l2arctic-medium", model_name="en_US-l2arctic-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-lessac-low", model_name="en_US-lessac-low.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-lessac-medium", model_name="en_US-lessac-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-lessac-high", model_name="en_US-lessac-high.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-libritts-high", model_name="en_US-libritts-high.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-libritts_r-medium", model_name="en_US-libritts_r-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-ryan-low", model_name="en_US-ryan-low.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-ryan-medium", model_name="en_US-ryan-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_US-ryan-high", model_name="en_US-ryan-high.onnx", lang="en",),
+        # English (GB)
+        TtsModel(model_dir="vits-piper-en_GB-alan-low", model_name="en_GB-alan-low.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_GB-alan-medium", model_name="en_GB-alan-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_GB-alba-medium", model_name="en_GB-alba-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_GB-jenny_dioco-medium", model_name="en_GB-jenny_dioco-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_GB-northern_english_male-medium", model_name="en_GB-northern_english_male-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_GB-semaine-medium", model_name="en_GB-semaine-medium.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_GB-southern_english_female-low", model_name="en_GB-southern_english_female-low.onnx", lang="en",),
+        TtsModel(model_dir="vits-piper-en_GB-vctk-medium", model_name="en_GB-vctk-medium.onnx", lang="en",),
+        # German (DE)
+        TtsModel(model_dir="vits-piper-de_DE-eva_k-x_low", model_name="de_DE-eva_k-x_low.onnx", lang="de",),
+        TtsModel(model_dir="vits-piper-de_DE-karlsson-low", model_name="de_DE-karlsson-low.onnx", lang="de",),
+        TtsModel(model_dir="vits-piper-de_DE-kerstin-low", model_name="de_DE-kerstin-low.onnx", lang="de",),
+        TtsModel(model_dir="vits-piper-de_DE-pavoque-low", model_name="de_DE-pavoque-low.onnx", lang="de",),
+        TtsModel(model_dir="vits-piper-de_DE-ramona-low", model_name="de_DE-ramona-low.onnx", lang="de",),
+        TtsModel(model_dir="vits-piper-de_DE-thorsten-low", model_name="de_DE-thorsten-low.onnx", lang="de",),
+        TtsModel(model_dir="vits-piper-de_DE-thorsten-medium", model_name="de_DE-thorsten-medium.onnx", lang="de",),
+        TtsModel(model_dir="vits-piper-de_DE-thorsten-high", model_name="de_DE-thorsten-high.onnx", lang="de",),
+        TtsModel(model_dir="vits-piper-de_DE-thorsten_emotional-medium", model_name="de_DE-thorsten_emotional-medium.onnx", lang="de",),
+        # French (FR)
+        TtsModel(model_dir="vits-piper-fr_FR-upmc-medium", model_name="fr_FR-upmc-medium.onnx", lang="fr",),
+        TtsModel(model_dir="vits-piper-fr_FR-siwis-low", model_name="fr_FR-siwis-low.onnx", lang="fr",),
+        TtsModel(model_dir="vits-piper-fr_FR-siwis-medium", model_name="fr_FR-siwis-medium.onnx", lang="fr",),
+
+        # Spanish (ES)
+        TtsModel(model_dir="vits-piper-es_ES-carlfm-x_low", model_name="es_ES-carlfm-x_low.onnx", lang="es",),
+        TtsModel(model_dir="vits-piper-es_ES-davefx-medium", model_name="es_ES-davefx-medium.onnx", lang="es",),
+        TtsModel(model_dir="vits-piper-es_ES-mls_10246-low", model_name="es_ES-mls_10246-low.onnx", lang="es",),
+        TtsModel(model_dir="vits-piper-es_ES-mls_9972-low", model_name="es_ES-mls_9972-low.onnx", lang="es",),
+        TtsModel(model_dir="vits-piper-es_ES-sharvard-medium", model_name="es_ES-sharvard-medium.onnx", lang="es",),
+
+        # Spanish (MX)
+        TtsModel(model_dir="vits-piper-es_MX-ald-medium", model_name="es_MX-ald-medium.onnx", lang="es",),
+        # fmt: on
+    ]
+
+
+def get_shard_range(num_models: int, total: int, index: int) -> Tuple[int, int]:
+    """Return the half-open range [start, end) of the models for one runner.
+
+    The models are split into ``total`` contiguous shards whose sizes differ by
+    at most one: the first ``num_models % total`` shards get one extra model.
+    This keeps the runners balanced instead of handing the whole remainder to
+    the last one.
+    """
+    base, extra = divmod(num_models, total)
+    start = index * base + min(index, extra)
+    end = start + base + (1 if index < extra else 0)
+    return start, end
+
+
+def main():
+    args = get_args()
+    index = args.index
+    total = args.total
+    assert 0 <= index < total, (index, total)
+
+    all_model_list = get_all_models()
+    num_models = len(all_model_list)
+    if num_models < total:
+        # Otherwise some runner would get no model at all.
+        raise ValueError(f"num_models: {num_models}, num_runners: {total}")
+
+    start, end = get_shard_range(num_models, total, index)
+    print(f"{index}/{total}: {start}-{end}/{num_models}")
+
+    with open("./build-apk-tts.sh.in") as f:
+        template = jinja2.Environment().from_string(f.read())
+
+    s = template.render(tts_model_list=all_model_list[start:end])
+    with open("./build-apk-tts.sh", "w") as f:
+        print(s, file=f)
+
+
+if __name__ == "__main__":
+    main()