forked from k2-fsa/icefall
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add MatchaTTS for the Chinese dataset Baker (k2-fsa#1849)
- Loading branch information
1 parent
efd6a86
commit f38861e
Showing
34 changed files
with
3,128 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
#!/usr/bin/env bash
#
# Runs the MatchaTTS recipe for the Baker Chinese dataset end to end on a tiny
# data sample: data preparation, a very short training run, inference and ONNX
# export.
#
# NOTE(review): assumes it is started from the repository root inside a
# container where the repository is mounted at /icefall -- confirm with the
# calling workflow.

# -e: abort on the first failing command; -x: trace every command in the log.
set -ex

# sox provides the soxi tool used below to inspect the generated wave files.
apt-get update
apt-get install -y sox

python3 -m pip install numba conformer==0.3.2 diffusers librosa
python3 -m pip install jieba
|
||
|
||
log() {
  # Print a timestamped message tagged with the caller's location.
  # This function is from espnet.
  #
  # Arguments: $* - the message; all words are joined with single spaces.
  # Outputs:   one line on stdout of the form
  #            "YYYY-mm-dd HH:MM:SS (file:line:function) message"
  local fname=${BASH_SOURCE[1]##*/}
  # printf '%s' emits the message verbatim. The previous `echo -e` would
  # expand backslash sequences (e.g. "\t", "\n") found inside the message.
  printf '%s (%s:%s:%s) %s\n' \
    "$(date '+%Y-%m-%d %H:%M:%S')" \
    "${fname}" "${BASH_LINENO[0]}" "${FUNCNAME[1]}" "$*"
}
|
||
# Every relative path used from here on is relative to the recipe directory.
cd egs/baker_zh/TTS

# Rewrite the sizes hard-coded in prepare.sh (600 -> 8, "first 100" ->
# "first 3", 500 -> 5); presumably this keeps the run small enough for the
# sample data set.
# All three substitutions run in ONE sed call, in the original order, so the
# edited file is identical to three separate passes while prepare.sh.bak keeps
# the pristine original. Three separate `sed -i.bak` calls overwrite the
# backup each time.
sed -i.bak \
  -e 's/600/8/g' \
  -e 's/first 100/first 3/g' \
  -e 's/500/5/g' \
  ./prepare.sh
# Show the resulting change in the job log.
git diff
|
||
function prepare_data() {
  # Fetch the small BZNSYP sample created for testing, unpack it as
  # download/BZNSYP, then run the recipe's prepare.sh on it.
  #
  # Outputs: download/BZNSYP plus whatever prepare.sh generates; the final
  #          directory tree is listed on stdout.
  local archive=BZNSYP-samples.tar.bz2

  mkdir -p download
  pushd download
  wget -q "https://huggingface.co/csukuangfj/tmp-files/resolve/main/${archive}"
  tar xvf "${archive}"
  # The archive unpacks to BZNSYP-samples; rename it to BZNSYP.
  mv BZNSYP-samples BZNSYP
  rm "${archive}"
  popd

  ./prepare.sh
  tree .
}
|
||
function train() {
  # Run a one-epoch training job and list the experiment directory.
  #
  # Outputs: whatever train.py writes under matcha/exp.

  # Replace every "1500" in train.py with "3" before training (the original
  # is kept as train.py.bak) and show the change in the log.
  pushd ./matcha
  sed -i.bak 's/1500/3/g' ./train.py
  git diff .
  popd

  ./matcha/train.py \
    --exp-dir matcha/exp \
    --num-epochs 1 \
    --save-every-n 1 \
    --num-buckets 2 \
    --tokens data/tokens.txt \
    --max-duration 20

  ls -lh matcha/exp
}
|
||
function infer() {
  # Synthesize one Chinese sentence with the epoch-1 checkpoint and the
  # HiFi-GAN generator_v2 vocoder, print information about the resulting wave
  # file, then delete both the wave file and the downloaded vocoder.

  # -f makes curl exit non-zero on an HTTP error. Without it a 4xx/5xx
  # response is saved as "generator_v2" and curl still reports success.
  curl -fSL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2

  ./matcha/infer.py \
    --num-buckets 2 \
    --epoch 1 \
    --exp-dir ./matcha/exp \
    --tokens data/tokens.txt \
    --cmvn ./data/fbank/cmvn.json \
    --vocoder ./generator_v2 \
    --input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
    --output-wav ./generated.wav

  # "./" keeps a file name starting with "-" from being read as an option.
  ls -lh ./*.wav
  soxi ./generated.wav
  rm -v ./generated.wav
  rm -v generator_v2
}
|
||
function export_onnx() {
  # Export the released epoch-2000 checkpoint to ONNX, synthesize a test
  # sentence with each HiFi-GAN ONNX vocoder (v1, v2, v3), and assemble the
  # "matcha-icefall-zh-baker" model package.
  #
  # Outputs (all under /icefall): the generated wave files, the
  #          model-steps-*.onnx files, the package directory and its .tar.bz2.
  #
  # Every curl call uses -f so that an HTTP error makes curl fail (and, with
  # `set -e`, stops the script). Without it the server's error page would be
  # saved under the model's file name.
  local d=matcha-icefall-zh-baker
  local v

  pushd matcha/exp
  curl -fSL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/epoch-2000.pt
  popd

  # Replace the locally computed statistics with the cmvn.json published
  # alongside the checkpoint.
  pushd data/fbank
  rm -v ./*.json
  curl -fSL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/cmvn.json
  popd

  ./matcha/export_onnx.py \
    --exp-dir ./matcha/exp \
    --epoch 2000 \
    --tokens ./data/tokens.txt \
    --cmvn ./data/fbank/cmvn.json

  ls -lh ./*.onnx

  if false; then
    # The CI machine does not have enough memory to run it
    #
    curl -fSL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
    curl -fSL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
    curl -fSL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
    python3 ./matcha/export_onnx_hifigan.py
  else
    # Download already-exported vocoders instead.
    curl -fSL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
    curl -fSL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
    curl -fSL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
  fi

  ls -lh ./*.onnx

  python3 ./matcha/generate_lexicon.py

  for v in v1 v2 v3; do
    python3 ./matcha/onnx_pretrained.py \
      --acoustic-model ./model-steps-6.onnx \
      --vocoder "./hifigan_$v.onnx" \
      --tokens ./data/tokens.txt \
      --lexicon ./lexicon.txt \
      --input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
      --output-wav "/icefall/generated-matcha-tts-steps-6-$v.wav"
  done

  ls -lh /icefall/*.wav
  soxi /icefall/generated-matcha-tts-steps-6-*.wav
  cp ./model-steps-*.onnx /icefall

  # Assemble the package: tokens, lexicon, the 3-step acoustic model, the
  # jieba dictionary, the *.fst files and a README.
  # -p: do not fail if the directory is left over from an earlier run.
  mkdir -p "$d"
  cp -v data/tokens.txt "$d"
  cp -v lexicon.txt "$d"
  cp model-steps-3.onnx "$d"
  pushd "$d"
  curl -fSL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
  tar xvf dict.tar.bz2
  rm dict.tar.bz2

  curl -fSL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
  curl -fSL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
  curl -fSL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst

  # The delimiter is quoted so the README text is written literally.
  cat >README.md <<'EOF'
# Introduction
This model is trained using the dataset from
https://en.data-baker.com/datasets/freeDatasets/
The dataset contains 10000 Chinese sentences of a native Chinese female speaker,
which is about 12 hours.
**Note**: The dataset is for non-commercial use only.
You can find the training code at
https://github.com/k2-fsa/icefall/tree/master/egs/baker_zh/TTS
EOF

  ls -lh
  popd
  tar cvjf "$d.tar.bz2" "$d"
  mv "$d.tar.bz2" /icefall
  mv "$d" /icefall
}
|
||
# Run the stages in order; with `set -e` the first failing stage stops the
# script.
prepare_data
train
infer
export_onnx

# Clean up: remove downloaded vocoders and the experiment directory, then
# restore the tracked files that were edited with sed.
rm -rfv generator_v* matcha/exp
git checkout .
# `git checkout .` only restores tracked files; the backups written by
# `sed -i.bak` are untracked, so remove them explicitly.
rm -fv ./prepare.sh.bak ./matcha/train.py.bak
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
# CI workflow for the Baker (Chinese) TTS recipe: runs the MatchaTTS test
# script inside the icefall CPU docker images, uploads the generated files as
# artifacts and, on push, publishes the exported models.
name: baker_zh

on:
  push:
    branches:
      - master
      - baker-matcha-2

  pull_request:
    branches:
      - master

  workflow_dispatch:

# A newer run for the same ref cancels the one still in progress.
concurrency:
  group: baker-zh-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3")
          # The `::set-output` workflow command is deprecated; step outputs
          # are written to the file named by $GITHUB_OUTPUT instead.
          # NOTE(review): this form assumes the matrix JSON is a single line.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"

  baker_zh:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Free space
        shell: bash
        run: |
          ls -lh
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"

      - name: Run tests
        uses: addnab/docker-run-action@v3
        with:
          image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
          # The checkout is mounted at /icefall, so files the script writes
          # there show up in the workspace for the steps below.
          options: |
            --volume ${{ github.workspace }}/:/icefall
          shell: bash
          run: |
            export PYTHONPATH=/icefall:$PYTHONPATH
            cd /icefall
            pip install onnx==1.17.0
            pip list
            git config --global --add safe.directory /icefall
            .github/scripts/baker_zh/TTS/run-matcha.sh

      - name: display files
        shell: bash
        run: |
          ls -lh

      # Artifacts are uploaded from a single matrix entry only.
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
          path: ./*.wav

      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-2
          path: ./model-steps-2.onnx

      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-3
          path: ./model-steps-3.onnx

      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-4
          path: ./model-steps-4.onnx

      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-5
          path: ./model-steps-5.onnx

      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
        with:
          name: step-6
          path: ./model-steps-6.onnx

      - name: Upload models to huggingface
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          d=matcha-icefall-zh-baker
          # Clone without downloading the LFS objects.
          GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/$d hf
          cp -av $d/* hf/
          pushd hf
          git add .
          git config --global user.name "csukuangfj"
          # NOTE(review): the address below looks like a redaction placeholder;
          # confirm the intended committer email.
          git config --global user.email "[email protected]"
          git config --global lfs.allowincompletepush true
          # Best effort: `git commit` fails when nothing changed, and that
          # must not fail the job.
          git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
          popd

      - name: Release exported onnx models
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: matcha-icefall-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Local and generated files that must not be committed.
path.sh
*.onnx
*.wav
generator_v1
generator_v2
generator_v3
Oops, something went wrong.