From dc55d6e076c83604f87aeddf163d3ce435220b77 Mon Sep 17 00:00:00 2001 From: Yifan Yang <64255737+yfyeung@users.noreply.github.com> Date: Tue, 31 Dec 2024 00:27:08 +0800 Subject: [PATCH] Small fix --- egs/wenetspeech4tts/TTS/prepare.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/wenetspeech4tts/TTS/prepare.sh b/egs/wenetspeech4tts/TTS/prepare.sh index 54e140dbb1..3d7ffadb1b 100755 --- a/egs/wenetspeech4tts/TTS/prepare.sh +++ b/egs/wenetspeech4tts/TTS/prepare.sh @@ -10,9 +10,9 @@ stop_stage=4 dl_dir=$PWD/download -dataset_parts="Premium" # Basic for all 10k hours data, Premium for about 10% of the data +dataset_parts="Premium" # Basic for all 7226 hours data, Premium for 945 hours subset. -text_extractor="pypinyin_initials_finals" # default is espeak for English +text_extractor="pypinyin_initials_finals" # default is espeak for English audio_extractor="Encodec" # or Fbank audio_feats_dir=data/tokenized @@ -63,7 +63,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then --audio-extractor ${audio_extractor} \ --batch-duration 2500 --prefix "wenetspeech4tts" \ --src-dir "data/manifests" \ - --split 100 \ + --split 100 \ --output-dir "${audio_feats_dir}/wenetspeech4tts_${dataset_parts}_split_100" cp ${audio_feats_dir}/wenetspeech4tts_${dataset_parts}_split_100/unique_text_tokens.k2symbols ${audio_feats_dir} fi