Skip to content

Commit

Permalink
Add HarmonyOS examples for MatchaTTS. (#1678)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jan 3, 2025
1 parent 0e299f3 commit bf3330c
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 15 deletions.
2 changes: 1 addition & 1 deletion harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* Use these variables when you tailor your ArkTS code. They must be of the const type.
*/
export const HAR_VERSION = '1.10.35';
export const HAR_VERSION = '1.10.37';
export const BUILD_MODE_NAME = 'debug';
export const DEBUG = true;
export const TARGET_NAME = 'default';
Expand Down
10 changes: 3 additions & 7 deletions harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";

export { CircularBuffer,
SileroVadConfig,
SpeechSegment,
Vad,
VadConfig,
} from './src/main/ets/components/Vad';
export { CircularBuffer, SileroVadConfig, SpeechSegment, Vad, VadConfig, } from './src/main/ets/components/Vad';


export { Samples,
Expand Down Expand Up @@ -36,7 +31,8 @@ export { OnlineStream,
OnlineRecognizer,
} from './src/main/ets/components/StreamingAsr';

export { OfflineTtsVitsModelConfig,
export { OfflineTtsMatchaModelConfig,
OfflineTtsVitsModelConfig,
OfflineTtsModelConfig,
OfflineTtsConfig,
OfflineTts,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,20 @@ export class OfflineTtsVitsModelConfig {
public lengthScale: number = 1.0;
}

/**
 * Model-level settings for a Matcha TTS model.
 *
 * Every path-like field defaults to the empty string, meaning "not set".
 * All text fields use the primitive `string` type so that values read from
 * this config can be assigned to ordinary `string` variables.
 */
export class OfflineTtsMatchaModelConfig {
  /** Path to the acoustic model file (e.g. `<modelDir>/model-steps-3.onnx`). */
  public acousticModel: string = '';

  /** Path to the vocoder model file (e.g. `hifigan_v2.onnx`). */
  public vocoder: string = '';

  /** Path to the lexicon file (e.g. `lexicon.txt`); empty when unused. */
  public lexicon: string = '';

  /** Path to the tokens file. */
  public tokens: string = '';

  /** Path to the data directory (e.g. `espeak-ng-data`); empty when unused. */
  public dataDir: string = '';

  /**
   * Path to the dictionary directory (e.g. `dict`); empty when unused.
   *
   * Declared as the primitive `string`, not the boxed `String` wrapper, to
   * match the other fields of this class.
   */
  public dictDir: string = '';

  /** Noise scale forwarded to the model; defaults to 0.667. */
  public noiseScale: number = 0.667;

  /** Length scale forwarded to the model; defaults to 1.0. */
  public lengthScale: number = 1.0;
}

export class OfflineTtsModelConfig {
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig();
public numThreads: number = 1;
public debug: boolean = false;
public provider: string = 'cpu';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,16 @@ function initTts(context: Context): OfflineTts {
// for details

let modelDir = '';

// for VITS begin
let modelName = '';
// for VITS end

// for Matcha begin
let acousticModelName = '';
let vocoder = '';
// for Matcha end

let ruleFsts = '';
let ruleFars = '';
let lexicon = '';
Expand Down Expand Up @@ -134,15 +143,47 @@ function initTts(context: Context): OfflineTts {
// dictDir = 'dict';
// ruleFsts = `date.fst,phone.fst,number.fst`;

// Example 8
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// modelDir = 'matcha-icefall-zh-baker'
// acousticModelName = 'model-steps-3.onnx'
// vocoder = 'hifigan_v2.onnx'
// lexicon = 'lexicon.txt'
// dictDir = 'dict';
// ruleFsts = `date.fst,phone.fst,number.fst`;

// Example 9
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// modelDir = 'matcha-icefall-en_US-ljspeech'
// acousticModelName = 'model-steps-3.onnx'
// vocoder = 'hifigan_v2.onnx'
// dataDir = 'espeak-ng-data';

// ============================================================
// Please don't change the remaining part of this function
// ============================================================

if (modelName == '') {
if (modelName == '' && acousticModelName == '' && vocoder == '') {
throw new Error('You are supposed to select a model by changing the code before you run the app');
}

modelName = modelDir + '/' + modelName;
if (modelName != '' && acousticModelName != '') {
throw new Error('Please select either VITS or Matcha, not both');
}

if (acousticModelName != '' && vocoder == '') {
throw new Error('Please provider vocoder for matcha tts models');
}

if (modelName != '') {
modelName = modelDir + '/' + modelName;
}

if (acousticModelName != '') {
acousticModelName = modelDir + '/' + acousticModelName;
}

if (ruleFsts != '') {
let fsts = ruleFsts.split(',')
Expand Down Expand Up @@ -186,6 +227,14 @@ function initTts(context: Context): OfflineTts {
config.model.vits.tokens = tokens;
config.model.vits.dataDir = dataDir;
config.model.vits.dictDir = dictDir;

config.model.matcha.acousticModel = acousticModelName;
config.model.matcha.vocoder = vocoder;
config.model.matcha.lexicon = lexicon;
config.model.matcha.tokens = tokens;
config.model.matcha.dataDir = dataDir;
config.model.matcha.dictDir = dictDir;

config.model.numThreads = 2;
config.model.debug = true;
config.ruleFsts = ruleFsts;
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2098,7 +2098,7 @@ SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
}

#if SHERPA_ONNX_ENABLE_TTS == 1
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) {
if (!mgr) {
return SherpaOnnxCreateOfflineTts(config);
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/c-api/c-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1618,7 +1618,7 @@ SherpaOnnxCreateVoiceActivityDetectorOHOS(
const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
NativeResourceManager *mgr);

SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
SHERPA_ONNX_API const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr);

SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
Expand Down
64 changes: 64 additions & 0 deletions sherpa-onnx/csrc/jieba-lexicon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,23 @@

#include <fstream>
#include <regex> // NOLINT
#include <strstream>
#include <unordered_set>
#include <utility>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "cppjieba/Jieba.hpp"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/text-utils.h"

Expand Down Expand Up @@ -56,6 +67,39 @@ class JiebaLexicon::Impl {
}
}

// Constructs the implementation when `tokens` and `lexicon` are loaded through
// a platform resource manager instead of being opened directly by path.
//
// The five cppjieba dictionary files are still addressed by plain path under
// `dict_dir`: each path is checked with AssertFileExists() and then handed to
// cppjieba::Jieba. Only `tokens` and `lexicon` go through `mgr`.
//
// @param mgr      Resource manager forwarded to ReadFile().
// @param lexicon  Name/path of the lexicon, resolved by ReadFile(mgr, ...).
// @param tokens   Name/path of the tokens file, resolved by ReadFile(mgr, ...).
// @param dict_dir Directory containing the cppjieba dictionary files.
// @param debug    Stored in debug_.
template <typename Manager>
Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
const std::string &dict_dir, bool debug)
: debug_(debug) {
// File names expected inside dict_dir by cppjieba.
std::string dict = dict_dir + "/jieba.dict.utf8";
std::string hmm = dict_dir + "/hmm_model.utf8";
std::string user_dict = dict_dir + "/user.dict.utf8";
std::string idf = dict_dir + "/idf.utf8";
std::string stop_word = dict_dir + "/stop_words.utf8";

// NOTE(review): these checks take plain paths and do not use `mgr`, so
// dict_dir presumably has to exist on disk rather than inside bundled
// assets -- confirm against AssertFileExists().
AssertFileExists(dict);
AssertFileExists(hmm);
AssertFileExists(user_dict);
AssertFileExists(idf);
AssertFileExists(stop_word);

jieba_ =
std::make_unique<cppjieba::Jieba>(dict, hmm, user_dict, idf, stop_word);

// Load the token table from the buffer returned by the resource manager.
// NOTE(review): std::istrstream is deprecated in the C++ standard; an
// std::istringstream or a custom streambuf would be the modern replacement.
{
auto buf = ReadFile(mgr, tokens);
std::istrstream is(buf.data(), buf.size());

InitTokens(is);
}

// Load the lexicon the same way as the tokens above.
{
auto buf = ReadFile(mgr, lexicon);
std::istrstream is(buf.data(), buf.size());
InitLexicon(is);
}
}

std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &text) const {
// see
// https://github.com/Plachtaa/VITS-fast-fine-tuning/blob/main/text/mandarin.py#L244
Expand Down Expand Up @@ -279,9 +323,29 @@ JiebaLexicon::JiebaLexicon(const std::string &lexicon,
const std::string &dict_dir, bool debug)
: impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, debug)) {}

// Resource-manager overload of the constructor: forwards every argument,
// including `mgr`, to the matching Impl constructor.
//
// Because this template is defined in the .cc file, it is only usable for
// Manager types that are explicitly instantiated in this translation unit.
template <typename Manager>
JiebaLexicon::JiebaLexicon(Manager *mgr, const std::string &lexicon,
const std::string &tokens,
const std::string &dict_dir, bool debug)
: impl_(std::make_unique<Impl>(mgr, lexicon, tokens, dict_dir, debug)) {}

// Converts `text` to token IDs by delegating to Impl::ConvertTextToTokenIds().
// The second (voice) parameter is unnamed and ignored by this frontend.
std::vector<TokenIDs> JiebaLexicon::ConvertTextToTokenIds(
const std::string &text, const std::string & /*unused_voice = ""*/) const {
return impl_->ConvertTextToTokenIds(text);
}

// Explicit instantiations of the templated constructor. Its definition lives
// in this .cc file, so each supported manager type must be instantiated here
// for other translation units to link against it.

// Android: instantiate for AAssetManager.
#if __ANDROID_API__ >= 9
template JiebaLexicon::JiebaLexicon(AAssetManager *mgr,
const std::string &lexicon,
const std::string &tokens,
const std::string &dict_dir, bool debug);
#endif

// HarmonyOS (OHOS): instantiate for NativeResourceManager.
#if __OHOS__
template JiebaLexicon::JiebaLexicon(NativeResourceManager *mgr,
const std::string &lexicon,
const std::string &tokens,
const std::string &dict_dir, bool debug);
#endif

} // namespace sherpa_onnx
6 changes: 6 additions & 0 deletions sherpa-onnx/csrc/jieba-lexicon.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,15 @@ namespace sherpa_onnx {
class JiebaLexicon : public OfflineTtsFrontend {
public:
~JiebaLexicon() override;

JiebaLexicon(const std::string &lexicon, const std::string &tokens,
const std::string &dict_dir, bool debug);

template <typename Manager>
JiebaLexicon(Manager *mgr, const std::string &lexicon,
const std::string &tokens, const std::string &dict_dir,
bool debug);

std::vector<TokenIDs> ConvertTextToTokenIds(
const std::string &text,
const std::string &unused_voice = "") const override;
Expand Down
5 changes: 2 additions & 3 deletions sherpa-onnx/csrc/offline-tts-matcha-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,13 +327,12 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
// from assets to disk
//
// for jieba
// we require that you copy tokens.txt, lexicon.txt and dict
// from assets to disk
// we require that you copy dict from assets to disk
const auto &meta_data = model_->GetMetaData();

if (meta_data.jieba && !meta_data.has_espeak) {
frontend_ = std::make_unique<JiebaLexicon>(
config_.model.matcha.lexicon, config_.model.matcha.tokens,
mgr, config_.model.matcha.lexicon, config_.model.matcha.tokens,
config_.model.matcha.dict_dir, config_.model.debug);
} else if (meta_data.has_espeak && !meta_data.jieba) {
frontend_ = std::make_unique<PiperPhonemizeLexicon>(
Expand Down

0 comments on commit bf3330c

Please sign in to comment.