Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add HarmonyOS examples for MatchaTTS. #1678

Merged
merged 1 commit into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* Use these variables when you tailor your ArkTS code. They must be of the const type.
*/
export const HAR_VERSION = '1.10.35';
export const HAR_VERSION = '1.10.37';
export const BUILD_MODE_NAME = 'debug';
export const DEBUG = true;
export const TARGET_NAME = 'default';
Expand Down
10 changes: 3 additions & 7 deletions harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";

export { CircularBuffer,
SileroVadConfig,
SpeechSegment,
Vad,
VadConfig,
} from './src/main/ets/components/Vad';
export { CircularBuffer, SileroVadConfig, SpeechSegment, Vad, VadConfig, } from './src/main/ets/components/Vad';


export { Samples,
Expand Down Expand Up @@ -36,7 +31,8 @@ export { OnlineStream,
OnlineRecognizer,
} from './src/main/ets/components/StreamingAsr';

export { OfflineTtsVitsModelConfig,
export { OfflineTtsMatchaModelConfig,
OfflineTtsVitsModelConfig,
OfflineTtsModelConfig,
OfflineTtsConfig,
OfflineTts,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,20 @@ export class OfflineTtsVitsModelConfig {
public lengthScale: number = 1.0;
}

/**
 * Configuration for a Matcha TTS model.
 *
 * All string fields default to the empty string. The example app leaves a
 * field empty when the selected model does not use it (e.g. the English
 * example sets `dataDir` but not `lexicon`/`dictDir`).
 */
export class OfflineTtsMatchaModelConfig {
  /** Path of the acoustic model file, e.g. `<modelDir>/model-steps-3.onnx`. */
  public acousticModel: string = '';

  /** Vocoder model file, e.g. `hifigan_v2.onnx`. */
  public vocoder: string = '';

  /** Lexicon file, e.g. `lexicon.txt`. */
  public lexicon: string = '';

  /** Tokens file. */
  public tokens: string = '';

  /** Data directory, e.g. `espeak-ng-data`. */
  public dataDir: string = '';

  /**
   * Dictionary directory, e.g. `dict`.
   *
   * Typed as the primitive `string` (not the boxed `String` wrapper) so it is
   * consistent with the other fields and assignable to/from plain strings.
   */
  public dictDir: string = '';

  // NOTE(review): presumably the synthesis noise scale and duration/speed
  // scale of the model — confirm against the native implementation.
  public noiseScale: number = 0.667;
  public lengthScale: number = 1.0;
}

export class OfflineTtsModelConfig {
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig();
public numThreads: number = 1;
public debug: boolean = false;
public provider: string = 'cpu';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,16 @@ function initTts(context: Context): OfflineTts {
// for details

let modelDir = '';

// for VITS begin
let modelName = '';
// for VITS end

// for Matcha begin
let acousticModelName = '';
let vocoder = '';
// for Matcha end

let ruleFsts = '';
let ruleFars = '';
let lexicon = '';
Expand Down Expand Up @@ -134,15 +143,47 @@ function initTts(context: Context): OfflineTts {
// dictDir = 'dict';
// ruleFsts = `date.fst,phone.fst,number.fst`;

// Example 8
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// modelDir = 'matcha-icefall-zh-baker'
// acousticModelName = 'model-steps-3.onnx'
// vocoder = 'hifigan_v2.onnx'
// lexicon = 'lexicon.txt'
// dictDir = 'dict';
// ruleFsts = `date.fst,phone.fst,number.fst`;

// Example 9
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// modelDir = 'matcha-icefall-en_US-ljspeech'
// acousticModelName = 'model-steps-3.onnx'
// vocoder = 'hifigan_v2.onnx'
// dataDir = 'espeak-ng-data';

// ============================================================
// Please don't change the remaining part of this function
// ============================================================

if (modelName == '') {
if (modelName == '' && acousticModelName == '' && vocoder == '') {
throw new Error('You are supposed to select a model by changing the code before you run the app');
}

modelName = modelDir + '/' + modelName;
if (modelName != '' && acousticModelName != '') {
throw new Error('Please select either VITS or Matcha, not both');
}

if (acousticModelName != '' && vocoder == '') {
throw new Error('Please provider vocoder for matcha tts models');
}

if (modelName != '') {
modelName = modelDir + '/' + modelName;
}

if (acousticModelName != '') {
acousticModelName = modelDir + '/' + acousticModelName;
}

if (ruleFsts != '') {
let fsts = ruleFsts.split(',')
Expand Down Expand Up @@ -186,6 +227,14 @@ function initTts(context: Context): OfflineTts {
config.model.vits.tokens = tokens;
config.model.vits.dataDir = dataDir;
config.model.vits.dictDir = dictDir;

config.model.matcha.acousticModel = acousticModelName;
config.model.matcha.vocoder = vocoder;
config.model.matcha.lexicon = lexicon;
config.model.matcha.tokens = tokens;
config.model.matcha.dataDir = dataDir;
config.model.matcha.dictDir = dictDir;

config.model.numThreads = 2;
config.model.debug = true;
config.ruleFsts = ruleFsts;
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2098,7 +2098,7 @@ SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
}

#if SHERPA_ONNX_ENABLE_TTS == 1
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) {
if (!mgr) {
return SherpaOnnxCreateOfflineTts(config);
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/c-api/c-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1618,7 +1618,7 @@ SherpaOnnxCreateVoiceActivityDetectorOHOS(
const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
NativeResourceManager *mgr);

SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
SHERPA_ONNX_API const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr);

SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
Expand Down
64 changes: 64 additions & 0 deletions sherpa-onnx/csrc/jieba-lexicon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,23 @@

#include <fstream>
#include <regex> // NOLINT
#include <strstream>
#include <unordered_set>
#include <utility>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "cppjieba/Jieba.hpp"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/text-utils.h"

Expand Down Expand Up @@ -56,6 +67,39 @@ class JiebaLexicon::Impl {
}
}

// Builds the implementation using a platform resource manager.
//
// The tokens and lexicon files are loaded through ReadFile(mgr, ...), while
// the five jieba dictionary files are referenced by path under dict_dir and
// checked with AssertFileExists() before cppjieba is constructed.
// NOTE(review): presumably AssertFileExists() looks at the regular
// filesystem, so dict_dir must already be on disk -- confirm.
//
// @param mgr      Resource manager handle forwarded to ReadFile().
// @param lexicon  Name of the lexicon file to load via mgr.
// @param tokens   Name of the tokens file to load via mgr.
// @param dict_dir Directory containing the jieba dictionary files.
// @param debug    Stored in debug_.
template <typename Manager>
Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
     const std::string &dict_dir, bool debug)
    : debug_(debug) {
  std::string main_dict_path = dict_dir + "/jieba.dict.utf8";
  std::string hmm_path = dict_dir + "/hmm_model.utf8";
  std::string user_dict_path = dict_dir + "/user.dict.utf8";
  std::string idf_path = dict_dir + "/idf.utf8";
  std::string stop_word_path = dict_dir + "/stop_words.utf8";

  // Validate every required file, in the same order they are passed to jieba.
  for (const std::string *required :
       {&main_dict_path, &hmm_path, &user_dict_path, &idf_path,
        &stop_word_path}) {
    AssertFileExists(*required);
  }

  jieba_ = std::make_unique<cppjieba::Jieba>(
      main_dict_path, hmm_path, user_dict_path, idf_path, stop_word_path);

  {
    // The buffer must outlive the stream that reads from it.
    auto token_bytes = ReadFile(mgr, tokens);
    std::istrstream token_stream(token_bytes.data(), token_bytes.size());
    InitTokens(token_stream);
  }

  {
    // Same lifetime requirement as for the tokens buffer.
    auto lexicon_bytes = ReadFile(mgr, lexicon);
    std::istrstream lexicon_stream(lexicon_bytes.data(),
                                   lexicon_bytes.size());
    InitLexicon(lexicon_stream);
  }
}

std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &text) const {
// see
// https://github.com/Plachtaa/VITS-fast-fine-tuning/blob/main/text/mandarin.py#L244
Expand Down Expand Up @@ -279,9 +323,29 @@ JiebaLexicon::JiebaLexicon(const std::string &lexicon,
const std::string &dict_dir, bool debug)
: impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, debug)) {}

// Constructs a JiebaLexicon using a platform resource manager.
//
// All arguments are forwarded unchanged to the templated Impl constructor.
// Because this template is defined in the .cc file, it is only usable for
// the Manager types that are explicitly instantiated in this file.
template <typename Manager>
JiebaLexicon::JiebaLexicon(Manager *mgr, const std::string &lexicon,
const std::string &tokens,
const std::string &dict_dir, bool debug)
: impl_(std::make_unique<Impl>(mgr, lexicon, tokens, dict_dir, debug)) {}

// Converts text to token IDs by delegating to the implementation object.
// The second (voice) argument is accepted but ignored by this frontend.
std::vector<TokenIDs> JiebaLexicon::ConvertTextToTokenIds(
const std::string &text, const std::string & /*unused_voice = ""*/) const {
return impl_->ConvertTextToTokenIds(text);
}

// Explicit instantiations of the templated constructor, one per supported
// platform resource manager. They are required because the template is
// defined in this translation unit rather than in the header.

// Android: instantiate for AAssetManager.
#if __ANDROID_API__ >= 9
template JiebaLexicon::JiebaLexicon(AAssetManager *mgr,
const std::string &lexicon,
const std::string &tokens,
const std::string &dict_dir, bool debug);
#endif

// HarmonyOS (OHOS): instantiate for NativeResourceManager.
#if __OHOS__
template JiebaLexicon::JiebaLexicon(NativeResourceManager *mgr,
const std::string &lexicon,
const std::string &tokens,
const std::string &dict_dir, bool debug);
#endif

} // namespace sherpa_onnx
6 changes: 6 additions & 0 deletions sherpa-onnx/csrc/jieba-lexicon.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,15 @@ namespace sherpa_onnx {
class JiebaLexicon : public OfflineTtsFrontend {
public:
~JiebaLexicon() override;

JiebaLexicon(const std::string &lexicon, const std::string &tokens,
const std::string &dict_dir, bool debug);

template <typename Manager>
JiebaLexicon(Manager *mgr, const std::string &lexicon,
const std::string &tokens, const std::string &dict_dir,
bool debug);

std::vector<TokenIDs> ConvertTextToTokenIds(
const std::string &text,
const std::string &unused_voice = "") const override;
Expand Down
5 changes: 2 additions & 3 deletions sherpa-onnx/csrc/offline-tts-matcha-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,13 +327,12 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
// from assets to disk
//
// for jieba
// we require that you copy tokens.txt, lexicon.txt and dict
// from assets to disk
// we require that you copy dict from assets to disk
const auto &meta_data = model_->GetMetaData();

if (meta_data.jieba && !meta_data.has_espeak) {
frontend_ = std::make_unique<JiebaLexicon>(
config_.model.matcha.lexicon, config_.model.matcha.tokens,
mgr, config_.model.matcha.lexicon, config_.model.matcha.tokens,
config_.model.matcha.dict_dir, config_.model.debug);
} else if (meta_data.has_espeak && !meta_data.jieba) {
frontend_ = std::make_unique<PiperPhonemizeLexicon>(
Expand Down
Loading