From e0586f1876210839e3b400351f8217021d2f04b6 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Thu, 17 Oct 2024 20:03:09 +0800
Subject: [PATCH] add more models for speaker diarization (#1440)

Build one speaker-diarization APK per (segmentation model, embedding model)
pair instead of pairing every segmentation model with a single hard-coded
embedding model.

- SpeakerDiarizationObject.kt: load the embedding model from the fixed asset
  name embedding.onnx.
- build-apk-speaker-diarization.sh.in: download the embedding model named by
  the template, rename it to embedding.onnx, put both short names into the
  APK file name, and remove every *.onnx asset after each model pair.
- generate-speaker-diarization-apk-script.py: add the SpeakerEmbeddingModel
  and Model dataclasses, add get_embedding_models(), and build the list of
  models as the cross product of segmentation and embedding models.
---
NOTE(review): get_embedding_models() constructs SpeakerEmbeddingModel so that
the returned objects match its List[SpeakerEmbeddingModel] annotation. The
post-image blob id on the corresponding "index" line was not regenerated.
Line breaks and context-line indentation were restored by convention; confirm
them against the tree before applying.

 .../diarization/SpeakerDiarizationObject.kt   |  7 ++--
 .../apk/build-apk-speaker-diarization.sh.in   | 26 +++++++------
 ...generate-speaker-diarization-apk-script.py | 38 +++++++++++++++++--
 3 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
index f4bc24554..9df6bd561 100644
--- a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
@@ -17,8 +17,9 @@ val segmentationModel = "segmentation.onnx"
 
 // please download it from
 // https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+// and rename it to embedding.onnx
 // and move it to the assets folder
-val embeddingModel = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
+val embeddingModel = "embedding.onnx"
 
 // in the end, your assets folder should look like below
 /*
@@ -26,7 +27,7 @@ val embeddingModel = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
 /Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxSpeakerDiarization/app/src/main/assets
 (py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
 total 89048
--rw-r--r-- 1 fangjun staff 38M Oct 12 20:28 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+-rw-r--r-- 1 fangjun staff 38M Oct 12 20:28 embedding.onnx
 -rw-r--r-- 1 fangjun staff 5.7M Oct 12 20:28 segmentation.onnx
 */
 
@@ -63,4 +64,4 @@ object SpeakerDiarizationObject {
             _sd = OfflineSpeakerDiarization(assetManager = assetManager, config = config)
         }
     }
-}
\ No newline at end of file
+}
diff --git a/scripts/apk/build-apk-speaker-diarization.sh.in b/scripts/apk/build-apk-speaker-diarization.sh.in
index 2b2922172..9a0080854 100755
--- a/scripts/apk/build-apk-speaker-diarization.sh.in
+++ b/scripts/apk/build-apk-speaker-diarization.sh.in
@@ -37,18 +37,20 @@ pushd ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/
 
 ls -lh
 
-model_name={{ model.model_name }}
-short_name={{ model.short_name }}
+segmentation_model_name={{ model.segmentation.model_name }}
+segmentation_short_name={{ model.segmentation.short_name }}
 
-curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/$model_name.tar.bz2
-tar xvf $model_name.tar.bz2
-rm $model_name.tar.bz2
-mv $model_name/model.onnx segmentation.onnx
-rm -rf $model_name
+embedding_model_name={{ model.embedding.model_name }}
+embedding_short_name={{ model.embedding.short_name }}
 
-if [ ! -f 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
-  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
-fi
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/$segmentation_model_name.tar.bz2
+tar xvf $segmentation_model_name.tar.bz2
+rm $segmentation_model_name.tar.bz2
+mv $segmentation_model_name/model.onnx segmentation.onnx
+rm -rf $segmentation_model_name
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$embedding_model_name.onnx
+mv $embedding_model_name.onnx embedding.onnx
 
 echo "pwd: $PWD"
 ls -lh
@@ -74,12 +76,12 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
   ./gradlew build
   popd
 
-  mv android/SherpaOnnxSpeakerDiarization/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-speaker-diarization-$short_name-3dspeaker.apk
+  mv android/SherpaOnnxSpeakerDiarization/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-speaker-diarization-$segmentation_short_name-$embedding_short_name.apk
   ls -lh apks
   rm -v ./android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/$arch/*.so
 done
 
-rm -rf ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/segmentation.onnx
+rm -rf ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/*.onnx
 
 {% endfor %}
 
diff --git a/scripts/apk/generate-speaker-diarization-apk-script.py b/scripts/apk/generate-speaker-diarization-apk-script.py
index 688189fad..f4bb06a9a 100755
--- a/scripts/apk/generate-speaker-diarization-apk-script.py
+++ b/scripts/apk/generate-speaker-diarization-apk-script.py
@@ -27,10 +27,22 @@ def get_args():
 @dataclass
 class SpeakerSegmentationModel:
     model_name: str
-    short_name: str = ""
+    short_name: str
 
 
-def get_models() -> List[SpeakerSegmentationModel]:
+@dataclass
+class SpeakerEmbeddingModel:
+    model_name: str
+    short_name: str
+
+
+@dataclass
+class Model:
+    segmentation: SpeakerSegmentationModel
+    embedding: SpeakerEmbeddingModel
+
+
+def get_segmentation_models() -> List[SpeakerSegmentationModel]:
     models = [
         SpeakerSegmentationModel(
             model_name="sherpa-onnx-pyannote-segmentation-3-0",
@@ -45,13 +57,33 @@ def get_models() -> List[SpeakerSegmentationModel]:
     return models
 
 
+def get_embedding_models() -> List[SpeakerEmbeddingModel]:
+    models = [
+        SpeakerEmbeddingModel(
+            model_name="3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k",
+            short_name="3dspeaker",
+        ),
+        SpeakerEmbeddingModel(
+            model_name="nemo_en_titanet_small",
+            short_name="nemo",
+        ),
+    ]
+    return models
+
+
 def main():
     args = get_args()
     index = args.index
     total = args.total
     assert 0 <= index < total, (index, total)
 
-    all_model_list = get_models()
+    segmentation_models = get_segmentation_models()
+    embedding_models = get_embedding_models()
+
+    all_model_list = []
+    for s in segmentation_models:
+        for e in embedding_models:
+            all_model_list.append(Model(segmentation=s, embedding=e))
 
     num_models = len(all_model_list)
 