From c855a58cfd8628dfe4ef2ffc0ac169d84a8ac0c5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 25 Dec 2023 19:41:09 +0800 Subject: [PATCH] Generate the dependency matrix by code for GitHub Actions (#1431) --- .github/scripts/docker/Dockerfile | 2 + .../scripts/docker/generate_build_matrix.py | 79 ++++++++ .../{docker => librispeech/ASR}/run.sh | 11 +- .github/scripts/yesno/ASR/run.sh | 86 ++++++++ .github/workflows/build-cpu-docker.yml | 42 ++-- .github/workflows/run-yesno-recipe.yml | 184 ++++-------------- .github/workflows/test.yml | 27 ++- .github/workflows/train-librispeech.yml | 33 +++- 8 files changed, 280 insertions(+), 184 deletions(-) create mode 100755 .github/scripts/docker/generate_build_matrix.py rename .github/scripts/{docker => librispeech/ASR}/run.sh (87%) create mode 100755 .github/scripts/yesno/ASR/run.sh diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index bbf978d26b..f75d748540 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -31,10 +31,12 @@ LABEL github_repo="https://github.com/k2-fsa/icefall" RUN pip install --no-cache-dir \ torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \ k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \ + \ git+https://github.com/lhotse-speech/lhotse \ kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \ dill \ graphviz \ + kaldi-decoder \ kaldi_native_io \ kaldialign \ kaldifst \ diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py new file mode 100755 index 0000000000..4e494d8107 --- /dev/null +++ b/.github/scripts/docker/generate_build_matrix.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang) + + +import json + + +def version_gt(a, b): + a_major, a_minor = a.split(".")[:2] + b_major, b_minor = b.split(".")[:2] + if a_major > b_major: + return True + + if a_major == b_major and a_minor > b_minor: + return True + + return False + + +def version_ge(a, b): + a_major, a_minor = a.split(".")[:2] + b_major, b_minor = b.split(".")[:2] + if a_major > b_major: + return True + + if a_major == b_major and a_minor >= b_minor: + return True + + return False + + +def get_torchaudio_version(torch_version): + if torch_version == "1.13.0": + return "0.13.0" + elif torch_version == "1.13.1": + return "0.13.1" + elif torch_version == "2.0.0": + return "2.0.1" + elif torch_version == "2.0.1": + return "2.0.2" + else: + return torch_version + + +def get_matrix(): + k2_version = "1.24.4.dev20231220" + kaldifeat_version = "1.25.3.dev20231221" + version = "1.1" + python_version = ["3.8", "3.9", "3.10", "3.11"] + torch_version = ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + + matrix = [] + for p in python_version: + for t in torch_version: + # torchaudio <= 1.13.x supports only python <= 3.10 + + if version_gt(p, "3.10") and not version_gt(t, "2.0"): + continue + + matrix.append( + { + "k2-version": k2_version, + "kaldifeat-version": kaldifeat_version, + "version": version, + "python-version": p, + "torch-version": t, + "torchaudio-version": get_torchaudio_version(t), + } + ) + return matrix + + +def main(): + matrix = get_matrix() + print(json.dumps({"include": matrix})) + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/docker/run.sh b/.github/scripts/librispeech/ASR/run.sh similarity index 87% rename from .github/scripts/docker/run.sh rename to .github/scripts/librispeech/ASR/run.sh index aeb80b3302..641d59458b 100755 --- a/.github/scripts/docker/run.sh +++ b/.github/scripts/librispeech/ASR/run.sh @@ -1,11 +1,12 @@ #!/usr/bin/env bash + set -ex -cd /icefall -export PYTHONPATH=/icefall:$PYTHONPATH -python3 -c "import torch; print(torch.__file__)" -python3 -c "import torchaudio; print(torchaudio.__version__)" -python3 -c "import icefall; print(icefall.__file__)" +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} cd egs/librispeech/ASR diff --git a/.github/scripts/yesno/ASR/run.sh b/.github/scripts/yesno/ASR/run.sh new file mode 100755 index 0000000000..05c8fbac9b --- /dev/null +++ b/.github/scripts/yesno/ASR/run.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +set -ex + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +cd egs/yesno/ASR + +log "data preparation" +./prepare.sh + +log "training" +python3 ./tdnn/train.py + +log "decoding" +python3 ./tdnn/decode.py + +log "export to pretrained.pt" + +python3 ./tdnn/export.py --epoch 14 --avg 2 + +python3 ./tdnn/pretrained.py \ + --checkpoint ./tdnn/exp/pretrained.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test exporting to torchscript" +python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + +python3 ./tdnn/jit_pretrained.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test exporting to onnx" +python3 ./tdnn/export_onnx.py --epoch 14 --avg 2 + +log "Test float32 model" +python3 ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test int8 model" +python3 ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test decoding with H" +python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + +python3 ./tdnn/jit_pretrained_decode_with_H.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --H ./data/lang_phone/H.fst \ + --tokens ./data/lang_phone/tokens.txt \ + ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ + ./download/waves_yesno/0_0_1_0_0_1_1_1.wav + +log "Test decoding with HL" +python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + +python3 ./tdnn/jit_pretrained_decode_with_HL.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HL ./data/lang_phone/HL.fst \ + --words ./data/lang_phone/words.txt \ + ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ + ./download/waves_yesno/0_0_1_0_0_1_1_1.wav + +log "Show generated files" +ls -lh tdnn/exp +ls -lh data/lang_phone diff --git a/.github/workflows/build-cpu-docker.yml b/.github/workflows/build-cpu-docker.yml index b26cd2095f..c5d5aaeb67 100644 --- a/.github/workflows/build-cpu-docker.yml +++ b/.github/workflows/build-cpu-docker.yml @@ -7,18 +7,31 @@ concurrency: cancel-in-progress: true jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" build-cpu-docker: + needs: generate_build_matrix name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - os: [ubuntu-latest] - python-version: ["3.8", "3.9", "3.10"] - torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] - k2-version: ["1.24.4.dev20231220"] - kaldifeat-version: ["1.25.3.dev20231221"] - version: ["1.1"] + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} steps: # refer to https://github.com/actions/checkout @@ -45,25 +58,14 @@ jobs: run: | cd .github/scripts/docker torch_version=${{ matrix.torch-version }} + torchaudio_version=${{ matrix.torchaudio-version }} - # see https://pytorch.org/audio/stable/installation.html#compatibility-matrix - if [[ $torch_version == 1.13.0 ]]; then - torchaudio_version=0.13.0 - elif [[ $torch_version == 1.13.1 ]]; then - torchaudio_version=0.13.1 - elif [[ $torch_version == 2.0.0 ]]; then - torchaudio_version=2.0.1 - elif [[ $torch_version == 2.0.1 ]]; then - torchaudio_version=2.0.2 - else - torchaudio_version=$torch_version - fi echo "torch_version: $torch_version" echo "torchaudio_version: $torchaudio_version" version=${{ matrix.version }} - tag=ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version + tag=ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version echo "tag: $tag" docker build \ diff --git a/.github/workflows/run-yesno-recipe.yml b/.github/workflows/run-yesno-recipe.yml index 9ac848535f..a99811815c 100644 --- a/.github/workflows/run-yesno-recipe.yml +++ b/.github/workflows/run-yesno-recipe.yml @@ -20,166 +20,60 @@ on: push: branches: - master + - refactor-ci + pull_request: branches: - master + workflow_dispatch: + concurrency: group: run-yesno-recipe-${{ github.ref }} cancel-in-progress: true jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" run-yesno-recipe: - runs-on: ${{ matrix.os }} + needs: generate_build_matrix + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} + runs-on: ubuntu-latest strategy: - matrix: - # os: [ubuntu-latest, macos-10.15] - # TODO: enable macOS for CPU testing - os: [ubuntu-latest] - python-version: [3.8] fail-fast: false + matrix: + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - name: Run the yesno recipe + uses: addnab/docker-run-action@v3 with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install libnsdfile and libsox - if: startsWith(matrix.os, 'ubuntu') - run: | - sudo apt update - sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg - sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - pip install --no-deps --force-reinstall k2==1.24.4.dev20231021+cpu.torch1.13.1 -f https://k2-fsa.github.io/k2/cpu.html - pip install kaldifeat==1.25.1.dev20231022+cpu.torch1.13.1 -f https://csukuangfj.github.io/kaldifeat/cpu.html - - - name: Run yesno recipe - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - ./prepare.sh - python3 ./tdnn/train.py - python3 ./tdnn/decode.py - - - name: Test exporting to pretrained.pt - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 - - python3 ./tdnn/pretrained.py \ - --checkpoint ./tdnn/exp/pretrained.pt \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - name: Test exporting to torchscript - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 - - python3 ./tdnn/jit_pretrained.py \ - --nn-model ./tdnn/exp/cpu_jit.pt \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - name: Test exporting to onnx - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export_onnx.py --epoch 14 --avg 2 - - echo "Test float32 model" - python3 ./tdnn/onnx_pretrained.py \ - --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - echo "Test int8 model" - python3 ./tdnn/onnx_pretrained.py \ - --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - name: Test decoding with H - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 - - python3 ./tdnn/jit_pretrained_decode_with_H.py \ - --nn-model ./tdnn/exp/cpu_jit.pt \ - --H ./data/lang_phone/H.fst \ - --tokens ./data/lang_phone/tokens.txt \ - ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ - ./download/waves_yesno/0_0_1_0_0_1_1_1.wav - - - name: Test decoding with HL - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 - - python3 ./tdnn/jit_pretrained_decode_with_HL.py \ - --nn-model ./tdnn/exp/cpu_jit.pt \ - --HL ./data/lang_phone/HL.fst \ - --words ./data/lang_phone/words.txt \ - ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ - ./download/waves_yesno/0_0_1_0_0_1_1_1.wav - - - name: Show generated files - shell: bash - working-directory: ${{github.workspace}} - run: | - cd egs/yesno/ASR - ls -lh tdnn/exp - ls -lh data/lang_phone + image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + options: | + --volume ${{ github.workspace }}/:/icefall + shell: bash + run: | + export PYTHONPATH=/icefall:$PYTHONPATH + cd /icefall + git config --global --add safe.directory /icefall + + .github/scripts/yesno/ASR/run.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b3fd6f1339..659681b37e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,16 +16,31 @@ concurrency: cancel-in-progress: true jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" test: + needs: generate_build_matrix name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - os: [ubuntu-latest] - python-version: ["3.8", "3.9", "3.10"] - torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] - version: ["1.1"] + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} steps: - uses: actions/checkout@v4 @@ -44,7 +59,7 @@ jobs: - name: Run tests uses: addnab/docker-run-action@v3 with: - image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} options: | --volume ${{ github.workspace }}/:/icefall shell: bash diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/train-librispeech.yml index 53a2d58439..79002a8813 100644 --- a/.github/workflows/train-librispeech.yml +++ b/.github/workflows/train-librispeech.yml @@ -15,16 +15,31 @@ concurrency: cancel-in-progress: true jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" train-librispeech: + needs: generate_build_matrix name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - os: [ubuntu-latest] - python-version: ["3.8", "3.9", "3.10"] - torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] - version: ["1.1"] + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} steps: # refer to https://github.com/actions/checkout @@ -44,11 +59,13 @@ jobs: - name: Test zipformer/train.py with LibriSpeech uses: addnab/docker-run-action@v3 with: - image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} options: | --volume ${{ github.workspace }}/:/icefall shell: bash run: | - ls -lh /icefall + export PYTHONPATH=/icefall:$PYTHONPATH + cd /icefall + git config --global --add safe.directory /icefall - /icefall/.github/scripts/docker/run.sh + .github/scripts/librispeech/ASR/run.sh