Skip to content

scripts/vsmlrt.py: add tf32 flag to the ort_cuda backend #281

scripts/vsmlrt.py: add tf32 flag to the ort_cuda backend

scripts/vsmlrt.py: add tf32 flag to the ort_cuda backend #281

Workflow file for this run

# Workflow identity and the three ways it can be started.
name: Build (Windows-ORT)

on:
  # Pushes are filtered to changes under these paths.
  push:
    paths:
      - 'common/**'
      - 'vsort/**'
      - '.github/workflows/windows-ort.yml'

  # Reusable-workflow entry point: the caller has to supply `tag`.
  workflow_call:
    inputs:
      tag:
        type: string
        required: true
        description: 'which tag to upload to'

  # Manual entry point: `tag` falls back to the empty string.
  workflow_dispatch:
    inputs:
      tag:
        default: ''
        description: 'which tag to upload to'
jobs:
  build-windows:
    runs-on: windows-2022
    defaults:
      run:
        # Applies to every `run:` step that does not set its own shell:
        # commands execute under cmd, starting in the vsort directory.
        working-directory: vsort
        shell: cmd
    steps:
# Source checkout plus the compiler / generator tooling used by later steps.
- name: Checkout repo
  uses: actions/checkout@v4
  with:
    # 0 requests the full history instead of a shallow clone; the later
    # "Describe" step runs `git describe --tags`.
    fetch-depth: 0

- name: Setup MSVC
  uses: ilammy/msvc-dev-cmd@v1

# Ninja is the generator passed to every `cmake -G Ninja` call below.
- name: Setup Ninja
  run: pip install ninja
# protobuf: restore the install tree from cache; the checkout, configure,
# build and install steps only run when the cache lookup missed.
- name: Cache protobuf
  id: cache-protobuf
  uses: actions/cache@v4
  with:
    key: ${{ runner.os }}-vsort-protobuf-v4
    path: vsort/protobuf/install

- name: Checkout protobuf
  if: steps.cache-protobuf.outputs.cache-hit != 'true'
  uses: actions/checkout@v4
  with:
    repository: protocolbuffers/protobuf
    # follows protobuf in https://github.com/AmusementClub/onnxruntime/blob/master/cmake/external/onnxruntime_external_deps.cmake#L203
    # if you change this, remember to bump the version of the cache key.
    ref: v3.21.12
    path: vsort/protobuf
    fetch-depth: 1

# Folded scalar: the lines below are joined with single spaces into one
# cmake command line.
- name: Configure protobuf
  if: steps.cache-protobuf.outputs.cache-hit != 'true'
  run: >-
    cmake -S protobuf -B protobuf\build_rel -G Ninja -LA
    -D CMAKE_BUILD_TYPE=Release
    -D protobuf_BUILD_SHARED_LIBS=OFF -D protobuf_BUILD_TESTS=OFF

- name: Build protobuf
  if: steps.cache-protobuf.outputs.cache-hit != 'true'
  run: cmake --build protobuf\build_rel --verbose

# Installs into the directory that the cache step above saves.
- name: Install protobuf
  if: steps.cache-protobuf.outputs.cache-hit != 'true'
  run: cmake --install protobuf\build_rel --prefix protobuf\install
# onnx: same cache-or-build pattern as protobuf; every step after the cache
# lookup is skipped on a cache hit.
- name: Cache onnx
  id: cache-onnx
  uses: actions/cache@v4
  with:
    key: ${{ runner.os }}-vsort-onnx-v5
    path: vsort/onnx/install

- name: Checkout onnx
  if: steps.cache-onnx.outputs.cache-hit != 'true'
  uses: actions/checkout@v4
  with:
    repository: onnx/onnx
    # follows onnx in https://github.com/AmusementClub/onnxruntime/tree/master/cmake/external
    # if you change this, remember to bump the version of the cache key.
    ref: 990217f043af7222348ca8f0301e17fa7b841781
    path: vsort/onnx
    fetch-depth: 1

# Folded scalar: joined into a single cmake command line. The Protobuf_*
# settings point at the protobuf install tree produced by the earlier steps.
- name: Configure onnx
  if: steps.cache-onnx.outputs.cache-hit != 'true'
  run: >-
    cmake -S onnx -B onnx\build -G Ninja -LA
    -D CMAKE_BUILD_TYPE=Release
    -D Protobuf_PROTOC_EXECUTABLE=protobuf\install\bin\protoc
    -D Protobuf_LITE_LIBRARY=protobuf\install\lib
    -D Protobuf_LIBRARIES=protobuf\install\lib
    -D ONNX_USE_LITE_PROTO=ON -D ONNX_USE_PROTOBUF_SHARED_LIBS=OFF
    -D ONNX_GEN_PB_TYPE_STUBS=OFF -D ONNX_ML=0
    -D ONNX_USE_MSVC_STATIC_RUNTIME=1

- name: Build onnx
  if: steps.cache-onnx.outputs.cache-hit != 'true'
  run: cmake --build onnx\build --verbose

# Installs into the directory that the cache step above saves.
- name: Install onnx
  if: steps.cache-onnx.outputs.cache-hit != 'true'
  run: cmake --install onnx\build --prefix onnx\install
# Both downloads use `curl -fsS`: --fail makes curl exit non-zero on an HTTP
# error instead of saving the server's error page as the archive, and
# --show-error keeps the error message visible despite --silent.

# Fetches the R54 source archive and renames the extracted directory to
# `vapoursynth`, which the main Configure step references as
# vapoursynth\include.
- name: Download VapourSynth headers
  run: |
    curl -fsS -o vs.zip -L https://github.com/vapoursynth/vapoursynth/archive/refs/tags/R54.zip
    unzip -q vs.zip
    mv vapoursynth-*/ vapoursynth/

# Prebuilt ONNX Runtime package; the main Configure step points at
# onnxruntime-gpu\include and onnxruntime-gpu\lib.
- name: Download ONNX Runtime Precompilation
  run: |
    curl -fsS -o ortgpu.zip -LJO https://github.com/AmusementClub/onnxruntime/releases/download/orttraining_rc2-7983-g9001c69b84-240419-0832/onnxruntime-gpu-win64.zip
    unzip -q ortgpu.zip
# CUDA toolkit: restored from cache when possible, otherwise installed with
# NVIDIA's network installer.
- name: Cache CUDA
  id: cache-cuda
  uses: actions/cache@v4
  with:
    path: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
    # The key embeds the toolkit version; keep it in step with the installer
    # URL and component names below.
    key: ${{ runner.os }}-cuda-12.4.0

# `curl -fsS`: fail on HTTP errors (rather than saving an error page as the
# .exe) while still printing the error message.
# NOTE(review): `-s nvcc_12.4 cudart_12.4` presumably requests a silent
# install of just those two components - confirm against the NVIDIA
# installer documentation.
- name: Setup CUDA
  if: steps.cache-cuda.outputs.cache-hit != 'true'
  run: |
    curl -fsS -o cuda_installer.exe -L https://developer.download.nvidia.com/compute/cuda/12.4.0/network_installers/cuda_12.4.0_windows_network.exe
    cuda_installer.exe -s nvcc_12.4 cudart_12.4
# Configures the plugin in the working directory against the dependencies
# prepared by the earlier steps. Folded scalar: the lines are joined with
# single spaces into one cmake command line.
- name: Configure
  run: >-
    cmake -S . -B build -G Ninja -LA
    -D CMAKE_BUILD_TYPE=Release
    -D CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded
    -D VAPOURSYNTH_INCLUDE_DIRECTORY=vapoursynth\include
    -D protobuf_DIR=protobuf\install\cmake
    -D ONNX_DIR=onnx\install\lib\cmake\ONNX
    -D ONNX_RUNTIME_API_DIRECTORY=onnxruntime-gpu\include\onnxruntime
    -D ONNX_RUNTIME_LIB_DIRECTORY=onnxruntime-gpu\lib
    -D ENABLE_CUDA=1
    -D CUDAToolkit_ROOT="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
    -D ENABLE_DML=1
    -D CMAKE_CXX_STANDARD=20

- name: Build
  run: cmake --build build --verbose
# Assembles the artifact directory: vsort.dll at the top level, the ONNX
# Runtime DLLs (and, in the next step, DirectML.dll) in the vsort\ subfolder.
- name: Install
  run: |
    cmake --install build --prefix install
    mkdir artifact
    mkdir artifact\vsort
    copy install\bin\vsort.dll artifact\
    copy onnxruntime-gpu\bin\*.dll artifact\vsort\
    copy onnxruntime-gpu\lib\*.dll artifact\vsort\

# The .nupkg is unpacked with unzip. `curl -fsS` makes an HTTP error fail
# the download visibly instead of saving an error page as the package.
- name: Download DirectML Library
  # follows DirectML in https://github.com/AmusementClub/onnxruntime/blob/master/cmake/external/dml.cmake#L44
  run: |
    curl -fsS -o directml.nupkg -LJO https://www.nuget.org/api/v2/package/Microsoft.AI.DirectML/1.14.1
    unzip -q directml.nupkg -d dml
    copy dml\bin\x64-win\DirectML.dll artifact\vsort\

# `path` is given relative to the repository root, hence the vsort/ prefix
# (run steps default to the vsort working directory; `uses` steps do not).
- name: Upload
  uses: actions/upload-artifact@v4
  with:
    name: VSORT-Windows-x64
    path: vsort/artifact
# Builds a throwaway portable VapourSynth environment in vs_portable\ for the
# smoke tests that follow. Every download uses `curl -fsS` so an HTTP error
# fails the transfer with a visible message instead of saving an error page
# that 7z would then reject.
- name: Setup Python portable
  run: |
    curl -fsS -o python.zip -LJO https://www.python.org/ftp/python/3.9.10/python-3.9.10-embed-amd64.zip
    7z x python.zip -ovs_portable

# -y answers yes to 7z prompts, e.g. when files already exist in vs_portable.
- name: Install VapourSynth portable
  run: |
    curl -fsS -o vs.7z -LJO https://github.com/vapoursynth/vapoursynth/releases/download/R54/VapourSynth64-Portable-R54.7z
    7z x vs.7z -ovs_portable -y

# Mirrors the artifact layout inside the portable plugins folder.
- name: Copy plugin
  run: |
    copy artifact\*.dll vs_portable\vapoursynth64\plugins
    mkdir vs_portable\vapoursynth64\plugins\vsort\
    copy artifact\vsort\*.dll vs_portable\vapoursynth64\plugins\vsort\

# Extracted under plugins\models; the test scripts load the model with
# builtin=True and a path relative to waifu2x\.
- name: Install waifu2x model
  run: |
    curl -fsS -o waifu2x.7z -LJO https://github.com/AmusementClub/vs-mlrt/releases/download/model-20211209/waifu2x_v3.7z
    7z x waifu2x.7z -ovs_portable\vapoursynth64\plugins\models

# x265 consumes the vspipe output in the smoke tests.
- name: Download x265
  run: |
    curl -fsS -o x265.7z -LJO https://github.com/AmusementClub/x265/releases/download/Yuuki-3.5-AC3/x265-win64-x86-64-clang.Yuuki-3.5-AC3.7z
    7z x x265.7z -ovs_portable\
# Smoke test, default precision. The first step writes a one-line VapourSynth
# script that prints the plugin object and its version to stderr and runs the
# bundled waifu2x model on a blank RGBS clip.
- name: Create script
  shell: bash
  run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);print(core.ort.Version(),file=sys.stderr);core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True).resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test.vpy

# Pipes frames 0-9 through x265. The step exits 2 when the log lacks
# "encoded 10 frames" and 1 when the log mentions "error" (any case);
# otherwise it exits 0.
- name: Run vspipe
  shell: bash
  run: |
    set -ex
    vs_portable/vspipe -i test.vpy -
    vs_portable/vspipe --y4m -p -e 9 test.vpy - | vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
    ls -l out.hevc x265.log
    cat x265.log
    grep -F 'encoded 10 frames' x265.log || exit 2
    grep -i 'error' x265.log && exit 1
    exit 0
# Smoke test with fp16=True passed to ort.Model; the clip is still created
# as RGBS.
- name: Create script (fp16)
  shell: bash
  run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True).resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16.vpy

# Pipes frames 0-9 through x265. Exits 2 when the log lacks
# "encoded 10 frames", 1 when it mentions "error", else 0.
# out.hevc and x265.log left behind by the previous test step are removed
# first, so the checks below can only be satisfied by this run's output.
- name: Run vspipe (fp16)
  shell: bash
  run: |
    set -ex
    rm -f out.hevc x265.log
    vs_portable/vspipe -i test_fp16.vpy -
    vs_portable/vspipe --y4m -p -e 9 test_fp16.vpy - | vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
    ls -l out.hevc x265.log
    cat x265.log
    grep -F 'encoded 10 frames' x265.log || exit 2
    grep -i 'error' x265.log && exit 1
    exit 0
# Smoke test with an RGBH source clip and fp16=True; the filtered clip is
# also printed to stderr before conversion.
- name: Create script (fp16 input)
  shell: bash
  run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);flt=core.std.BlankClip(format=vs.RGBH).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True);print(flt,file=sys.stderr);flt.resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16_input.vpy

# Pipes frames 0-9 through x265. Exits 2 when the log lacks
# "encoded 10 frames", 1 when it mentions "error", else 0.
# out.hevc and x265.log left behind by the previous test step are removed
# first, so the checks below can only be satisfied by this run's output.
- name: Run vspipe (fp16 input)
  shell: bash
  run: |
    set -ex
    rm -f out.hevc x265.log
    vs_portable/vspipe -i test_fp16_input.vpy -
    vs_portable/vspipe --y4m -p -e 9 test_fp16_input.vpy - | vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
    ls -l out.hevc x265.log
    cat x265.log
    grep -F 'encoded 10 frames' x265.log || exit 2
    grep -i 'error' x265.log && exit 1
    exit 0
# Smoke test with an RGBS source clip, fp16=True and output_format=1; the
# filtered clip is also printed to stderr before conversion.
- name: Create script (fp16 output)
  shell: bash
  run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);flt=core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True, output_format=1);print(flt,file=sys.stderr);flt.resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16_output.vpy

# Pipes frames 0-9 through x265. Exits 2 when the log lacks
# "encoded 10 frames", 1 when it mentions "error", else 0.
# out.hevc and x265.log left behind by the previous test step are removed
# first, so the checks below can only be satisfied by this run's output.
- name: Run vspipe (fp16 output)
  shell: bash
  run: |
    set -ex
    rm -f out.hevc x265.log
    vs_portable/vspipe -i test_fp16_output.vpy -
    vs_portable/vspipe --y4m -p -e 9 test_fp16_output.vpy - | vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
    ls -l out.hevc x265.log
    cat x265.log
    grep -F 'encoded 10 frames' x265.log || exit 2
    grep -i 'error' x265.log && exit 1
    exit 0
- name: Describe
  run: git describe --tags --long

# The release steps read the tag through the `inputs` context, which carries
# the declared `tag` input for both workflow_dispatch and workflow_call.
# Reading `github.event.inputs.tag` instead depends on the triggering event's
# payload and could disagree with `tag_name` below when this workflow is
# called from another one.

# Runs from vsort\artifact, so `../../` places the archive in the repository
# root, where the Release step's `files` pattern looks for it.
- name: Compress artifact for release
  if: github.event_name == 'workflow_dispatch' && inputs.tag != ''
  run: |
    cd artifact
    7z a -t7z -mx=7 ../../VSORT-Windows-x64.${{ inputs.tag }}.7z .

# Publishes the archive as a prerelease under the requested tag; the step
# fails if the archive is missing (fail_on_unmatched_files).
- name: Release
  uses: softprops/action-gh-release@v1
  if: github.event_name == 'workflow_dispatch' && inputs.tag != ''
  with:
    tag_name: ${{ inputs.tag }}
    files: VSORT-Windows-x64.${{ inputs.tag }}.7z
    fail_on_unmatched_files: true
    generate_release_notes: false
    prerelease: true