From 45155c0feec89977acf926d50b073501ed6262ea Mon Sep 17 00:00:00 2001
From: WolframRhodium <WolframRhodium@users.noreply.github.com>
Date: Fri, 19 Apr 2024 11:35:58 +0800
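Subject: [PATCH] .github/workflows/windows-ort.yml: add fp16 i/o tests

Add CI steps that run the builtin waifu2x model with an fp16 (RGBH)
input clip and with output_format=1 on an RGBS input clip, and point
the existing run step at test_fp16.vpy.

Also fix vsort/vs_onnxruntime.cpp: in vsOrtCreate, both call sites that
create resource.output_tensor now pass onnx_output_type instead of
onnx_input_type as the tensor element type.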
---
 .github/workflows/windows-ort.yml | 36 +++++++++++++++++++++++++++++--
 vsort/vs_onnxruntime.cpp          |  4 ++--
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/windows-ort.yml b/.github/workflows/windows-ort.yml
index ee6c149..4986ae0 100644
--- a/.github/workflows/windows-ort.yml
+++ b/.github/workflows/windows-ort.yml
@@ -221,8 +221,40 @@ jobs:
       shell: bash
       run: |
         set -ex
-        vs_portable/vspipe -i test.vpy -
-        vs_portable/vspipe --y4m -p -e 9 test.vpy - | vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
+        vs_portable/vspipe -i test_fp16.vpy -
+        vs_portable/vspipe --y4m -p -e 9 test_fp16.vpy - | vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
+        ls -l out.hevc x265.log
+        cat x265.log
+        grep -F 'encoded 10 frames' x265.log || exit 2
+        grep -i 'error' x265.log && exit 1
+        exit 0
+
+    - name: Create script (fp16 input)
+      shell: bash
+      run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);flt=core.std.BlankClip(format=vs.RGBH).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True);print(flt,file=sys.stderr);flt.resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16_input.vpy
+
+    - name: Run vspipe (fp16 input)
+      shell: bash
+      run: |
+        set -ex
+        vs_portable/vspipe -i test_fp16_input.vpy -
+        vs_portable/vspipe --y4m -p -e 9 test_fp16_input.vpy - | vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
+        ls -l out.hevc x265.log
+        cat x265.log
+        grep -F 'encoded 10 frames' x265.log || exit 2
+        grep -i 'error' x265.log && exit 1
+        exit 0
+
+    - name: Create script (fp16 output)
+      shell: bash
+      run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);flt=core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True, output_format=1);print(flt,file=sys.stderr);flt.resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16_output.vpy
+
+    - name: Run vspipe (fp16 output)
+      shell: bash
+      run: |
+        set -ex
+        vs_portable/vspipe -i test_fp16_output.vpy -
+        vs_portable/vspipe --y4m -p -e 9 test_fp16_output.vpy - | vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
         ls -l out.hevc x265.log
         cat x265.log
         grep -F 'encoded 10 frames' x265.log || exit 2
diff --git a/vsort/vs_onnxruntime.cpp b/vsort/vs_onnxruntime.cpp
index ed86816..3541cee 100644
--- a/vsort/vs_onnxruntime.cpp
+++ b/vsort/vs_onnxruntime.cpp
@@ -1229,7 +1229,7 @@ static void VS_CC vsOrtCreate(
                 memory_info,
                 resource.output.d_data, resource.output.size,
                 std::data(output_shape), std::size(output_shape),
-                static_cast<ONNXTensorElementDataType>(onnx_input_type),
+                static_cast<ONNXTensorElementDataType>(onnx_output_type),
                 &resource.output_tensor
             ));
         } else
@@ -1238,7 +1238,7 @@ static void VS_CC vsOrtCreate(
             checkError(ortapi->CreateTensorAsOrtValue(
                 cpu_allocator,
                 std::data(output_shape), std::size(output_shape),
-                static_cast<ONNXTensorElementDataType>(onnx_input_type),
+                static_cast<ONNXTensorElementDataType>(onnx_output_type),
                 &resource.output_tensor
             ));
         }