From 61682d22e330b58cc2c12a59654aad454fb36f63 Mon Sep 17 00:00:00 2001 From: WolframRhodium Date: Sat, 20 Apr 2024 16:30:19 +0800 Subject: [PATCH] scripts/vsmlrt.py: document `fp16` behaviour of the ort_cuda backend --- scripts/vsmlrt.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/vsmlrt.py b/scripts/vsmlrt.py index 624e104..04a068c 100644 --- a/scripts/vsmlrt.py +++ b/scripts/vsmlrt.py @@ -78,6 +78,18 @@ class ORT_CUDA: basic performance tuning: set fp16 = True (on RTX GPUs) + + Semantics of `fp16`: + Enabling `fp16` will use a built-in quantization that converts an fp32 onnx to an fp16 onnx. + If the input video is of half-precision floating-point format, + the generated fp16 onnx will use fp16 input. + The output format can be controlled by the `output_format` option (0 = fp32, 1 = fp16). + + Disabling `fp16` will not use the built-in quantization. + However, if the onnx file itself uses fp16 for computation, + the actual computation will be done in fp16. + In this case, the input video format should match the input format of the onnx, + and the output format is inferred from the onnx. """ device_id: int = 0