From 61682d22e330b58cc2c12a59654aad454fb36f63 Mon Sep 17 00:00:00 2001 From: WolframRhodium Date: Sat, 20 Apr 2024 16:30:19 +0800 Subject: [PATCH] scripts/vsmlrt.py: document `fp16` behaviour of the ort_cuda backend --- scripts/vsmlrt.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/vsmlrt.py b/scripts/vsmlrt.py index 624e104..04a068c 100644 --- a/scripts/vsmlrt.py +++ b/scripts/vsmlrt.py @@ -78,6 +78,18 @@ class ORT_CUDA: basic performance tuning: set fp16 = True (on RTX GPUs) + + Semantics of `fp16`: + Enabling `fp16` will use a built-in quantization that converts an fp32 onnx to an fp16 onnx. + If the input video is of half-precision floating-point format, + the generated fp16 onnx will use fp16 input. + The output format can be controlled by the `output_format` option (0 = fp32, 1 = fp16). + + Disabling `fp16` will not use the built-in quantization. + However, if the onnx file itself uses fp16 for computation, + the actual computation will be done in fp16. + In this case, the input video format should match the input format of the onnx, + and the output format is inferred from the onnx. """ device_id: int = 0