From 56d6aea6e2b56fd443be638282344569473216ea Mon Sep 17 00:00:00 2001 From: Miles Price Date: Thu, 14 Mar 2024 14:53:59 -0700 Subject: [PATCH] feat: adding code for release v0.6.0 (beta-4) of CV-CUDA --- 3rdparty/CMakeLists.txt | 18 +- CMakeLists.txt | 4 +- CODE_OF_CONDUCT.md | 16 + CONTRIBUTING.md | 20 +- DEVELOPER_GUIDE.md | 21 +- LICENSE.md | 15 + README.md | 294 +- SECURITY.md | 15 + bench/BenchFindContours.cpp | 8 +- bench/BenchHQResize.cpp | 129 + bench/BenchMorphology.cpp | 10 +- bench/CMakeLists.txt | 4 +- bench/python/README.md | 116 + bench/python/all_ops/op_adaptivethreshold.py | 41 + bench/python/all_ops/op_averageblur.py | 32 + bench/python/all_ops/op_blurbox.py | 80 + bench/python/all_ops/op_boundingbox.py | 84 + bench/python/all_ops/op_brightnesscontrast.py | 46 + bench/python/all_ops/op_centercrop.py | 33 + bench/python/all_ops/op_composite.py | 52 + bench/python/all_ops/op_convertto.py | 35 + bench/python/all_ops/op_copymakeborder.py | 42 + bench/python/all_ops/op_customcrop.py | 30 + bench/python/all_ops/op_cvtcolor.py | 37 + bench/python/all_ops/op_findcontours.py | 109 + bench/python/all_ops/op_flip.py | 29 + bench/python/all_ops/op_gaussianblur.py | 30 + bench/python/all_ops/op_hqresize.py | 53 + bench/python/all_ops/op_inpaint.py | 56 + bench/python/all_ops/op_jointbilateral.py | 55 + bench/python/all_ops/op_laplacian.py | 30 + bench/python/all_ops/op_morphology.py | 102 + bench/python/all_ops/op_nms.py | 52 + bench/python/all_ops/op_normalize.py | 45 + bench/python/all_ops/op_randomresizedcrop.py | 44 + bench/python/all_ops/op_reformat.py | 43 + bench/python/all_ops/op_remap.py | 53 + bench/python/all_ops/op_reshape.py | 32 + bench/python/all_ops/op_resize.py | 57 + bench/python/all_ops/op_rotate.py | 31 + bench/python/all_ops/op_sift.py | 47 + bench/python/all_ops/op_threshold.py | 42 + bench/python/all_ops/op_warpaffine.py | 60 + bench/python/all_ops/op_warpperspective.py | 70 + bench/python/assets/NOTICE.md | 19 + bench/python/assets/brooklyn.jpg | 3 + bench/python/assets/brooklyn_bboxes.pt | Bin 0 -> 196520 bytes bench/python/assets/brooklyn_mask.jpg | 3 + bench/python/assets/brooklyn_nms_masks.pt | Bin 0 -> 196620 bytes bench/python/assets/brooklyn_scores.pt | Bin 0 -> 98600 bytes bench/python/assets/countour_lines.jpg | 3 + bench/python/bench_utils.py | 411 +++ bench/python/run_bench.py | 236 ++ ci/build.sh | 80 +- ci/build_docs.sh | 4 +- ci/build_samples.sh | 2 +- cmake/BuildPython.cmake | 12 +- cmake/ConfigCPack.cmake | 4 +- cmake/ConfigCUDA.cmake | 7 +- cmake/ConfigCompiler.cmake | 2 +- cmake/ConfigPython.cmake | 4 +- cmake/GetGitRevisionDescription.cmake | 3 +- cmake/InstallNVCVDev.cmake | 6 +- cmake/InstallNVCVLib.cmake | 11 +- cmake/InstallPython.cmake | 19 +- cmake/InstallTests.cmake | 2 +- docker/build20.04/Dockerfile | 90 + docker/{build => build20.04}/ccache.conf | 0 .../deadsnakes-ubuntu-ppa-focal.list | 17 + docker/{build => build22.04}/Dockerfile | 58 +- docker/build22.04/ccache.conf | 17 + .../deadsnakes-ubuntu-ppa-jammy.list | 0 docker/config | 4 +- docker/{devel => devel20.04}/Dockerfile | 32 +- docker/{devel => devel20.04}/gdbinit | 0 docker/{devel => devel20.04}/vimrc | 0 docker/devel22.04/Dockerfile | 72 + docker/devel22.04/gdbinit | 19 + docker/devel22.04/vimrc | 23 + docker/env_devel_linux.sh | 4 +- docker/samples/Dockerfile | 1 + docker/test20.04/Dockerfile | 59 + .../deadsnakes-ubuntu-ppa-focal.list | 17 + docker/{test => test22.04}/Dockerfile | 25 +- .../deadsnakes-ubuntu-ppa-jammy.list | 0 docker/update_build_image.sh | 4 +- 
docker/update_devel_image.sh | 6 +- docker/update_samples_image.sh | 1 + docker/update_test_image.sh | 4 +- docs/CMakeLists.txt | 4 +- docs/sphinx/content/cvcuda_oplist.csv | 5 +- docs/sphinx/index.rst | 3 +- docs/sphinx/installation.rst | 68 +- docs/sphinx/relnotes/v0.5.0-beta.rst | 39 +- docs/sphinx/relnotes/v0.6.0-beta.rst | 78 + .../sphinx/samples/cpp_samples/cropresize.rst | 2 +- .../samples/python_samples/classification.rst | 16 +- ...orch.rst => imagebatchdecoder_nvcodec.rst} | 40 +- ...orch.rst => imagebatchencoder_nvcodec.rst} | 22 +- ..._vpf.rst => videobatchdecoder_nvcodec.rst} | 58 +- ..._vpf.rst => videobatchencoder_nvcodec.rst} | 62 +- .../python_samples/object_detection.rst | 24 +- .../samples/python_samples/segmentation.rst | 24 +- lint/copyright_check.sh | 13 +- python/CMakeLists.txt | 21 +- python/build_wheels.sh | 84 + python/mod_cvcuda/CMakeLists.txt | 1 + python/mod_cvcuda/InterpolationType.cpp | 1 + python/mod_cvcuda/Main.cpp | 1 + python/mod_cvcuda/OpAdaptiveThreshold.cpp | 12 +- python/mod_cvcuda/OpAdvCvtColor.cpp | 6 +- python/mod_cvcuda/OpAverageBlur.cpp | 12 +- python/mod_cvcuda/OpBilateralFilter.cpp | 12 +- python/mod_cvcuda/OpBndBox.cpp | 6 +- python/mod_cvcuda/OpBoxBlur.cpp | 6 +- python/mod_cvcuda/OpBrightnessContrast.cpp | 8 +- python/mod_cvcuda/OpCenterCrop.cpp | 6 +- python/mod_cvcuda/OpChannelReorder.cpp | 6 +- python/mod_cvcuda/OpColorTwist.cpp | 6 +- python/mod_cvcuda/OpComposite.cpp | 12 +- python/mod_cvcuda/OpConv2D.cpp | 6 +- python/mod_cvcuda/OpConvertTo.cpp | 6 +- python/mod_cvcuda/OpCopyMakeBorder.cpp | 18 +- .../OpCropFlipNormalizeReformat.cpp | 6 +- python/mod_cvcuda/OpCustomCrop.cpp | 6 +- python/mod_cvcuda/OpCvtColor.cpp | 12 +- python/mod_cvcuda/OpErase.cpp | 12 +- python/mod_cvcuda/OpFindContours.cpp | 8 +- python/mod_cvcuda/OpFindHomography.cpp | 14 +- python/mod_cvcuda/OpFlip.cpp | 12 +- python/mod_cvcuda/OpGammaContrast.cpp | 6 +- python/mod_cvcuda/OpGaussian.cpp | 12 +- python/mod_cvcuda/OpGaussianNoise.cpp | 12 +- python/mod_cvcuda/OpHQResize.cpp | 761 +++++ python/mod_cvcuda/OpHistogram.cpp | 8 +- python/mod_cvcuda/OpHistogramEq.cpp | 12 +- python/mod_cvcuda/OpInpaint.cpp | 12 +- python/mod_cvcuda/OpJointBilateralFilter.cpp | 12 +- python/mod_cvcuda/OpLabel.cpp | 18 +- python/mod_cvcuda/OpLaplacian.cpp | 12 +- python/mod_cvcuda/OpMedianBlur.cpp | 12 +- python/mod_cvcuda/OpMinAreaRect.cpp | 6 +- python/mod_cvcuda/OpMinMaxLoc.cpp | 18 +- python/mod_cvcuda/OpMorphology.cpp | 17 +- python/mod_cvcuda/OpNonMaximumSuppression.cpp | 6 +- python/mod_cvcuda/OpNormalize.cpp | 12 +- python/mod_cvcuda/OpOSD.cpp | 6 +- python/mod_cvcuda/OpPadAndStack.cpp | 6 +- python/mod_cvcuda/OpPairwiseMatcher.cpp | 14 +- python/mod_cvcuda/OpPillowResize.cpp | 12 +- python/mod_cvcuda/OpRandomResizedCrop.cpp | 12 +- python/mod_cvcuda/OpReformat.cpp | 6 +- python/mod_cvcuda/OpRemap.cpp | 12 +- python/mod_cvcuda/OpResize.cpp | 12 +- python/mod_cvcuda/OpRotate.cpp | 12 +- python/mod_cvcuda/OpSIFT.cpp | 6 +- python/mod_cvcuda/OpStack.cpp | 6 +- python/mod_cvcuda/OpThreshold.cpp | 12 +- python/mod_cvcuda/OpWarpAffine.cpp | 12 +- python/mod_cvcuda/OpWarpPerspective.cpp | 12 +- python/mod_cvcuda/Operators.hpp | 1 + python/mod_nvcv/CAPI.cpp | 8 +- python/mod_nvcv/Resource.cpp | 12 +- .../mod_nvcv/include/nvcv/python/LockMode.hpp | 8 +- .../include/nvcv/python/ResourceGuard.hpp | 8 +- python/setup.py.in | 81 + samples/NOTICE.md | 15 + samples/README.md | 113 +- samples/classification/CMakeLists.txt | 10 +- samples/classification/Main.cpp | 2 +- 
samples/classification/python/main.py | 11 +- samples/common/CMakeLists.txt | 12 +- samples/common/python/nvcodec_utils.py | 641 ++++ samples/common/python/perf_utils.py | 276 +- samples/common/python/torch_utils.py | 187 -- samples/common/python/vpf_utils.py | 519 --- samples/cropandresize/CMakeLists.txt | 10 +- samples/label/python/main.py | 32 +- samples/object_detection/python/main.py | 20 +- samples/scripts/README.md | 19 +- samples/scripts/benchmark.py | 46 +- samples/scripts/benchmark_samples.sh | 104 +- samples/scripts/install_dependencies.sh | 73 +- samples/scripts/requirements.txt | 9 + samples/scripts/run_samples.sh | 87 +- samples/segmentation/python/README.md | 23 +- samples/segmentation/python/main.py | 36 +- .../segmentation/python/model_inference.py | 14 +- samples/segmentation/python/triton_client.py | 20 +- src/cvcuda/CMakeLists.txt | 1 + src/cvcuda/OpHQResize.cpp | 139 + src/cvcuda/include/cvcuda/OpErase.h | 12 +- src/cvcuda/include/cvcuda/OpHQResize.h | 406 +++ src/cvcuda/include/cvcuda/OpHQResize.hpp | 154 + src/cvcuda/include/cvcuda/OpLabel.h | 17 +- src/cvcuda/include/cvcuda/OpMorphology.h | 4 +- src/cvcuda/include/cvcuda/OpNormalize.h | 6 +- src/cvcuda/include/cvcuda/Types.h | 1 + src/cvcuda/include/cvcuda/Workspace.hpp | 7 + src/cvcuda/priv/CMakeLists.txt | 1 + src/cvcuda/priv/OpHQResize.cu | 2788 +++++++++++++++++ src/cvcuda/priv/OpHQResize.hpp | 115 + src/cvcuda/priv/OpHQResizeBatchWrap.cuh | 408 +++ src/cvcuda/priv/OpHQResizeFilter.cuh | 402 +++ .../priv/legacy/channel_reorder_var_shape.cu | 22 +- src/cvcuda/priv/legacy/cvt_color_var_shape.cu | 77 +- src/cvcuda/priv/legacy/gaussian_noise.cu | 4 +- .../priv/legacy/gaussian_noise_var_shape.cu | 2 +- src/cvcuda/priv/legacy/inpaint.cu | 2 +- .../priv/legacy/median_blur_var_shape.cu | 18 +- src/cvcuda/priv/legacy/min_area_rect.cu | 64 +- .../priv/legacy/pillow_resize_var_shape.cu | 4 +- src/cvcuda/priv/legacy/random_resized_crop.cu | 19 +- .../legacy/random_resized_crop_var_shape.cu | 17 +- src/cvcuda/priv/legacy/threshold.cu | 4 +- src/cvcuda/priv/legacy/threshold_var_shape.cu | 4 +- src/nvcv_types/Tensor.cpp | 10 + .../include/nvcv/alloc/Requirements.h | 8 +- .../include/nvcv/cuda/TensorBatchWrap.hpp | 386 +++ .../include/nvcv/cuda/TensorWrap.hpp | 37 +- src/nvcv_types/priv/IAllocator.cpp | 4 +- src/util/Compat.cpp | 2 +- src/util/Math.hpp | 13 + tests/CMakeLists.txt | 9 +- tests/cvcuda/python/cvcuda_test_python.in | 27 +- tests/cvcuda/python/test_ophqresize.py | 306 ++ tests/cvcuda/system/CMakeLists.txt | 1 + tests/cvcuda/system/ResizeUtils.cpp | 145 +- tests/cvcuda/system/ResizeUtils.hpp | 3 +- tests/cvcuda/system/TestOpChannelReorder.cpp | 264 +- tests/cvcuda/system/TestOpCvtColor.cpp | 292 +- tests/cvcuda/system/TestOpGammaContrast.cpp | 145 +- tests/cvcuda/system/TestOpGaussianNoise.cpp | 250 +- tests/cvcuda/system/TestOpHQResize.cpp | 1320 ++++++++ tests/cvcuda/system/TestOpMedianBlur.cpp | 2 +- .../cvcuda/system/TestOpRandomResizedCrop.cpp | 68 + tests/cvcuda/system/TestOpResize.cpp | 12 +- tests/cvcuda/system/TestOpThreshold.cpp | 313 +- .../cudatools_system/CMakeLists.txt | 2 + .../cudatools_system/DeviceTensorBatchWrap.cu | 152 + .../DeviceTensorBatchWrap.hpp | 42 + .../cudatools_system/TestTensorBatchWrap.cpp | 175 ++ .../python/nvcv_test_types_python.in | 27 +- tests/nvcv_types/system/TestAllocatorC.cpp | 184 ++ tests/nvcv_types/system/TestArray.cpp | 265 ++ tests/nvcv_types/system/TestConfig.cpp | 11 +- tests/nvcv_types/system/TestImageFormat.cpp | 8 + tests/nvcv_types/system/TestTensor.cpp | 200 ++ 
tests/nvcv_types/system/TestTensorLayout.cpp | 48 +- tests/nvcv_types/unit/TestMath.cpp | 24 + tools/mkop/PythonWrap.cpp | 8 +- 251 files changed, 15228 insertions(+), 1970 deletions(-) create mode 100644 bench/BenchHQResize.cpp create mode 100644 bench/python/README.md create mode 100644 bench/python/all_ops/op_adaptivethreshold.py create mode 100644 bench/python/all_ops/op_averageblur.py create mode 100644 bench/python/all_ops/op_blurbox.py create mode 100644 bench/python/all_ops/op_boundingbox.py create mode 100644 bench/python/all_ops/op_brightnesscontrast.py create mode 100644 bench/python/all_ops/op_centercrop.py create mode 100644 bench/python/all_ops/op_composite.py create mode 100644 bench/python/all_ops/op_convertto.py create mode 100644 bench/python/all_ops/op_copymakeborder.py create mode 100644 bench/python/all_ops/op_customcrop.py create mode 100644 bench/python/all_ops/op_cvtcolor.py create mode 100644 bench/python/all_ops/op_findcontours.py create mode 100644 bench/python/all_ops/op_flip.py create mode 100644 bench/python/all_ops/op_gaussianblur.py create mode 100644 bench/python/all_ops/op_hqresize.py create mode 100644 bench/python/all_ops/op_inpaint.py create mode 100644 bench/python/all_ops/op_jointbilateral.py create mode 100644 bench/python/all_ops/op_laplacian.py create mode 100644 bench/python/all_ops/op_morphology.py create mode 100644 bench/python/all_ops/op_nms.py create mode 100644 bench/python/all_ops/op_normalize.py create mode 100644 bench/python/all_ops/op_randomresizedcrop.py create mode 100644 bench/python/all_ops/op_reformat.py create mode 100644 bench/python/all_ops/op_remap.py create mode 100644 bench/python/all_ops/op_reshape.py create mode 100644 bench/python/all_ops/op_resize.py create mode 100644 bench/python/all_ops/op_rotate.py create mode 100644 bench/python/all_ops/op_sift.py create mode 100644 bench/python/all_ops/op_threshold.py create mode 100644 bench/python/all_ops/op_warpaffine.py create mode 100644 bench/python/all_ops/op_warpperspective.py create mode 100644 bench/python/assets/NOTICE.md create mode 100644 bench/python/assets/brooklyn.jpg create mode 100644 bench/python/assets/brooklyn_bboxes.pt create mode 100644 bench/python/assets/brooklyn_mask.jpg create mode 100644 bench/python/assets/brooklyn_nms_masks.pt create mode 100644 bench/python/assets/brooklyn_scores.pt create mode 100644 bench/python/assets/countour_lines.jpg create mode 100644 bench/python/bench_utils.py create mode 100644 bench/python/run_bench.py create mode 100644 docker/build20.04/Dockerfile rename docker/{build => build20.04}/ccache.conf (100%) create mode 100644 docker/build20.04/deadsnakes-ubuntu-ppa-focal.list rename docker/{build => build22.04}/Dockerfile (74%) create mode 100644 docker/build22.04/ccache.conf rename docker/{build => build22.04}/deadsnakes-ubuntu-ppa-jammy.list (100%) rename docker/{devel => devel20.04}/Dockerfile (74%) rename docker/{devel => devel20.04}/gdbinit (100%) rename docker/{devel => devel20.04}/vimrc (100%) create mode 100644 docker/devel22.04/Dockerfile create mode 100644 docker/devel22.04/gdbinit create mode 100644 docker/devel22.04/vimrc create mode 100644 docker/test20.04/Dockerfile create mode 100644 docker/test20.04/deadsnakes-ubuntu-ppa-focal.list rename docker/{test => test22.04}/Dockerfile (79%) rename docker/{test => test22.04}/deadsnakes-ubuntu-ppa-jammy.list (100%) create mode 100644 docs/sphinx/relnotes/v0.6.0-beta.rst rename docs/sphinx/samples/python_samples/commons/{imagebatchdecoder_pytorch.rst => 
imagebatchdecoder_nvcodec.rst} (52%) rename docs/sphinx/samples/python_samples/commons/{imagebatchencoder_pytorch.rst => imagebatchencoder_nvcodec.rst} (58%) rename docs/sphinx/samples/python_samples/commons/{videobatchdecoder_vpf.rst => videobatchdecoder_nvcodec.rst} (54%) rename docs/sphinx/samples/python_samples/commons/{videobatchencoder_vpf.rst => videobatchencoder_nvcodec.rst} (53%) create mode 100755 python/build_wheels.sh create mode 100644 python/mod_cvcuda/OpHQResize.cpp create mode 100644 python/setup.py.in create mode 100644 samples/common/python/nvcodec_utils.py delete mode 100644 samples/common/python/torch_utils.py create mode 100644 samples/scripts/requirements.txt create mode 100644 src/cvcuda/OpHQResize.cpp create mode 100644 src/cvcuda/include/cvcuda/OpHQResize.h create mode 100644 src/cvcuda/include/cvcuda/OpHQResize.hpp create mode 100644 src/cvcuda/priv/OpHQResize.cu create mode 100644 src/cvcuda/priv/OpHQResize.hpp create mode 100644 src/cvcuda/priv/OpHQResizeBatchWrap.cuh create mode 100644 src/cvcuda/priv/OpHQResizeFilter.cuh create mode 100644 src/nvcv_types/include/nvcv/cuda/TensorBatchWrap.hpp create mode 100644 tests/cvcuda/python/test_ophqresize.py create mode 100644 tests/cvcuda/system/TestOpHQResize.cpp create mode 100644 tests/nvcv_types/cudatools_system/DeviceTensorBatchWrap.cu create mode 100644 tests/nvcv_types/cudatools_system/DeviceTensorBatchWrap.hpp create mode 100644 tests/nvcv_types/cudatools_system/TestTensorBatchWrap.cpp diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 51e72f3ce..0868ee168 100644 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -15,7 +15,10 @@ set(CMAKE_FOLDER 3rdparty) -# disable all warnings +# disable all warnings when compiling objects of 3rdparty +# libraries included here. It *doesn't* affect warnings in public +# header files that are included by cvcuda code. For that, see +# the solution employed with nvbench. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -w") @@ -40,9 +43,18 @@ set(DLPACK_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/dlpack" PARENT_SCOPE) set(CUOSD_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cuOSD" PARENT_SCOPE) # NVBench -------------------------------- -set(NVBENCH_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/nvbench" PARENT_SCOPE) - if(BUILD_BENCH) set(NVBench_ENABLE_CUPTI off) + set(BUILD_SHARED_LIBS off) add_subdirectory(nvbench) + # Because nvbench::main is an object library, cmake<=3.20 doesn't treat it + # like regular libraries, and just creating a cvcuda_nvbench interface + # library that depends on it doesn't work. We need to create a static + # library and pull in the objects created by nvbench::main, as per cmake + # docs. + add_library(cvcuda_nvbench_main STATIC $<TARGET_OBJECTS:nvbench::main>) + target_link_libraries(cvcuda_nvbench_main PUBLIC nvbench::nvbench) + target_include_directories(cvcuda_nvbench_main SYSTEM INTERFACE + ${CMAKE_CURRENT_SOURCE_DIR}/nvbench) + add_library(cvcuda::nvbench::main ALIAS cvcuda_nvbench_main) endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index 6256d8379..fccd9c7eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,7 +23,7 @@ endif() project(cvcuda LANGUAGES C CXX - VERSION 0.5.0 + VERSION 0.6.0 DESCRIPTION "CUDA-accelerated Computer Vision algorithms" ) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 990ffc0fc..018377c20 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,3 +1,19 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." +[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + + # Contributor Code of Conduct ## Overview diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 89506b788..d21011b97 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,22 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." +[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + + # Contributing to CV-CUDA -**As of release v0.5.0-beta, CV-CUDA is not accepting outside contribution.** +**As of release v0.6.0-beta, CV-CUDA is not accepting outside contribution.** Contributions to CV-CUDA fall into the following categories: @@ -12,7 +28,7 @@ Contributions to CV-CUDA fall into the following categories: 1. To propose a new feature, please file a new feature request [issue](https://github.com/CVCUDA/CV-CUDA/issues/new/choose). Describe the intended feature and discuss the design and implementation with the team and - community. NOTE: Currently, as of release v0.5.0-beta, CV-CUDA is not accepting + community. NOTE: Currently, as of release v0.6.0-beta, CV-CUDA is not accepting outside contribution. 1. To ask a general question, please sumbit a question [issue](https://github.com/CVCUDA/CV-CUDA/issues/new/choose). If you need diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index 51ac6d4e9..a5f4bec53 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -1,3 +1,19 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." 
+[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + + # CV-CUDA Developer Guide ## What is CV-CUDA? @@ -35,7 +51,7 @@ CV-CUDA includes: | CopyMakeBorder | Creates a border around an image | | CustomCrop | Crops an image with a given region-of-interest | | CvtColor | Converts an image from one color space to another | -| DataTypeConvert | Converts an image’s data type with optional scaling | +| DataTypeConvert | Converts an image’s data type, with optional scaling | | Erase | Erases image regions | | Find Contours | Extract closed contours from an input binary image | | FindHomography | Calculates a perspective transform from four pairs of the corresponding points | @@ -45,6 +61,7 @@ CV-CUDA includes: | Gaussian Noise | Generates a statistical noise with a normal (Gaussian) distribution | | Histogram | Provides a grayscale value distribution showing the frequency of occurrence of each gray value. | | Histogram Equalizer | Allows effective spreading out the intensity range of the image typically used to improve contrast | +| HqResize | Performs advanced resizing supporting 2D and 3D data, tensors, tensor batches, and varshape image batches (2D only). Supports nearest neighbor, linear, cubic, Gaussian and Lanczos interpolation, with optional antialiasing when down-sampling | | Inpainting | Performs inpainting by replacing a pixel by normalized weighted sum of all the known pixels in the neighborhood | | Joint Bilateral Filter | Reduces image noise while preserving strong edges based on a guidance image | | Label | Labels connected regions in an image using 4-way connectivity for foreground and 8-way for background pixels | @@ -59,7 +76,7 @@ CV-CUDA includes: | Normalize | Normalizes an image pixel’s range | | OSD (Polyline Line Text Rotated Rect Segmented Mask) | Displays an overlay on the image of different forms including polyline line text rotated rectangle segmented mask | | PadStack | Stacks several images into a tensor with border extension | -| PairwiseMatcher | Matches features computed separately (e.g. via the SIFT operator) in two images using the brute force method | +| PairwiseMatcher | Matches features computed separately (e.g. via the SIFT operator) in two images, e.g. using the brute force method | | PillowResize | Changes the size and scale of an image using python-pillow algorithm | | RandomResizedCrop | Crops a random portion of an image and resizes it to a specified size. | | Reformat | Converts a planar image into non-planar and vice versa | diff --git a/LICENSE.md b/LICENSE.md index f0b0397aa..0701ae6cf 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,3 +1,18 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." 
+[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + Apache License Version 2.0, January 2004 diff --git a/README.md b/README.md index d8fed8bca..31ae4466b 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,29 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." +[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + # CV-CUDA [![License](https://img.shields.io/badge/License-Apache_2.0-yellogreen.svg)](https://opensource.org/licenses/Apache-2.0) -![Version](https://img.shields.io/badge/Version-v0.5.0--beta-blue) +![Version](https://img.shields.io/badge/Version-v0.6.0--beta-blue) -![Platform](https://img.shields.io/badge/Platform-linux--64_%7C_win--64_wsl2-gray) +![Platform](https://img.shields.io/badge/Platform-linux--64_%7C_win--64_wsl2%7C_aarch64-gray) -[![Cuda](https://img.shields.io/badge/CUDA-v11.7-%2376B900?logo=nvidia)](https://developer.nvidia.com/cuda-toolkit-archive) +[![CUDA](https://img.shields.io/badge/CUDA-v11.7-%2376B900?logo=nvidia)](https://developer.nvidia.com/cuda-toolkit-archive) [![GCC](https://img.shields.io/badge/GCC-v11.0-yellow)](https://gcc.gnu.org/gcc-11/changes.html) -[![Python](https://img.shields.io/badge/python-v3.8_%7c_v3.10-blue?logo=python)](https://www.python.org/) +[![Python](https://img.shields.io/badge/python-v3.7_%7c_v3.8_%7c_v3.9_%7c_v3.10%7c_v3.11-blue?logo=python)](https://www.python.org/) [![CMake](https://img.shields.io/badge/CMake-v3.20-%23008FBA?logo=cmake)](https://cmake.org/) CV-CUDA is an open-source project that enables building efficient cloud-scale @@ -18,181 +33,248 @@ efficient pre- and post-processing pipelines. CV-CUDA originated as a collaborative effort between [NVIDIA][NVIDIA Develop] and [ByteDance][ByteDance]. Refer to our [Developer Guide](DEVELOPER_GUIDE.md) for more information on the -operators available as of release v0.5.0-beta. +operators available as of release v0.6.0-beta. ## Getting Started To get a local copy up and running follow these steps. -### Pre-requisites +### Compatibility + +|CV-CUDA Build|Platform|CUDA Version|CUDA Compute Capability|Hardware Architectures|Nvidia Driver|Python Versions|Supported Compilers (build from source)|API compatibility with prebuilt binaries|OS/Linux distributions tested with prebuilt packages| +|-|-|-|-|-|-|-|-|-|-| +|x86_64_cu11|x86_64|11.7 or later|SM7 and later|Volta, Turing, Amper, Hopper, Ada Lovelace|r520 or later*** |3.8, 3.9, 3.10, 3.11|gcc>=9*
gcc>=11**|gcc>=9|Ubuntu>= 20.04
WSL2/Ubuntu>=20.04| +|x86_64_cu12|x86_64|12.2 or later|SM7 and later|Volta, Turing, Ampere, Hopper, Ada Lovelace|r520 or later***|3.8, 3.9, 3.10, 3.11|gcc>=9*
gcc>=11**|gcc>=9|Ubuntu>= 20.04
WSL2/Ubuntu>=20.04| +|aarch64_cu11 (JetPack 5.1)|aarch64|11.4|SM7 and later|Jetson AGX Orin|JetPack 5.1|3.8|gcc>=9*
gcc>=11**|gcc>=9|Jetson Linux 35.x| +|aarch64_cu12 (JetPack 6.0)|aarch64|12.2|SM7 and later|Jetson AGX Orin|JetPack 6.0 DP|3.10|gcc>=9*
gcc>=11**|gcc>=9|Jetson Linux 36.2| -- Linux distro: - - Ubuntu x86_64 >= 20.04 - - WSL2 with Ubuntu >= 20.04 (tested with 20.04) -- NVIDIA driver - - Linux: Driver version 520.56.06 or higher -- CUDA Toolkit - - Version 11.7 or above. -- GCC >= 11.0 -- Python >= 3.8 -- cmake >= 3.20 +\* partial build, no test module (see Known Limitations)
+\** full build, including test module
+\*** [samples][CV-CUDA Samples] require driver r535 or later to run and are only officially supported with CUDA 12. + +### Known limitations + +- For GCC versions lower than 11.0, C++17 support needs to be enabled when compiling CV-CUDA. +- The C++ test module cannot build with gcc<11 (requires specific C++20 features). With gcc-9 or gcc-10, please build with the option `-DBUILD_TESTS=0`. +- [CV-CUDA Samples] require driver r535 or later to run and are only officially supported with CUDA 12. +- Only one CUDA version (CUDA 11.x or CUDA 12.x) of CV-CUDA packages (Debian packages, tarballs, Python Wheels) can be installed at a time. Please uninstall all packages from a given CUDA version before installing packages from a different version. +- Test tarballs (cvcuda-tests-*.tar.xz) need to be unpacked at the root level to find existing tests. ### Installation -The following steps describe how to install CV-CUDA from pre-built install -packages. Choose the installation method that meets your environment needs. +For convenience, we provide pre-built packages for various combinations of CUDA versions, Python versions and architectures [here][CV-CUDA GitHub Releases]. +The following steps describe how to install CV-CUDA from such pre-built packages. + +We support two main alternative pathways: +- DEB or Tar archive installation (C++/CUDA Libraries, Headers, Python bindings) +- Standalone Python Wheels (containing C++/CUDA Libraries and Python bindings) + +Choose the installation method that meets your environment needs. #### Tar File Installation +- Installation of C++/CUDA libraries (cvcuda-lib*) and development headers (cvcuda-dev*): +```shell +tar -xvf cvcuda-lib-0.6.0_beta-<cu_ver>-<arch>-linux.tar.xz +tar -xvf cvcuda-dev-0.6.0_beta-<cu_ver>-<arch>-linux.tar.xz +``` +- Installation of Python bindings (cvcuda-python*) ```shell -tar -xvf nvcv-lib-0.5.0-cuda11-x86_64-linux.tar.xz -tar -xvf nvcv-dev-0.5.0-cuda11-x86_64-linux.tar.xz ``` +tar -xvf cvcuda-python-0.6.0_beta-<cu_ver>-<arch>-linux.tar.xz ``` with `<cu_ver>` the desired CUDA version, +`<py_ver>` the desired Python version and +`<arch>` the desired architecture #### DEB File Installation +- Installation of C++/CUDA libraries (cvcuda-lib*) and development headers (cvcuda-dev*): +```shell +sudo apt-get install -y ./cvcuda-lib-0.6.0_beta-<cu_ver>-<arch>-linux.deb ./cvcuda-dev-0.6.0_beta-<cu_ver>-<arch>-linux.deb +``` +- Installation of Python bindings (cvcuda-python*) ```shell -sudo apt-get install -y ./nvcv-lib-0.5.0-cuda11-x86_64-linux.deb ./nvcv-dev-0.5.0-cuda11-x86_64-linux.deb ``` +sudo apt-get install -y cvcuda-python-0.6.0_beta-<cu_ver>-<arch>-linux.deb ``` +with `<cu_ver>` the desired CUDA version, +`<py_ver>` the desired Python version and +`<arch>` the desired architecture + +#### Python Wheel File Installation + + +Download the appropriate .whl file for your computer architecture, Python and CUDA version from the release assets of the current CV-CUDA release. Release information of all CV-CUDA releases can be accessed [here][CV-CUDA GitHub Releases]. Once downloaded, execute the `pip install` command to install the Python wheel. For example: -#### Python WHL File Installation ```shell -pip install nvcv_python-0.5.0-cp38-cp38-linux_x86_64.whl +pip install cvcuda_<cu_ver>-0.6.0b0-cp<py_ver>-cp<py_ver>-linux_<arch>.whl ``` +with `<cu_ver>` the desired CUDA version, +`<py_ver>` the desired Python version and +`<arch>` the desired architecture + +Please note that the Python wheels provided are standalone: they include both the C++/CUDA libraries and the Python bindings. + ### Build from Source -Building CV-CUDA from source allows for customization and is essential for contributing to the project.
Here are detailed steps to guide you through the process: +Follow these instruction to build CV-CUDA from source: + +1. Set up your local CV-CUDA repository + + a. Install prerequisites needed to setup up the repository. + + On Ubuntu >= 20.04, install the following packages: + - git-lfs: to retrieve binary files from remote repository + + ```shell + sudo apt-get install -y git git-lfs + ``` + + b. After cloning the repository (assuming it was cloned in `~/cvcuda`), + it needs to be properly configured by running the `init_repo.sh` script only once. + + ```shell + cd ~/cvcuda + ./init_repo.sh + ``` -#### 1. Repository Setup +2. Build CV-CUDA - Before you begin, ensure you have cloned the CV-CUDA repository to your local machine. Let's assume you've cloned it into `~/cvcuda`. + a. Install the dependencies required for building CV-CUDA - - **Initialize the Repository**: - After cloning, initialize the repository to configure it correctly. This setup is required only once. + On Ubuntu >= 20.04, install the following packages: + - g++-11: compiler to be used + - cmake (>= 3.20), ninja-build (optional): manage build rules + - python3-dev: for python bindings + - libssl-dev: needed by the testsuite (MD5 hashing utilities) - ```shell - cd ~/cvcuda - ./init_repo.sh - ``` + ```shell + sudo apt-get install -y g++-11 cmake ninja-build python3-dev libssl-dev + ``` -#### 2. Install Build Dependencies + For CUDA Toolkit, any version of the 11.x or 12.x series should work. + CV-CUDA was tested with 11.7 and 12.2, thus those should be preferred. - CV-CUDA requires several dependencies to build from source. The following steps are based on Ubuntu 22.04, but similar packages can be found for other distributions. + ```shell + sudo apt-get install -y cuda-11-7 + # or + sudo apt-get install -y cuda-12-2 + ``` - - **Install Essential Packages**: - These include the compiler, build system, and necessary libraries. + b. Build the project - ```shell - sudo apt-get install -y g++-11 cmake ninja-build python3-dev libssl-dev - ``` + ```shell + ci/build.sh [release|debug] [output build tree path] [-DBUILD_TESTS=1|0] [-DPYTHON_VERSIONS='3.8;3.9;3.10;3.11'] [-DPUBLIC_API_COMPILERS='gcc-9;gcc-11;clang-11;clang-14'] + ``` - - **CUDA Toolkit**: - The CUDA Toolkit is essential for GPU acceleration. Although any 11.x version is compatible, 11.7 is recommended. + The default build type is 'release'. - ```shell - sudo apt-get install -y cuda-minimal-build-11-7 - ``` + If output build tree path isn't specified, it will be `build-rel` for release + builds, and `build-deb` for debug. -#### 3. Build Process + The library is in `build-rel/lib` and executables (tests, etc...) are in `build-rel/bin`. - Once the dependencies are in place, you can proceed to build CV-CUDA. + The `-DBUILD_TESTS` option can be used to disable/enable building the tests (enabled by default, see Known Limitations). - - **Run Build Script**: - A build script is provided to simplify the compilation process. It creates a build tree and compiles the source code. + The `-DPYTHON_VERSIONS` option can be used to select Python versions to build bindings and Wheels for. + By default, only the default system Python3 version will be selected. - ```shell - ci/build.sh - ``` + The `-DPUBLIC_API_COMPILERS` option can be used to select the compilers used to check public API compatibility. + By default, gcc-11, gcc-9, clang-11, and clang-14 is tried to be selected and checked. - This script creates a release build by default, placing output in `build-rel`. 
You can specify a debug build or a different output directory: +3. Build Documentation - ```shell - ci/build.sh [release|debug] [output build tree path] - ``` + a. Install the dependencies required for building the documentation -#### 4. Build Documentation (Optional) + On Ubuntu >= 20.04, install the following packages: + - doxygen: parse header files for reference documentation + - python3, python3-pip: to install some python packages needed + - sphinx, breathe, exhale, recommonmark, graphiviz: to render the documentation + - sphinx-rtd-theme: documenation theme used - If you need to build the documentation, additional dependencies are required: + ```shell + sudo apt-get install -y doxygen graphviz python3 python3-pip + sudo python3 -m pip install sphinx==4.5.0 breathe exhale recommonmark graphviz sphinx-rtd-theme + ``` - - **Install Documentation Dependencies**: - These tools are used to generate and format the documentation. + b. Build the documentation + ```shell + ci/build_docs.sh [build folder] + ``` - ```shell - sudo apt-get install -y doxygen graphviz python3 python3-pip - sudo python3 -m pip install sphinx==4.5.0 breathe exhale recommonmark graphviz sphinx-rtd-theme - ``` + Example: + `ci/build_docs.sh build_docs` - - **Generate Documentation**: - Use the provided script to build the documentation. +4. Build and run Samples - ```shell - ci/build_docs.sh [build folder] - ``` + For instructions on how to build samples from source and run them, see the [Samples](samples/README.md) documentation. - For example: +5. Run Tests - ```shell - ci/build_docs.sh build_docs - ``` + a. Install the dependencies required for running the tests -#### 5. Build and Run Samples (Optional) + On Ubuntu >= 20.04, install the following packages: + - python3, python3-pip: to run python bindings tests + - torch: dependencies needed by python bindings tests - CV-CUDA comes with a variety of samples to demonstrate its capabilities. + ```shell + sudo apt-get install -y python3 python3-pip + sudo python3 -m pip install pytest torch + ``` - - **See the Samples Documentation**: - Detailed instructions for building and running samples are available in the [Samples](samples/README.md) documentation. + b. Run the tests -#### 6. Running Tests + The tests are in `/bin`. You can run the script below to run all + tests at once. Here's an example when build tree is created in `build-rel` - To ensure everything is working as expected, you can run CV-CUDA's test suite. + ```shell + build-rel/bin/run_tests.sh + ``` - - **Install Test Dependencies**: - These are necessary to run the Python binding tests. +6. Package installers and Python Wheels - ```shell - sudo apt-get install -y python3 python3-pip - sudo python3 -m pip install pytest torch - ``` + a. Package installers - - **Execute Tests**: - Run the test scripts located in the build tree. + Installers can be generated using the following cpack command once you have successfully built the project - ```shell - build-rel/bin/run_tests.sh - ``` + ```shell + cd build-rel + cpack . + ``` -#### 7. Packaging + This will generate in the build directory both Debian installers and tarballs + (\*.tar.xz), needed for integration in other distros. - After a successful build, you can create installers using `cpack`. + For a fine-grained choice of what installers to generate, the full syntax is: - - **Generate Installers**: - This step produces Debian packages and tarballs, suitable for distribution or installation on other systems. + ```shell + cpack . 
-G [DEB|TXZ] + ``` - ```shell - cd build-rel - cpack . - ``` + - DEB for Debian packages + - TXZ for \*.tar.xz tarballs. - For specific installer types: + b. Python Wheels - ```shell - cpack . -G [DEB|TXZ] - ``` + By default during the `release` build, Python bindings and wheels are created for the available CUDA version and the specified Python + version(s). The wheels are stored in `build-rel/pythonX.Y/wheel` folder, where `build-rel` is the build directory + used to build the release build and `X` and `Y` are Python major and minor versions. The built wheels can be installed using pip. + For example, to install the Python wheel built for CUDA 12.x, Python 3.10 on Linux x86_64 systems: - - `DEB` for Debian packages. - - `TXZ` for `.tar.xz` tarballs. + ```shell + pip install cvcuda_cu12-0.6.0b0-cp310-cp310-linux_x86_64.whl + ``` ## Contributing CV-CUDA is an open source project. As part of the Open Source Community, we are committed to the cycle of learning, improving, and updating that makes this -community thrive. However, as of release v0.5.0-beta, CV-CUDA is not yet ready +community thrive. However, as of release v0.6.0-beta, CV-CUDA is not yet ready for external contributions. To understand the process for contributing the CV-CUDA, see our -[Contributing](CONTRIBUTING.md) page. To understand our committment to the Open +[Contributing](CONTRIBUTING.md) page. To understand our commitment to the Open Source Community, and providing an environment that both supports and respects the efforts of all contributors, please read our [Code of Conduct](CODE_OF_CONDUCT.md). @@ -254,3 +336,5 @@ CV-CUDA is developed jointly by NVIDIA and ByteDance. [NVIDIA Develop]: https://developer.nvidia.com/ [ByteDance]: https://www.bytedance.com/ +[CV-CUDA GitHub Releases]: https://github.com/CVCUDA/CV-CUDA/releases +[CV-CUDA Samples]: https://github.com/CVCUDA/CV-CUDA/blob/main/samples/README.md diff --git a/SECURITY.md b/SECURITY.md index 1bcc28963..f0b0503c6 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,3 +1,18 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." +[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." 
+ # Security NVIDIA is dedicated to the security and trust of our software products and diff --git a/bench/BenchFindContours.cpp b/bench/BenchFindContours.cpp index 2beb27470..06deb9732 100644 --- a/bench/BenchFindContours.cpp +++ b/bench/BenchFindContours.cpp @@ -87,10 +87,10 @@ try CPUImage srcVec(shape.y * shape.z, 0); for (auto i = 0; i < 10; ++i) { - auto anchorX = rand() % shape.z; - auto anchorY = rand() % shape.y; - auto sizeX = rand() % (shape.z - anchorX); - auto sizeY = rand() % (shape.y - anchorY); + int anchorX = rand() % shape.z; + int anchorY = rand() % shape.y; + int sizeX = rand() % (shape.z - anchorX); + int sizeY = rand() % (shape.y - anchorY); generateRectangle(srcVec, {anchorX, anchorY}, {sizeX, sizeY}); } diff --git a/bench/BenchHQResize.cpp b/bench/BenchHQResize.cpp new file mode 100644 index 000000000..9d80963ec --- /dev/null +++ b/bench/BenchHQResize.cpp @@ -0,0 +1,129 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BenchUtils.hpp" + +#include + +#include + +template +inline void HQResize(nvbench::state &state, nvbench::type_list) +try +{ + long3 srcShape = benchutils::GetShape<3>(state.get_string("shape")); + bool antialias = state.get_int64("antialias"); + NVCVInterpolationType interpolation = benchutils::GetInterpolationType(state.get_string("interpolation")); + bool batch = state.get_int64("batch"); + + long3 dstShape; + if (state.get_string("resizeType") == "EXPAND") + { + if (antialias) + { + state.skip("Antialias is no-op for expanding"); + return; + } + dstShape = long3{srcShape.x, srcShape.y * 2, srcShape.z * 2}; + } + else if (state.get_string("resizeType") == "CONTRACT") + { + // resize from shape to shape/2 + dstShape = long3{srcShape.x, srcShape.y / 2, srcShape.z / 2}; + } + else + { + throw std::invalid_argument("Invalid resizeType = " + state.get_string("resizeType")); + } + + nvcv::Size2D srcSize{(int)srcShape.z, (int)srcShape.y}; + nvcv::Size2D dstSize{(int)dstShape.z, (int)dstShape.y}; + + nvcv::DataType dtype{benchutils::GetDataType()}; + nvcv::ImageFormat fmt(nvcv::MemLayout::PITCH_LINEAR, dtype.dataKind(), nvcv::Swizzle::S_X000, dtype.packing()); + + state.add_global_memory_reads(srcShape.x * srcShape.y * srcShape.z * sizeof(T)); + state.add_global_memory_writes(dstShape.x * dstShape.y * dstShape.z * sizeof(T)); + + cvcuda::HQResize op; + + if (!batch) + { + HQResizeTensorShapeI inShapeDesc{ + {srcSize.h, srcSize.w}, + 2, + 1 + }; + HQResizeTensorShapeI outShapeDesc{ + {dstSize.h, dstSize.w}, + 2, + 1 + }; + cvcuda::UniqueWorkspace ws = cvcuda::AllocateWorkspace( + op.getWorkspaceRequirements(1, inShapeDesc, outShapeDesc, interpolation, interpolation, antialias)); + + // clang-format off + nvcv::Tensor src({{srcShape.x, srcShape.y, srcShape.z, 1}, "NHWC"}, dtype); + nvcv::Tensor dst({{dstShape.x, dstShape.y, dstShape.z, 1}, "NHWC"}, dtype); + // clang-format on + + 
benchutils::FillTensor(src, benchutils::RandomValues()); + + state.exec(nvbench::exec_tag::sync, [&op, &ws, &src, &dst, interpolation, antialias](nvbench::launch &launch) + { op(launch.get_stream(), ws.get(), src, dst, interpolation, interpolation, antialias); }); + } + else + { + HQResizeTensorShapeI maxShape{ + {std::max(srcSize.h, dstSize.h), std::max(srcSize.w, dstSize.w)}, + 2, + 1 + }; + cvcuda::UniqueWorkspace ws = cvcuda::AllocateWorkspace(op.getWorkspaceRequirements(1, maxShape)); + + // clang-format off + nvcv::Tensor src({{srcShape.y, srcShape.z, 1}, "HWC"}, dtype); + nvcv::Tensor dst({{dstShape.y, dstShape.z, 1}, "HWC"}, dtype); + // clang-format on + + benchutils::FillTensor(src, benchutils::RandomValues()); + nvcv::TensorBatch srcTensors(1); + nvcv::TensorBatch dstTensors(1); + srcTensors.pushBack(src); + dstTensors.pushBack(dst); + + state.exec( + nvbench::exec_tag::sync, + [&op, &ws, &srcTensors, &dstTensors, interpolation, antialias](nvbench::launch &launch) + { op(launch.get_stream(), ws.get(), srcTensors, dstTensors, interpolation, interpolation, antialias); }); + } +} + +catch (const std::exception &err) +{ + state.skip(err.what()); +} + +using HQResizeTypes = nvbench::type_list; + +NVBENCH_BENCH_TYPES(HQResize, NVBENCH_TYPE_AXES(HQResizeTypes)) + .set_type_axes_names({"InOutDataType"}) + .add_int64_axis("batch", {false}) + .add_string_axis("shape", {"1x1080x1920"}) + .add_string_axis("interpolation", {"CUBIC"}) + .add_int64_axis("antialias", {false, true}) + .add_string_axis("resizeType", {"CONTRACT"}); diff --git a/bench/BenchMorphology.cpp b/bench/BenchMorphology.cpp index 69ed2f97c..d3947e788 100644 --- a/bench/BenchMorphology.cpp +++ b/bench/BenchMorphology.cpp @@ -25,9 +25,10 @@ template inline void Morphology(nvbench::state &state, nvbench::type_list) try { - long3 shape = benchutils::GetShape<3>(state.get_string("shape")); - long varShape = state.get_int64("varShape"); - int iteration = static_cast(state.get_int64("iteration")); + long3 shape = benchutils::GetShape<3>(state.get_string("shape")); + long varShape = state.get_int64("varShape"); + int iteration = static_cast(state.get_int64("iteration")); + int2 kernelSize = nvcv::cuda::StaticCast(benchutils::GetShape<2>(state.get_string("kernelSize"))); NVCVBorderType borderType = benchutils::GetBorderType(state.get_string("border")); @@ -50,7 +51,7 @@ try morphType = NVCV_CLOSE; } - nvcv::Size2D mask{3, 3}; + nvcv::Size2D mask{kernelSize.x, kernelSize.y}; int2 anchor{-1, -1}; int bwIteration = (morphType == NVCV_OPEN || morphType == NVCV_CLOSE || iteration > 1) ? 
2 * iteration : iteration; @@ -129,5 +130,6 @@ NVBENCH_BENCH_TYPES(Morphology, NVBENCH_TYPE_AXES(MorphologyTypes)) .add_string_axis("shape", {"1x1080x1920"}) .add_int64_axis("varShape", {-1}) .add_int64_axis("iteration", {1}) + .add_string_axis("kernelSize", {"3x3"}) .add_string_axis("morphType", {"ERODE", "DILATE", "OPEN", "CLOSE"}) .add_string_axis("border", {"REPLICATE"}); diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index 67fd8c5f1..e82bf3da4 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -30,6 +30,7 @@ set(bench_sources BenchFlip.cpp BenchRotate.cpp BenchPillowResize.cpp + BenchHQResize.cpp BenchCenterCrop.cpp BenchWarpPerspective.cpp BenchWarpAffine.cpp @@ -75,8 +76,7 @@ foreach(bench_source IN LISTS bench_sources) string(REPLACE "Bench" "cvcuda_bench_" algo_name ${bench_file_name}) string(TOLOWER ${algo_name} bench_name) add_executable(${bench_name} "${bench_source}") - target_include_directories(${bench_name} PRIVATE "${CMAKE_CURRENT_LIST_DIR}") - target_link_libraries(${bench_name} PRIVATE nvbench::main PUBLIC cvcuda) + target_link_libraries(${bench_name} PRIVATE cvcuda::nvbench::main cvcuda) set_target_properties(${bench_name} PROPERTIES COMPILE_FEATURES cuda_std_17) add_dependencies(bench_all ${bench_name}) endforeach() diff --git a/bench/python/README.md b/bench/python/README.md new file mode 100644 index 000000000..b7879c8ae --- /dev/null +++ b/bench/python/README.md @@ -0,0 +1,116 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." +[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + + +# Python Operator Performance Benchmarking + +Using various performance benchmarking scripts that ships with CV-CUDA samples, we can measure and report the performance of various CV-CUDA operators from Python. + +The following scripts are part of the performance benchmarking tools in CV-CUDA. + +1. `samples/scripts/benchmark.py` +2. `samples/common/python/perf_utils.py` +3. `bench/python/bench_utils.py` + +We use NVIDIA NSYS internally to benchmark Python code for its CPU and GPU run-times. + + +## About the Operator Benchmarks + +Operators for which a test case has been implemented in the `all_ops` folder can be benchmarked. The following statements are true for all such test cases: + +1. All inherit from a base class called `AbstractOpBase` which allows them to expose benchmarking capabilities in a consistent manner. They all have a setup stage, a run stage and an optional visualization stage. By default, the visualization is turned off. +2. All receive the same input image. Some operators may need to read additional data. Such data is always read from the `assets` directory. +3. All run for a number of iterations (default is set to 10) and a batch size (default is set to 32). +4. The script `benchmark.py` handles overall benchmarking. 
It launches the runs, monitors them, communicates with NSYS, and saves the results of a run in a JSON file. Various settings such as using warm-up (default is set to 1 iteration) are handled here. +5. One or more benchmark runs can be compared and summarized in a table showing only the important information from the detailed JSON files. + +## Setting up the environment + +1. Follow the [Setting up the environment](../../samples/README.md#setting-up-the-environment) section of the CV-CUDA samples. Note: The step asking to install dependencies can be ignored if you are only interested in benchmarking the operators (and not the samples). + + +## Running the benchmark + +The script `run_bench.py`, together with `benchmark.py`, can be used to automatically benchmark all supported CV-CUDA operators in Python. Additionally, one or more runs can be summarized and compared in a table using the functionality provided by `bench_utils.py`. + + +### To run the operator benchmarks + +```bash +python3 samples/scripts/benchmark.py -o <OUTPUT_DIR> bench/python/run_bench.py +``` +- Where: + 1. An `OUTPUT_DIR` must be given to store various benchmark artifacts. +- Upon running it will: + 1. Ask `benchmark.py` to launch `run_bench.py`. + 2. `run_bench.py` will then find out all the operators that can be benchmarked. + 3. Run those one by one, through all the stages, such as setup, run and visualization (if enabled). + 4. Store the artifacts in the output folder. This is where the `benchmark.py` style `benchmark_mean.json` would be stored. + +Once a run is completed, one can use `bench_utils.py` to summarize it. Additionally, we can use the same script to compare multiple different runs. + +### To summarize one run only + +```bash +python3 bench/python/bench_utils.py -o <OUTPUT_DIR> -b <BENCHMARK_MEAN_JSON> -bn baseline +``` +- Where: + 1. An `OUTPUT_DIR` must be given to store the summary table as a CSV file. + 2. The first run's `benchmark_mean.json` path must be given as `b`. + 3. The display name of the first run must be given as `bn`. +- Upon running it will: + 1. Grab appropriate values from the JSON file for all the operators and put them in a table format. + 2. Save the table as a CSV file. + +The output CSV file will be stored in the `OUTPUT_DIR` with the current date and time in its name. + +NOTE: `benchmark.py` will produce additional JSON files (and visualization files if it was enabled). These files provide far more detailed information than the CSV and are usually only meant for debugging purposes. + + +### To summarize and compare multiple runs + +```bash +python3 bench/python/bench_utils.py -o <OUTPUT_DIR> -b <BASELINE_MEAN_JSON> -bn baseline \ + -c <RUN_2_MEAN_JSON> -cn run_2 \ + -c <RUN_3_MEAN_JSON> -cn run_3 +``` +- Where: + 1. An `OUTPUT_DIR` must be given to store the summary table as a CSV file. + 2. The first run's `benchmark_mean.json` path is given as `b`. + 3. The display name of the first run is given as `bn`. + 4. The second run's `benchmark_mean.json` path is given as `c`. + 5. The display name of the second run is given as `cn`. + 6. The third run's `benchmark_mean.json` path is given as `c`. + 7. The display name of the third run is given as `cn`. + 8. Options `c` and `cn` can be repeated zero or more times to cover all the runs. +- Upon running it will: + 1. Grab appropriate values from the JSON file for all the operators and put them in a table format. + 2. Save the table as a CSV file. + + +## Interpreting the results + +Upon successful completion of the `bench_utils.py` script, we get a CSV file.
+ +- If you ran it only on one run, your CSV will only have four columns - showing data only from that run: + 1. `index`: from 0 to N-1 for all the N operators benchmarked + 2. `operator name` The name of the operator + 3. `baseline run time (ms)`: The first run's time in milliseconds, averaged across M iterations (default is 10, with warm-up runs discarded) + 4. `run time params`: Any helpful parameters supplied to the operator as it ran in first run. Only lists primitive data-types. + +- If you ran it on more than one runs, your CSV file will have additional columns - comparing data of those runs with the baseline run. Additional columns, per run, would be: + 1. `run i time (ms)`: The ith run's time in milliseconds, averaged across M iterations (default is 10, with warm-up runs discarded) + 2. `run i v/s baseline speed-up`: The speed-up factor. This is calculated by dividing `run i time (ms)` by `baseline run time (ms)`. diff --git a/bench/python/all_ops/op_adaptivethreshold.py b/bench/python/all_ops/op_adaptivethreshold.py new file mode 100644 index 000000000..ddc316cc6 --- /dev/null +++ b/bench/python/all_ops/op_adaptivethreshold.py @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpAdaptiveThreshold(AbstractOpBase): + def setup(self, input): + self.maxval = 255.0 + self.adaptive_method = cvcuda.AdaptiveThresholdType.GAUSSIAN_C + self.threshold_type = cvcuda.ThresholdType.BINARY + self.block_size = 11 + self.c = 2 + self.grayscale_input = cvcuda.cvtcolor(input, cvcuda.ColorConversion.RGB2GRAY) + + def run(self, input): + return cvcuda.adaptivethreshold( + self.grayscale_input, + max_value=self.maxval, + adaptive_method=self.adaptive_method, + threshold_type=self.threshold_type, + block_size=self.block_size, + c=self.c, + ) diff --git a/bench/python/all_ops/op_averageblur.py b/bench/python/all_ops/op_averageblur.py new file mode 100644 index 000000000..cf591e3f1 --- /dev/null +++ b/bench/python/all_ops/op_averageblur.py @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpAverageBlur(AbstractOpBase): + def setup(self, input): + self.kernel_size = (3, 3) + self.kernel_anchor = (-1, -1) + + def run(self, input): + return cvcuda.averageblur( + input, kernel_size=self.kernel_size, kernel_anchor=self.kernel_anchor + ) diff --git a/bench/python/all_ops/op_blurbox.py b/bench/python/all_ops/op_blurbox.py new file mode 100644 index 000000000..8f24740d5 --- /dev/null +++ b/bench/python/all_ops/op_blurbox.py @@ -0,0 +1,80 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch +from torchvision.io import read_image +import os + + +class OpBlurBox(AbstractOpBase): + def setup(self, input): + self.kernel_size = 5 + + data = read_image(os.path.join(self.assets_dir, "brooklyn.jpg")) + data = data.moveaxis(0, -1).contiguous() # From CHW to HWC + data = data.cuda(self.device_id) + data = [data.clone() for _ in range(input.shape[0])] + data = torch.stack(data) + self.input = cvcuda.as_tensor(data, "NHWC") + + bboxes = torch.load( + os.path.join(self.assets_dir, "brooklyn_bboxes.pt"), + map_location="cuda:%d" % self.device_id, + ) + bboxes = [bboxes[0].clone() for _ in range(input.shape[0])] + self.bboxes_pyt = torch.stack(bboxes) + bboxes = cvcuda.as_tensor(self.bboxes_pyt) + + scores = torch.load( + os.path.join(self.assets_dir, "brooklyn_scores.pt"), + map_location="cuda:%d" % self.device_id, + ) + scores = [scores[0].clone() for _ in range(input.shape[0])] + scores = torch.stack(scores) + scores = cvcuda.as_tensor(scores) + + self.nms_masks_pyt = torch.load( + os.path.join(self.assets_dir, "brooklyn_nms_masks.pt"), + map_location="cuda:%d" % self.device_id, + ) + + def run(self, input): + blur_boxes = [] + # Create an array of bounding boxes with render settings. 
+ for current_boxes, current_masks in zip(self.bboxes_pyt, self.nms_masks_pyt): + filtered_boxes = current_boxes[current_masks] + BlurBoxI_list = [] + + for box in filtered_boxes: + BlurBoxI_list.append( + cvcuda.BlurBoxI( + box=tuple(box), + kernelSize=self.kernel_size, + ) + ) + + blur_boxes.append(BlurBoxI_list) + + batch_blur_boxes = cvcuda.BlurBoxesI(boxes=blur_boxes) + + cvcuda.boxblur_into(self.input, self.input, batch_blur_boxes) + + return self.input diff --git a/bench/python/all_ops/op_boundingbox.py b/bench/python/all_ops/op_boundingbox.py new file mode 100644 index 000000000..5b9f1ba3d --- /dev/null +++ b/bench/python/all_ops/op_boundingbox.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch +from torchvision.io import read_image +import os + + +class OpBoundingBox(AbstractOpBase): + def setup(self, input): + self.border_color = (0, 255, 0, 255) + self.fill_color = (0, 0, 255, 0) + self.thickness = 5 + + data = read_image(os.path.join(self.assets_dir, "brooklyn.jpg")) + data = data.moveaxis(0, -1).contiguous() # From CHW to HWC + data = data.cuda(self.device_id) + data = [data.clone() for _ in range(input.shape[0])] + data = torch.stack(data) + self.input = cvcuda.as_tensor(data, "NHWC") + + bboxes = torch.load( + os.path.join(self.assets_dir, "brooklyn_bboxes.pt"), + map_location="cuda:%d" % self.device_id, + ) + bboxes = [bboxes[0].clone() for _ in range(input.shape[0])] + self.bboxes_pyt = torch.stack(bboxes) + bboxes = cvcuda.as_tensor(self.bboxes_pyt) + + scores = torch.load( + os.path.join(self.assets_dir, "brooklyn_scores.pt"), + map_location="cuda:%d" % self.device_id, + ) + scores = [scores[0].clone() for _ in range(input.shape[0])] + scores = torch.stack(scores) + scores = cvcuda.as_tensor(scores) + + self.nms_masks_pyt = torch.load( + os.path.join(self.assets_dir, "brooklyn_nms_masks.pt"), + map_location="cuda:%d" % self.device_id, + ) + + def run(self, input): + bounding_boxes = [] + # Create an array of bounding boxes with render settings. 
+ for current_boxes, current_masks in zip(self.bboxes_pyt, self.nms_masks_pyt): + filtered_boxes = current_boxes[current_masks] + BndBoxI_list = [] + + for box in filtered_boxes: + BndBoxI_list.append( + cvcuda.BndBoxI( + box=tuple(box), + thickness=self.thickness, + borderColor=self.border_color, + fillColor=self.fill_color, + ) + ) + + bounding_boxes.append(BndBoxI_list) + + batch_bounding_boxes = cvcuda.BndBoxesI(boxes=bounding_boxes) + + cvcuda.bndbox_into(self.input, self.input, batch_bounding_boxes) + + return self.input diff --git a/bench/python/all_ops/op_brightnesscontrast.py b/bench/python/all_ops/op_brightnesscontrast.py new file mode 100644 index 000000000..1cd38e679 --- /dev/null +++ b/bench/python/all_ops/op_brightnesscontrast.py @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch + + +class OpBrightnessContrast(AbstractOpBase): + def setup(self, input): + brightness = torch.tensor([1.2]).cuda(self.device_id) + self.brightness = cvcuda.as_tensor(brightness, "N") + + contrast = torch.tensor([0.7]).cuda(self.device_id) + self.contrast = cvcuda.as_tensor(contrast, "N") + + brightness_shift = torch.tensor([130.0]).cuda(self.device_id) + self.brightness_shift = cvcuda.as_tensor(brightness_shift, "N") + + contrast_center = torch.tensor([0.5]).cuda(self.device_id) + self.contrast_center = cvcuda.as_tensor(contrast_center, "N") + + def run(self, input): + return cvcuda.brightness_contrast( + input, + brightness=self.brightness, + contrast=self.contrast, + brightness_shift=self.brightness_shift, + contrast_center=self.contrast_center, + ) diff --git a/bench/python/all_ops/op_centercrop.py b/bench/python/all_ops/op_centercrop.py new file mode 100644 index 000000000..907c31cf0 --- /dev/null +++ b/bench/python/all_ops/op_centercrop.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. 
+import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpCenterCrop(AbstractOpBase): + def setup(self, input): + width, height = input.shape[2], input.shape[1] + self.crop_size = [width // 2, height // 2] + + def run(self, input): + return cvcuda.center_crop( + input, + self.crop_size, + ) diff --git a/bench/python/all_ops/op_composite.py b/bench/python/all_ops/op_composite.py new file mode 100644 index 000000000..d42e5063b --- /dev/null +++ b/bench/python/all_ops/op_composite.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch +from torchvision.io import read_image +import os + + +class OpComposite(AbstractOpBase): + def setup(self, input): + data = read_image(os.path.join(self.assets_dir, "brooklyn.jpg")) + data = data.moveaxis(0, -1).contiguous() # From CHW to HWC + data = data.cuda(self.device_id) + data = [data.clone() for _ in range(input.shape[0])] + data = torch.stack(data) + self.input = cvcuda.as_tensor(data, "NHWC") + self.blurred_input = cvcuda.gaussian( + self.input, kernel_size=(15, 15), sigma=(5, 5) + ) + + mask = read_image(os.path.join(self.assets_dir, "brooklyn_mask.jpg")) + mask = mask.moveaxis(0, -1).contiguous() # From CHW to HWC + mask = mask.cuda(self.device_id) + mask = [mask.clone() for _ in range(input.shape[0])] + mask = torch.stack(mask) + self.class_masks = cvcuda.as_tensor(mask, "NHWC") + + def run(self, input): + return cvcuda.composite( + self.input, + self.blurred_input, + self.class_masks, + 3, + ) diff --git a/bench/python/all_ops/op_convertto.py b/bench/python/all_ops/op_convertto.py new file mode 100644 index 000000000..48e4fa21c --- /dev/null +++ b/bench/python/all_ops/op_convertto.py @@ -0,0 +1,35 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. 
+import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import nvcv + + +class OpConvertTo(AbstractOpBase): + def setup(self, input): + self.target_dtype = nvcv.Type.F32 + self.offset = 10.2 + self.scale = 1 / 255.0 + + def run(self, input): + return cvcuda.convertto(input, self.target_dtype, self.offset, self.scale) + + def visualize(self): + pass diff --git a/bench/python/all_ops/op_copymakeborder.py b/bench/python/all_ops/op_copymakeborder.py new file mode 100644 index 000000000..c0bca25b6 --- /dev/null +++ b/bench/python/all_ops/op_copymakeborder.py @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpCopyMakeBorder(AbstractOpBase): + def setup(self, input): + self.border_mode = cvcuda.Border.CONSTANT + self.border_values = [255, 0, 0] # Border values for 3 channel input. + self.top = 30 + self.left = 40 + self.bottom = 50 + self.right = 60 + + def run(self, input): + return cvcuda.copymakeborder( + input, + border_mode=self.border_mode, + border_value=self.border_values, + top=self.top, + bottom=self.bottom, + left=self.left, + right=self.right, + ) diff --git a/bench/python/all_ops/op_customcrop.py b/bench/python/all_ops/op_customcrop.py new file mode 100644 index 000000000..0618a4821 --- /dev/null +++ b/bench/python/all_ops/op_customcrop.py @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. 
+import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import nvcv + + +class OpCustomCrop(AbstractOpBase): + def setup(self, input): + self.rectI = nvcv.RectI(x=30, y=40, width=420, height=390) + + def run(self, input): + return cvcuda.customcrop(input, self.rectI) diff --git a/bench/python/all_ops/op_cvtcolor.py b/bench/python/all_ops/op_cvtcolor.py new file mode 100644 index 000000000..6eafee402 --- /dev/null +++ b/bench/python/all_ops/op_cvtcolor.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpCvtColorRGB2GRAY(AbstractOpBase): + def setup(self, input): + pass + + def run(self, input): + return cvcuda.cvtcolor(input, cvcuda.ColorConversion.RGB2GRAY) + + +class OpCvtColorRGB2BGR(AbstractOpBase): + def setup(self, input): + pass + + def run(self, input): + return cvcuda.cvtcolor(input, cvcuda.ColorConversion.RGB2BGR) diff --git a/bench/python/all_ops/op_findcontours.py b/bench/python/all_ops/op_findcontours.py new file mode 100644 index 000000000..7fe31cab0 --- /dev/null +++ b/bench/python/all_ops/op_findcontours.py @@ -0,0 +1,109 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. 
+import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch +from torchvision.io import read_image +import matplotlib.pyplot as plt +import numpy as np +import os +import logging + +logger = logging.getLogger(__name__) + + +class OpFindContours(AbstractOpBase): + def setup(self, input): + grayscale_input = read_image( + os.path.join(self.assets_dir, "countour_lines.jpg") + ) + grayscale_input = grayscale_input.moveaxis( + 0, -1 + ).contiguous() # From CHW to HWC + # Binarize the grayscale_input + grayscale_input[grayscale_input <= 50] = 0 + grayscale_input[grayscale_input > 50] = 255 + + grayscale_input = [grayscale_input.clone() for _ in range(input.shape[0])] + grayscale_input = torch.stack(grayscale_input) + grayscale_input = grayscale_input.cuda(self.device_id) + self.grayscale_input = cvcuda.as_tensor(grayscale_input, "NHWC") + + def run(self, input): + return cvcuda.find_contours(self.grayscale_input) + + def visualize(self): + """ + Attempts to visualize the output produced by the operator as an image by writing it + down to the disk. May raise exceptions if visualization is not successful. + """ + output_dir = self._setup_clear_output_dir(filename_ends_with="_op_out.jpg") + # Convert the inputs and outputs to numpy arrays first. + # input shape: NHWC + # out[0] = points_info shape: NxMx2 (M == max points, 2 for x and y coordinates) + # out[1] = contours_info shape: NxC where + # (C == max contours, number of non-zero elements are number of contours) + input_npy = ( + torch.as_tensor( + self.grayscale_input.cuda(), device="cuda:%d" % self.device_id + ) + .cpu() + .numpy() + ) + points_npy = ( + torch.as_tensor(self.op_output[0].cuda(), device="cuda:%d" % self.device_id) + .cpu() + .numpy() + ) + num_contours_npy = ( + torch.as_tensor(self.op_output[1].cuda(), device="cuda:%d" % self.device_id) + .cpu() + .numpy() + ) + + # Loop over all the images... + for i, img in enumerate(input_npy): + + # Grab the information on the points and the contours of this image. + points_info = points_npy[i] + contours_info = num_contours_npy[i] + + # Keep only the non-zero entries from contours_info + contours_info = contours_info[np.nonzero(contours_info)] + # Use the num_points in contours_info to split the points_info + # Since the values in num_points are not start-stop indices of the points + # we need to use cumsum to fix it and use it inside the split function + valid_points = np.split(points_info, contours_info.cumsum()) + # Last element in valid_points is the remainder of the points so need to drop it. + all_contours = valid_points[:-1] # This list stores OpenCV style contours. + + plt.figure(figsize=(img.shape[1] / 100.0, img.shape[0] / 100.0)) + plt.gca().invert_yaxis() + + plt.plot(0, 0, color="white") + plt.plot(img.shape[1], img.shape[0], color="white") + for contour in all_contours: + x, y = contour[:, 0], contour[:, 1] + plt.plot(x, y, color="green", linewidth=2) + + # Save using PIL + out_file_name = "img_%d_op_out.jpg" % i + plt.savefig(os.path.join(output_dir, out_file_name)) + plt.close() diff --git a/bench/python/all_ops/op_flip.py b/bench/python/all_ops/op_flip.py new file mode 100644 index 000000000..962a12856 --- /dev/null +++ b/bench/python/all_ops/op_flip.py @@ -0,0 +1,29 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpFlip(AbstractOpBase): + def setup(self, input): + self.flip_code = -1 # means flipping around both axes. + + def run(self, input): + return cvcuda.flip(input, flipCode=self.flip_code) diff --git a/bench/python/all_ops/op_gaussianblur.py b/bench/python/all_ops/op_gaussianblur.py new file mode 100644 index 000000000..cd306ec93 --- /dev/null +++ b/bench/python/all_ops/op_gaussianblur.py @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpGaussianBlur(AbstractOpBase): + def setup(self, input): + self.kernel_size = (3, 3) + self.sigma = (5, 5) + + def run(self, input): + return cvcuda.gaussian(input, kernel_size=self.kernel_size, sigma=self.sigma) diff --git a/bench/python/all_ops/op_hqresize.py b/bench/python/all_ops/op_hqresize.py new file mode 100644 index 000000000..a5514ab72 --- /dev/null +++ b/bench/python/all_ops/op_hqresize.py @@ -0,0 +1,53 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. 
+import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpHqResizeDown(AbstractOpBase): + def setup(self, input): + self.resize_width = 640 + self.resize_height = 420 + + def run(self, input): + return cvcuda.hq_resize( + input, + ( + self.resize_height, + self.resize_width, + ), + interpolation=cvcuda.Interp.NEAREST, + ) + + +class OpHqResizeUp(AbstractOpBase): + def setup(self, input): + self.resize_width = 1920 + self.resize_height = 1280 + + def run(self, input): + return cvcuda.hq_resize( + input, + ( + self.resize_height, + self.resize_width, + ), + interpolation=cvcuda.Interp.LINEAR, + ) diff --git a/bench/python/all_ops/op_inpaint.py b/bench/python/all_ops/op_inpaint.py new file mode 100644 index 000000000..c2419545b --- /dev/null +++ b/bench/python/all_ops/op_inpaint.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch +from torchvision.io import read_image +import os + + +class OpInpaint(AbstractOpBase): + def setup(self, input): + data = read_image(os.path.join(self.assets_dir, "brooklyn.jpg")) + mask = read_image(os.path.join(self.assets_dir, "countour_lines.jpg")) + # Binarize the mask + mask[mask <= 50] = 0 + mask[mask > 50] = 255 + + # Add scratch marks on the top of the input data and convert it to tensor + mask3 = mask.repeat(3, 1, 1) + data[mask3 > 0] = mask3[mask3 > 0] + data = data.moveaxis(0, -1).contiguous() # From CHW to HWC + data = [data.clone() for _ in range(input.shape[0])] + data = torch.stack(data) + data = data.cuda(self.device_id) + self.data = cvcuda.as_tensor(data, "NHWC") + + mask = torch.unsqueeze(mask[0], -1) # 3 channel chw to 1 channel hwc mask + mask = [mask.clone() for _ in range(input.shape[0])] + mask = torch.stack(mask) + mask = mask.cuda(self.device_id) + self.masks = cvcuda.as_tensor(mask, "NHWC") + self.inpaint_radius = 3 + + def run(self, input): + return cvcuda.inpaint( + self.data, + self.masks, + self.inpaint_radius, + ) diff --git a/bench/python/all_ops/op_jointbilateral.py b/bench/python/all_ops/op_jointbilateral.py new file mode 100644 index 000000000..99b0cc0f7 --- /dev/null +++ b/bench/python/all_ops/op_jointbilateral.py @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch +from torchvision.io import read_image +import os + + +class OpJointBilateral(AbstractOpBase): + def setup(self, input): + self.diameter = 5 + self.sigma_color = 50 + self.sigma_space = 1 + + data = read_image(os.path.join(self.assets_dir, "brooklyn.jpg")) + data = data.moveaxis(0, -1).contiguous() # From CHW to HWC + data = [data.clone() for _ in range(input.shape[0])] + data = torch.stack(data) + data = data.cuda(self.device_id) + data = cvcuda.as_tensor(data, "NHWC") + self.grayscale_input = cvcuda.cvtcolor(data, cvcuda.ColorConversion.RGB2GRAY) + + mask = read_image(os.path.join(self.assets_dir, "brooklyn_mask.jpg")) + mask = mask.moveaxis(0, -1).contiguous() # From CHW to HWC + mask = [mask.clone() for _ in range(input.shape[0])] + mask = torch.stack(mask) + mask = mask.cuda(self.device_id) + self.class_masks = cvcuda.as_tensor(mask, "NHWC") + + def run(self, input): + return cvcuda.joint_bilateral_filter( + self.class_masks, + self.grayscale_input, + diameter=self.diameter, + sigma_color=self.sigma_color, + sigma_space=self.sigma_space, + ) diff --git a/bench/python/all_ops/op_laplacian.py b/bench/python/all_ops/op_laplacian.py new file mode 100644 index 000000000..ee9d4b75a --- /dev/null +++ b/bench/python/all_ops/op_laplacian.py @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpLaplacian(AbstractOpBase): + def setup(self, input): + self.kernel_size = 3 + self.scale = 2.0 + + def run(self, input): + return cvcuda.laplacian(input, ksize=self.kernel_size, scale=self.scale) diff --git a/bench/python/all_ops/op_morphology.py b/bench/python/all_ops/op_morphology.py new file mode 100644 index 000000000..f13434e05 --- /dev/null +++ b/bench/python/all_ops/op_morphology.py @@ -0,0 +1,102 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch + + +class MorphologyBase: + def __init__(self, device_id, input, morphology_type): + self.device_id = device_id + self.mask_size = [5, 5] + self.anchor = [-1, -1] + self.num_iterations = 3 + self.border_type = cvcuda.Border.CONSTANT + self.morphology_type = morphology_type + + # Morphology requires binary input, with mostly white foreground + threshold_value = torch.tensor([150.0] * input.shape[0]) + threshold_value = threshold_value.type(torch.float64) + threshold_value = threshold_value.cuda(self.device_id) + threshold_value = cvcuda.as_tensor(threshold_value, "N") + + maxval = torch.tensor([255.0] * input.shape[0]) + maxval = maxval.type(torch.float64) + maxval = maxval.cuda(self.device_id) + maxval = cvcuda.as_tensor(maxval, "N") + self.binary_input = cvcuda.threshold( + input, threshold_value, maxval, type=cvcuda.ThresholdType.BINARY + ) + + if self.num_iterations > 1: + self.workspace = cvcuda.Tensor(input.shape, input.dtype, "NHWC") + else: + self.workspace = None + + def __call__(self): + return cvcuda.morphology( + self.binary_input, + self.morphology_type, + maskSize=self.mask_size, + anchor=self.anchor, + workspace=self.workspace, + iteration=self.num_iterations, + border=self.border_type, + ) + + +class OpMorphologyOpen(AbstractOpBase): + def setup(self, input): + self.MorphologyBase = MorphologyBase( + self.device_id, input, cvcuda.MorphologyType.OPEN + ) + + def run(self, input): + return self.MorphologyBase() + + +class OpMorphologyClose(AbstractOpBase): + def setup(self, input): + self.MorphologyBase = MorphologyBase( + self.device_id, input, cvcuda.MorphologyType.CLOSE + ) + + def run(self, input): + return self.MorphologyBase() + + +class OpMorphologyDilate(AbstractOpBase): + def setup(self, input): + self.MorphologyBase = MorphologyBase( + self.device_id, input, cvcuda.MorphologyType.DILATE + ) + + def run(self, input): + return self.MorphologyBase() + + +class OpMorphologyErode(AbstractOpBase): + def setup(self, input): + self.MorphologyBase = MorphologyBase( + self.device_id, input, cvcuda.MorphologyType.ERODE + ) + + def run(self, input): + return self.MorphologyBase() diff --git a/bench/python/all_ops/op_nms.py b/bench/python/all_ops/op_nms.py new file mode 100644 index 000000000..dd9abfa9f --- /dev/null +++ b/bench/python/all_ops/op_nms.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch +import os + + +class OpNMS(AbstractOpBase): + def setup(self, input): + bboxes = torch.load( + os.path.join(self.assets_dir, "brooklyn_bboxes.pt"), + map_location="cuda:%d" % self.device_id, + ) + bboxes = [bboxes[0].clone() for _ in range(input.shape[0])] + bboxes = torch.stack(bboxes) + self.bboxes = cvcuda.as_tensor(bboxes) + + scores = torch.load( + os.path.join(self.assets_dir, "brooklyn_scores.pt"), + map_location="cuda:%d" % self.device_id, + ) + scores = [scores[0].clone() for _ in range(input.shape[0])] + scores = torch.stack(scores) + self.scores = cvcuda.as_tensor(scores) + self.confidence_threshold = 0.9 + self.iou_threshold = 0.2 + + def run(self, input): + return cvcuda.nms( + self.bboxes, self.scores, self.confidence_threshold, self.iou_threshold + ) + + def visualize(self): + pass diff --git a/bench/python/all_ops/op_normalize.py b/bench/python/all_ops/op_normalize.py new file mode 100644 index 000000000..a17fd296f --- /dev/null +++ b/bench/python/all_ops/op_normalize.py @@ -0,0 +1,45 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch + + +class OpNormalize(AbstractOpBase): + def setup(self, input): + mean_tensor = ( + torch.Tensor([0.485, 0.456, 0.406]).reshape(1, 1, 1, 3).cuda(self.device_id) + ) + self.mean_tensor = cvcuda.as_tensor(mean_tensor, "NHWC") + stddev_tensor = ( + torch.Tensor([0.229, 0.224, 0.225]).reshape(1, 1, 1, 3).cuda(self.device_id) + ) + self.stddev_tensor = cvcuda.as_tensor(stddev_tensor, "NHWC") + + def run(self, input): + return cvcuda.normalize( + input, + base=self.mean_tensor, + scale=self.stddev_tensor, + flags=cvcuda.NormalizeFlags.SCALE_IS_STDDEV, + ) + + def visualize(self): + pass diff --git a/bench/python/all_ops/op_randomresizedcrop.py b/bench/python/all_ops/op_randomresizedcrop.py new file mode 100644 index 000000000..0dc1f5c03 --- /dev/null +++ b/bench/python/all_ops/op_randomresizedcrop.py @@ -0,0 +1,44 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpRandomResizedCrop(AbstractOpBase): + def setup(self, input): + self.resized_shape = (input.shape[0], 320, 580, 3) + self.min_scale = 0.08 + self.max_scale = 1.0 + self.min_ratio = 0.75 + self.max_ratio = 1.33333333 + self.interpolation_type = cvcuda.Interp.LINEAR + self.seed = 4 + + def run(self, input): + return cvcuda.random_resized_crop( + input, + self.resized_shape, + self.min_scale, + self.max_scale, + self.min_ratio, + self.max_ratio, + self.interpolation_type, + self.seed, + ) diff --git a/bench/python/all_ops/op_reformat.py b/bench/python/all_ops/op_reformat.py new file mode 100644 index 000000000..eb4c2ddc8 --- /dev/null +++ b/bench/python/all_ops/op_reformat.py @@ -0,0 +1,43 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpReformatNCHWToNHWC(AbstractOpBase): + def setup(self, input): + self.input_nchw = cvcuda.reformat(input, "NCHW") + + def run(self, input): + return cvcuda.reformat(self.input_nchw, "NHWC") + + def visualize(self): + pass + + +class OpReformatNHWCToNCHW(AbstractOpBase): + def setup(self, input): + pass + + def run(self, input): + return cvcuda.reformat(input, "NCHW") + + def visualize(self): + pass diff --git a/bench/python/all_ops/op_remap.py b/bench/python/all_ops/op_remap.py new file mode 100644 index 000000000..31175d66e --- /dev/null +++ b/bench/python/all_ops/op_remap.py @@ -0,0 +1,53 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import numpy as np +import torch + + +class OpRemap(AbstractOpBase): + def setup(self, input): + batch_size, width, height = input.shape[0], input.shape[2], input.shape[1] + batch_map = np.stack([self.flipH(w=width, h=height) for _ in range(batch_size)]) + batch_map = torch.as_tensor(batch_map, device="cuda") + self.batch_map = cvcuda.as_tensor(batch_map, "NHWC") + self.src_interp = cvcuda.Interp.LINEAR + self.map_interp = cvcuda.Interp.LINEAR + self.map_type = cvcuda.Remap.ABSOLUTE + self.align_corners = True + self.border_type = cvcuda.Border.CONSTANT + self.border_value = np.array([], dtype=np.float32) + + def flipH(self, w, h): + mesh = np.meshgrid(np.arange(w)[::-1], np.arange(h)) + return np.stack(mesh, axis=2).astype(np.float32) + + def run(self, input): + return cvcuda.remap( + input, + self.batch_map, + self.src_interp, + self.map_interp, + self.map_type, + align_corners=self.align_corners, + border=self.border_type, + border_value=self.border_value, + ) diff --git a/bench/python/all_ops/op_reshape.py b/bench/python/all_ops/op_reshape.py new file mode 100644 index 000000000..37bc63950 --- /dev/null +++ b/bench/python/all_ops/op_reshape.py @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpReshape(AbstractOpBase): + def setup(self, input): + self.shape = input.shape[::-1] # Reverse everything out + + def run(self, input): + return cvcuda.reshape(input, shape=self.shape) + + def visualize(self): + pass diff --git a/bench/python/all_ops/op_resize.py b/bench/python/all_ops/op_resize.py new file mode 100644 index 000000000..0a3d4fcf5 --- /dev/null +++ b/bench/python/all_ops/op_resize.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpResizeDown(AbstractOpBase): + def setup(self, input): + self.resize_width = 640 + self.resize_height = 420 + + def run(self, input): + return cvcuda.resize( + input, + ( + input.shape[0], + self.resize_height, + self.resize_width, + input.shape[3], + ), + cvcuda.Interp.AREA, + ) + + +class OpResizeUp(AbstractOpBase): + def setup(self, input): + self.resize_width = 1920 + self.resize_height = 1280 + + def run(self, input): + return cvcuda.resize( + input, + ( + input.shape[0], + self.resize_height, + self.resize_width, + input.shape[3], + ), + cvcuda.Interp.LINEAR, + ) diff --git a/bench/python/all_ops/op_rotate.py b/bench/python/all_ops/op_rotate.py new file mode 100644 index 000000000..b7d0697ee --- /dev/null +++ b/bench/python/all_ops/op_rotate.py @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpRotate(AbstractOpBase): + def setup(self, input): + self.angle_deg = 40 + self.shift = [input.shape[2] // 4, input.shape[1] // 4] + self.interpolation_type = cvcuda.Interp.LINEAR + + def run(self, input): + return cvcuda.rotate(input, self.angle_deg, self.shift, self.interpolation_type) diff --git a/bench/python/all_ops/op_sift.py b/bench/python/all_ops/op_sift.py new file mode 100644 index 000000000..1d0e23567 --- /dev/null +++ b/bench/python/all_ops/op_sift.py @@ -0,0 +1,47 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda + + +class OpSIFT(AbstractOpBase): + def setup(self, input): + self.max_features = 100 + self.num_octave_layers = 3 + self.contrast_threshold = 0.04 + self.edge_threshold = 10.0 + self.init_sigma = 1.6 + self.grayscale_input = cvcuda.cvtcolor( + self.input, cvcuda.ColorConversion.RGB2GRAY + ) + + def run(self, input): + return cvcuda.sift( + self.grayscale_input, + self.max_features, + self.num_octave_layers, + self.contrast_threshold, + self.edge_threshold, + self.init_sigma, + flags=cvcuda.SIFT.USE_EXPANDED_INPUT, + ) + + def visualize(self): + pass diff --git a/bench/python/all_ops/op_threshold.py b/bench/python/all_ops/op_threshold.py new file mode 100644 index 000000000..6cd277fc3 --- /dev/null +++ b/bench/python/all_ops/op_threshold.py @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import torch + + +class OpThreshold(AbstractOpBase): + def setup(self, input): + threshold = torch.tensor([150.0] * input.shape[0]) + threshold = threshold.type(torch.float64) + threshold = threshold.cuda(self.device_id) + self.threshold = cvcuda.as_tensor(threshold, "N") + + maxval = torch.tensor([255.0] * input.shape[0]) + maxval = maxval.type(torch.float64) + maxval = maxval.cuda(self.device_id) + self.maxval = cvcuda.as_tensor(maxval, "N") + + self.threshold_type = cvcuda.ThresholdType.BINARY + + def run(self, input): + return cvcuda.threshold( + input, thresh=self.threshold, maxval=self.maxval, type=self.threshold_type + ) diff --git a/bench/python/all_ops/op_warpaffine.py b/bench/python/all_ops/op_warpaffine.py new file mode 100644 index 000000000..9a4f062b1 --- /dev/null +++ b/bench/python/all_ops/op_warpaffine.py @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import numpy as np + + +class OpWarpAffine(AbstractOpBase): + def setup(self, input): + self.xform = np.array( + [[1.26666667, 0.6, -83.33333333], [-0.33333333, 1.0, 66.66666667]] + ) + self.flags = cvcuda.Interp.LINEAR + self.border_mode = cvcuda.Border.CONSTANT + self.border_value = [] + + def run(self, input): + return cvcuda.warp_affine( + input, + self.xform, + flags=self.flags, + border_mode=self.border_mode, + border_value=self.border_value, + ) + + +class OpWarpAffineInverse(AbstractOpBase): + def setup(self, input): + self.xform = np.array( + [[1.26666667, 0.6, -83.33333333], [-0.33333333, 1.0, 66.66666667]] + ) + self.flags = cvcuda.Interp.LINEAR + self.border_mode = cvcuda.Border.CONSTANT + self.border_value = [] + + def run(self, input): + return cvcuda.warp_affine( + input, + self.xform, + flags=self.flags, + border_mode=self.border_mode, + border_value=self.border_value, + ) diff --git a/bench/python/all_ops/op_warpperspective.py b/bench/python/all_ops/op_warpperspective.py new file mode 100644 index 000000000..c73ae25d2 --- /dev/null +++ b/bench/python/all_ops/op_warpperspective.py @@ -0,0 +1,70 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. 
+import pycuda.driver as cuda # noqa: F401 + +from bench_utils import AbstractOpBase +import cvcuda +import numpy as np + + +class OpWarpPerspective(AbstractOpBase): + def setup(self, input): + self.xform = np.array( + [ + [3.46153846e-01, 3.33031674e-01, 1.28000000e02], + [0.00000000e00, 6.92307692e-01, 0.00000000e00], + [-4.50721154e-04, 5.65610860e-04, 1.00000000e00], + ], + np.float32, + ) + self.flags = cvcuda.Interp.LINEAR + self.border_mode = cvcuda.Border.CONSTANT + self.border_value = [] + + def run(self, input): + return cvcuda.warp_perspective( + input, + self.xform, + flags=self.flags, + border_mode=self.border_mode, + border_value=self.border_value, + ) + + +class OpWarpPerspectiveInverse(AbstractOpBase): + def setup(self, input): + self.xform = np.array( + [ + [3.46153846e-01, 3.33031674e-01, 1.28000000e02], + [0.00000000e00, 6.92307692e-01, 0.00000000e00], + [-4.50721154e-04, 5.65610860e-04, 1.00000000e00], + ], + np.float32, + ) + self.flags = cvcuda.Interp.LINEAR | cvcuda.Interp.WARP_INVERSE_MAP + self.border_mode = cvcuda.Border.CONSTANT + self.border_value = [] + + def run(self, input): + return cvcuda.warp_perspective( + input, + self.xform, + flags=self.flags, + border_mode=self.border_mode, + border_value=self.border_value, + ) diff --git a/bench/python/assets/NOTICE.md b/bench/python/assets/NOTICE.md new file mode 100644 index 000000000..7dd764391 --- /dev/null +++ b/bench/python/assets/NOTICE.md @@ -0,0 +1,19 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." +[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." 
+ + +The data files obtained from the following sources : + +- brooklyn.jpg is obtained from [pexels](https://www.pexels.com/photo/people-across-on-intersection-1486222/) under Pexels license diff --git a/bench/python/assets/brooklyn.jpg b/bench/python/assets/brooklyn.jpg new file mode 100644 index 000000000..89e67551c --- /dev/null +++ b/bench/python/assets/brooklyn.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e406cb60c8f9200d4bc5d95e8b9d8a8168876390f0c6b08c837b90795d4beb +size 247098 diff --git a/bench/python/assets/brooklyn_bboxes.pt b/bench/python/assets/brooklyn_bboxes.pt new file mode 100644 index 0000000000000000000000000000000000000000..3261e47208b22be52416c8639207fe1bf06569e9 GIT binary patch literal 196520 zcmZ_12Yi-A^Y1;o3rR@tz4wHYkU|M1^bVo--g^g8k)nXoR6qd>q9WKH6+{sP6-7a@ zAPNX5Ql$6ZVb6DOpZ_`Mec#WkA9Cm3Yp>banc3gY&dzSywlw8Le0=2p^IwpJN&o(1 zrVN^pH?ZHtepSYf7}=yjkaYil{x?Gpm^fzqfT30TOqw`s}cX#AjojfM@F=sV6Qr~SAJ6UX_sALm!4ZA)*TssDV|a}AW;zqbFsy$+D1|GU?D z`Ar(c{lEMCe@mcHc0Ft^N9C#wkoI!UD*45hmjIb3YwQH+EcfhjJ4|Awv0Sr3QeHO7 z1v^GUB$&Eg#8-UeruC=%YdK+`rCz#RvdzU?D#=}&DoXA{;Y;z7LiyeHlUlM$?%P6o z5U(ECol;Jk(_UYxC)?zKZ9sWG<-b$jR(9EOQYyEgQ%kfoRS)eSl=r0mBT`>Z${kx> zjI^ixFy(#ZCp%XfNQk;=3*e`hveefyT7Iz4(Mq_wZ7WmW2_@};uc`8feL-qSeRbC+ zK&Ksau24Q-j@o5XEZr$j=KXf+zO_7mNlx2WB}dBCJ?d*2OZh*vyGqX5S150yZrNDq zKg08Vl&_Uj(5WOn)OAn!XVgCqov-8=^{dI->V{3D-f{RjLBIc!-S$OkC|>%Gtpfdh zlpm-3w(PL;q=QtTydvd0C_l|-sQS@9CC%g_bhG*Xu7`ek$xy%B$D~O9rax)CKNr5f zkqT0v{zPw)@+6{G|)^X7c=L_-ZLX+SQc%NFef9VW&t7c|(OuU3r)C^Pci3 zwVC?Wamux!k?@_01HS{uE-?c~4XQn=FJAouk_$)$RKT-Yz<>~qw_UR`t>yy;4 zAa7B=gz`rEmK}mEKda9oCok!)@AF-tci;Aw>vowwV{@rDOkcKLk?$DoBOT>0yG|do zk&LHm`n>JP_xcY@(2kSGXW9>I``PZ* zN3E}f%6Rxqr`<1f6k~6n{Z9Xl{3B!r?We={4>}2X?zFq~U$%lo$x(fs@(S{YerPMn z5qn1e37r_JV}wzA-)_@4=|_1HbBJ~$r7h+4u$v#X$SApGBg`oq4BuVNE$Dd35$tsm z&s&)L*uR#B<|O?KkkinARBqTN<~n%dE%nVF8%6u&jFLfe(l#`g;Kx^*Qa^(Jg&HG$ zZenla-N6kaq8v2XP4I9Y&@0*o28cv>opFYsv z$ou}F-<$9shvb)&*MXC5(CZYZH7A3bI=PBN+0kUFa+QQCmrH7KhCjy8mYk<|N* zR@#B}{jldA*ia)Zb)rPeAN0Eivg(I^$MY{u(qtyce}sC&c&|V9@CY(Yp}pl`;{&jI z0x}tn9X&^R8tr{X{qkVebZlcRMmjuWMwEZ;txhfX>2m|lKq>2ibt?)Vr zIs#6HQQiccs-c49SNK?f-3Nk`p_C6$AyQL?$|+>K5ZaBvnP})QcK8LI2hdr7yxW5# z(eOJ}Rgl&yNg|b!*~n=Kcp8Ol3+YoMl_s^pvEj^VBf+0UB$TVli;s$7UI|3%SL^_A zHXgZU!FvVTcno^^@IH;t7<8Edo}ZBF@;ZEk^L;w+`SBUUd$-ZxNA%&gy<(qMn$`oM zbIl%NzWi2sBjq!84gEA&lfOE`eAGn#K&}t$9?Byq_oAPl+1*&lIrvYIFTw4H9y{D5 zvuunSEO(JhAU3j1{bTbbS2oB+`;^KS)I#r1*-zAQ+Y-5ch`r5#W<29Pigq_4_qMd~ zJ$jm^;w4Q-qv`MM9(3B05$ERf5h?+k4WU1~?PYMd8}^w(KWC^+>8Jy#wa0qvQ?@Dn z?51kVBgni==QA$8vsvI~Df9M0RTCL)x6O4U#@#!%F1TKa`6f{pSywbNd*1i7Blb)+I4|FFyi}&niaM~Na4A!~ydyl=N zTQOQbx7&3E^b;9s(R!QKcwFDv-=OP5`Qz|861xd9)sfUOdy4tZ2TR|iV;Ltm zs9%ho{BEyk#YhX6oy-$+8M|K8FGXKhn3w#if1A&~Qc=Q8w$!Bk2yhsbm5Sh26831} zqbC}uWGp!BFSX3K*mVtw0hgNcy#PG(;j@J~0Pa;uk6P_GxyqiDa6d4Sw6+9}{+1o9jU-e+K6!@%bn z=w~{(9Y+0^!T(fgB##>}topKDLc5_@-rMFi{Jz=p6!S=ZH2EL+3gpSV=6%W^mo@O8 zhVS|f?F1vA4dyiXUq`+$K4|qQxb5bnNZHT4k%A<58aJQbwkMf4!oa0#X1?_TrS_Q+ ztn3ea#oR$|F_^tC^UrEDoDQbmv|3i-KRe|8mUg>fv%yI9w!LOA*c?zc5Dck}om8d1 z0?+-ym=rM53rh7U@50k__6T$DA^hw9NIHelcm-dhC0^HFbg|72$9qkr4-IL54hVhS z{={5$mHDSGlzc(yTG(iJe*4A_KsNdGusW26purXR^f&D$I}vV+kxwJEIgpX^9%JFU z{m?#+?3>c&7}^_)y?urEP!1n@2{LFR*)jo-dXwz6-yzL-`P+WP&no=hg+5>9b3ar; zak~-P@z80@+t*kURR4Ffj=sWE}IKzT5)4LIQ^+if1d zC&B4*+HvE0lxj|E%WNHW$aa<~@ZDL;;afflo+|k14(^p8;a`zjwE7D?J!|iQ7`;_6 ztBDr!P)2}RhwLlh*HtWi9Pec_uF_O8K7JkfSw-X31u-8~roRzj5;%)aeuw`<_5tlA z(6125Q3dp|xBR1Wk>)q{b>+Ny5Lz)t)eO0(s^GP3uHTkC8c@P0h}x23EW`u!6+ zZz6{raO@epj3717#_KlfE_{A!Z`+|d4UJ?X=jLGkTorBFpxr}uxUPb>?%4@yKWmfy za$UDX8hhvS 
zO8eubx@m`gKeoGdb@XWMLOohuz!uX?J@9S^{&N-V$VV1~?*p-KF)>(Q27a_c>oMSb zvb+gSRW$j??JhX%yl*dgl6m1bCqI(`ojdk{PNm!*zuB$5)1;mWg3cuyXdJ3C?-+l^ zJTn1!RE9osO(fs^!C7y5QBI=4wZ^a7jX2{vRUmyGa2tI>A$G6RPTazBYYm(9G z9=p|OPyO}g80-5{vevk9^{f2>{C0Hqn`_J;9l>XZn+NP~l)K-qnAKqBvvSNt!1ZB! z)kM?Z7{+pQ>y6iVi&kn-eixgr0H&0e{j5hCz%#M{i>}!7tmFFf#!dX3f;YYEgl z&U#`dGRpzkZrBz`#~%rMF~)|2rgxbI2OzipAe1ljS1Khv@w9!gnuS>EYYU`rjS z)P?F|s2`&IA9}T%UUa0bK|I;SxT(PSJjP7485x&gXI;?74eZ&dG(7e&`4n5Iif2$j zuVU2`jQMUrV>67o7Myi_FYX-i23UggY`362NvQnL_GMV*Lm|LzfHw~qJWqJ}Q zPvDp16-hm%h)s=qqT$Tho4g@6JHat2>FNxdh*x|(dDUtcos+{2T35}6gsQ`RbH zC+F~=ZxL^4D_hhuMnfZe1{@A#ru|txfJzS>On)S|wRc z-oR>JVU1E%J&r$IUl!S&Hb@_2UA0sOs_)&Lqps=Z@`n8bJP(q7c&by>Q~37vm6!rq zZSUC5x;C?Bfm{Qh)70C<5g7IKtDnw8Iz>{Zkpk~|n+Eb6_Bj#L3xa<(M@Cr8f|4 zI?!?W+?nTYJz5>St_==#Fu9b5gXgJ?=w-hR9n$!`-(0@m&;Q%qIr zCCN17tUX$0K|h7hr_e(vh_;YyPWLO%l>TL>TFSlnCYF!;9&{ElIC`pPFJ2kxWr zn>RDgHp|DxdHZMW*Piki@cd=Q{UPENF5-O9?lcwQGg5vqJD{^hb~7*e$yWO->kKz9 zTu1J~@~G?qw_Pi6!;;(aK=I->^L2sa;e>!u@l31%mtSlGOrXMlP)}Q zK5-#>ZiCKZXtx0Abf^4LyrwF&KA7jxw30{f$53Yh*6L=DiFnSjv{T#R5|+A-Rgf?3 zOb2VzdC~}uo}lipjPVEffiHkb>Ad+ky%~k2SZw|r5r%ajEvtQyzq&ln@3E>9_?ga{ zv;nnKWiX>=3wE2YvWV^lW0Q?(H5K0b$S=@HRmJ#MMs^~LG}>s(^E0%Wsq&C=1$>q_^z*l+nGcO@7M&D zZ*#=uPy7DGi$0>WSI&DsoR6k z8J_Y7$Y2K+xlZ{|qZR&mGw7sAv5I7s)g63}17};pLtV-pZX2+^6>$vb_m|2v^0^a%}D)BRY9H= zm#dHrMA4!uMu&(RCzE4z$1 z{Bj$l;?!<+0InO$Yx*Vlf0udV72-7)k>eKo3)dVl=#OUdli4U9@d#8e#EVJ$s@8Ce|{%E%xIm!*vcdRkj$7s zFZjDB@n%NrNJdnNEYfjEN6WXm5u+tQ=JH-TV`8KZM9UfS7xRK)q`Uy0=OE9itREU6 z7a#CwH}tHBCI_T})#}Hb?~jooKI?smr%uaJUpJiXwho zgcT-%&(-)Ik00&uBaY{8HLUQPoqrdDzwP*|AQP#dhn`1M9>vI>Vd}tbD*kjMq*Vcb zxiRG$b)Pv7hIS$Sz$~W>C?f9Uv z^Uw)|9|bqw@VgIuF6MW|?Cau46&Pz3X#W~sdOb=M_!mU|6r>x)^M|aJ>mUziHohgI z`2yaKLlcd7?+(6q6r;Bh7#oEZHG^sy_H&!H>;*7#~hDL9R9rtMkUhnY%s*dp$1|DO~Cq3 zu--UmF7n`cE;Gg>pzznkHyZF(Fz=O7J44>U_Dbaody=Qsps*QVF&oZa0C5Mwa|Km^ z-s5B!9A(g>neaUu4>MR*VO+%{@m4&oEKk$Z7eLrVa5fQ(xWfAZv_Fwr*O&)WR4_PI zSFSUsg(A7R)Cy8lSeJ)jXN@FSHO4-p@%@YC5tu9n&S}Ya?uDuDqjZmqKv;NW?|1z0+72)qWdOlm>8_RHf z{su_4g!k?cBkC)wJanGL&uc=zGl&qS(SWfC!#RDK(+Dl&(KgQpddW-VEw z+VUAM^Hgm#_XU1$F?x%a_f%b^5-!_R87r_g*2O(85((Vm@n1t^vziPJ9kpjwP4K5S zc-oiV#)8LV(Bw6HgYS3o8xN~~SYrf!ZyD0PZ_~lMU+`cqt5Hx7VxB0&V|dqAVI6;! z9aZDO!&3bIJZ$ZV?W$)%?UFsJF0)_yvTS4@cn&>TqL;(Zr}lt)!~R8NV>5DCNPj-m zW3j@$Hd!0|!slo|pE*8CUev^I_^hiNz-2}7HjDBH_7Rqf10BU z(P|j`f^Hlo$V{DsU7WUSbte2)l8N}wE&?!L=h6=!{8JZac5&fStT91e*Ll>xX5Znv zi=Qrrek77vsxugO(ejCILN8)unQqS0Jefy3^@%v`*0mV3+47NY1Gn+=s4kGnSe`L? 
z@KJ|&Oh3l1uRNvywnNc-HtTCkr0BFBLAk&Dt$VW09Lhdi9&b00e{@T%qze9Z85aBj z_*@OU43!BzVs0%~SsjW=x!xK;@{6`IbB)?nfwLwKGg zn)N~}Ms|^DO{<~AnFlfs;w9O%gG*nj!KWYfJDc7}F&e+SDcXrAp3wpOk78f42S3Ak z?$*`*GJtl2_}&}(e&Eq4uZmsI1Rrao zpJk>!@^?5~lkfAu@fsjTdEyzJD38Ri-pW3&uX@QOf*{_q#dL+s7+Gz;1BbPGlX<5j zIQ$cIT)btq*#(~8qMq~4j)Kpv>2186G;cFYAC+CqU(u|9&mte^c?)lQ^ZY0_UV@z| znM8RsWnGZ6%gktwy*XcJ4E>E`toA`ZiC}GSwB=$YefZ~M*QHoN4z}6}Z9HI4uax$y zGDb@n-AQ14A23}Z(`>ZSh(1=LJPXX|#5*pJqdN6#U?DXqugLS})bs}p>(JXAY~RKC z+u$j;g=#c-?cyb2|Kb~S!&GpqA{1s*?)E!o!fA7;y^7Ql;Cun2BoQuNq7N0}`~~U{ zqQ)y=rQ>}8GU&$lH~80^-#!KJQfO%bR{j`zI0Noh1uwVrG>zY$qMa9!%YFE+#IyZy zl!&G$!v9+8xL9WveLjz89)TtwgU&C=ItARr3m`9JByx^qrBg?KfX7P84=MFw?{YY@ z52LJuyamEkRw1BiJ@zSwVMU?rPfmgQm$sIQ!-_gcW&A>40~q3zPu2%r8)`~%ejZa*O|P?OTivR?JX zhWw~kjD9ZK?^Hw3wT^tMx?^WP@{9TmD_BnbZfHA)cm-n}eg3T8LYwc~JK%FD>Cs2_(cH?vl_WoOw++Q}h^1ZDVc@YJ-)reX z*hyvStX;OzH+G<|L2E|p>7kx+F+z96zRr-RQITF1%2?eKeMQP_-2(oMOxDg1NyAU> zO#3nL(S&}27vOU+7BofogWm{wLl-etg}92F?^tQ;!IVdnH&T=L{N#J^Jp?){;U@zA zUMD`0k7W1g*7UOi_}q`$6`5CZ!I{gpoN3PUO7e|vh*h?b=d=%0@y1Mb{FW@?ag|_!(+Nf$P2C^O~(@Mk2Q$ z{Ncwv{jO=cG13BQe-PipDWA$)e(Vo6rk8#)h;rv|_cJ5-=|g;@C33lFM}n(v-*77P zN_F%<)${>xZrTO#=ltIJCY$jwf;e+~=pVI9O@DeFEw7mrQ0puF=Z08|KlnU^cB9DO z*k!x3ihdP2^aee*m|-6NR-50!?F6+F|Gz)}^G?d0zx^KNU5HSwHBpS}li>3R_>Ptf z<_A{utL0mhiIuqh!S=`}9@##|XDoB%D2D>foeMntkXe8{g84ogsk)fdBj`UFS$3vB zS-dk=+)i37vvC9#mP>gi<0+2SS%GH+xvac&>U8#$7Qnq*=hTO8EbqJ7HVaA(kWoe6 zZzIE?lf=_z)Gy?FPwHmzy+6`VX4c5(X$vrB5C|P5)4>kM-xTV`V-I5}b-R?a_|2`0 z=7PIfeVEH(rhevIRa^5R_Q1htm4L*;=A3kmy zs318K^|Wd&zk5_a4!?NZ&GeY^f>Y5 zMxay}xZWMP6@!ye@Rfw$+KSSi#4(<9@WQ9tFDjPZsy#SbMK%zV(Tw9y)Nu56*q&E? zu%Qm%uZxbPLFZ-q`7QqQ6p!5Zg3rmc`z{kBTk8p+ z87m<@iua3%_x8f#1V4BHV%=w_lZ9{ zKOvMj$56^ci92_vy*uFZSdU&F2e0!$pC|FVUHp4C`0VyIX3_t;jE^~{EAz!!yWEW9 z?Ra^`J_bf*(G;Kf)Z`ouR$q% zC)2xV`P`f#KPW=IYib~`)Akthada+FKbUd%y=g`-PJ_=~;NR^JK1csjz~^abvl1=8 z44pXIcpAQfh#)OQn;~fU5lXY@`%F;HMI;tO-w*7Z!gx#qW1nOkBq8}{=(jij=0PJK z3z|xKHY4O|-VS4AJwbF>meT%tEt*%-+!<(&$k@I15-j>LMR z$EN7y9Y)~=xP1gZW0)PBf8-+Ci)iN$^693)wcCS#l=d^AIvB3X)9QS<4WoP{_BIB; zxgV{EV}lFPdK_HVWZvJ1Z_tBrQHi+7BA&*h-u&+DT+=@8DVyH)hx@~xKGm*&Q zc$019Fccl$z6{>JEdK#V-MXS4dycQdhd=cCF(*7nyuoD`m9pQs0zUolM7lHAEF*ud z9kX~Td2=hFA3@xF0ld{>pT8|~sSZBBM*SfC;TM?+$RP(OOPIm(*mHF8D7Sa_0g@~Q zS6$?@nB0jdB=(Sq&SK=%4}UiTO3l?1)W&Cqk0M6OZ16LJnjKVs-fPHyWLtXSEzhgL zNcWx{sv4t@Zt|w8=~@4*BW~h_UZ$$9P-#v4x&&_S+xNlS1Sl@zy+jahovMYjtAOXj zkYY>X2+L_rgU2!SqPF}5{#r7wKcl=n^fswXtgtFL`xv&>oOtvg?6fs`er?frE^+DC zK#efsBXt-*)A8ebVJU4q^`oixH}gYZBrpfOc9s{XCqu0V<&T0IL-2>mMaORI>Bo>t zW%9=Rqs`^i>wq>c+7a5Vhij0RHx=tFmFoHttml-SphqLGo3^T6gq?(e+e46|NGIJH z?ZuNn;qaE&sUAu{ePn=kIjP0cRnI^#;W7|tS!dKlc4 zfZr>i6AA8)!{_8-c=Um5MC${grr^X9AIqqG0l^wa#jZa;#KKODcN zd(z87`48=9g10;M7`RObpWVFLR-VzFu=6p*oyViEOzPJ}J5xRTpil9hG0vVN!2QaM zq_yNp&u1hw5nuBJ-oOQWnEth8_4osPJjKkG0KPi>P{c1>{L>43bt{tyaIYeI3pdWM zAd+XMF{|X`54VNiT=Ftr;JsjQ`Eh!cNnB+p`VEzQvz+?mqnoEZa;w7o6|ut-v(O{A zK4v;nBIn9HkM+lpKQ|lsgn`dbI;~4xdCY^`vrQl5Q5AfigB5oqzitLkkJuH);d3f^cX?QwzZwZYy|9VsTTZv4MrO34yUB1QlBxt=s`%!4*HBd1bS-yre+|Judq?t~= z)o9J_1HOS=+zRa-ys!j*e+#(uhZTwaQhXk zJBH^j-cTEVb{6)0g3N`b^a=lj^+Ffgev{vVJ-u5*>&f7J26%gpmHh;G_91`zHCj)F z+emo0E+0{&J@vLi-$h8?06k{mC4Y{!ccJ_sHDi&?X0YHfJd9sxxfLy30R!AP*a3b{ zf%o%_g+%@@4-UI|`B%y((^@G0-!1lo!qk7@CkA|c3Q&cZ7w^b&NJPPZM0H0rn|2X2;FCqIAcD(9MEpiX>C-GkJo5x`V z=j=21yMgql19(^kBz#(p1Vyc#tH#oE_5r~4BIb~1`5uVHzXX0p(9`MQZYA`whW5O$ z|G8>DRO8qmblLHF#50_K5sTlwjM*iMIL3>NnhM}_N2sLmeIfJ%_W+90+sO|Zf`w)g zcUXjt*5^EqvyhMEs(PFi(|y}a_eE}Ha-a4av2qF4&tX$O_`6Nu8w3YGoqyI>e}T94 zIe8V6k+QY<`dzpUm6m!o_9f(13`R~x(pax%Y=nZ_v(Q=`csz@K#)8ide?p~~cDRsE 
z{b%T1Ii4?qPB{4dI&uv0)UVFW_ZaOu{9mdE@UukP;*a-4V~h1t=mdG<8D!mq+fRBT zlFOhUfaX_#=OOU>678nJ=Ni2dIz*PhUzcI_g?~<3PJBd;sxdQzN+B*Tg)^L)5{~S!u{LEUk zON`FE!)S z5cU&arFJg)drP1qVBeeh^eRUF z#!rvOr`u_UiaI<)Ot)I8Oy9$u>uFZ=k1q@OLcZa|C_e zfOqD@nD~JHCSno&z^{3{^Bwqqm7K^o;HDydn!x)r(b->&n?E^owUBXJf`9y;hkkob zl;mN-Pw*MZ8xx2`Bzsy}ht_Y}8S*W>#ejlidG8UBcsbhN&mPccp7O_-)rhvy+AQ9? zZC66qtw-E>nOBkLM&3wdXJ#$YH5adY3N5@uv~~ke-Ol;9%nQ-uS%rjSx$j_9!9O?7S1QFoo&&Xgh-uVUayp-?vkyAMI52L-Y`0W8|I`7vf z{=5c0*%PGqL!p!f4n=`b>FRy-0`l>^h&RzZ--^KJFDd853HaQYH_Fs3w3|&X$LI7Y zj`NGp@-u||+!v8jCUN99_y&%sXK05=I5{E(Skyr9%FXZ7p;Lhx18A={)-V=-J06KI zQVS?g;>^ZI^mCc~=O-LD?E5{-93M%(-CF2-b6(J`zhc4N6^y?y@cvEa;Y_}}%tSwr zAI<3%{N-0XssV^kUWgJyv2Dny_xY7O?+cD`tT#({3zDYP}=A% zd<&P>`fcbY6Zd!psUc_msfYh=;JeG<9th5+Qqos{?4dJO58-=PaC$D%^CAEG6Urlr z4?he2BI%|-WDOY$K5vA6Fu1*#rv>=o>!9|)z6_rZpI_1rKPnSHUJKWMg3li#%@A1+ zjs`KZKZYN-Z@x}{K==zWkvXoY`h+mHbZ=1;(p4X-4zv*S@y$bmn zUtzBiat}PN$;##`e7b$E2YlvYXI}W5wV~)`w!ja2+~#xI%K*=x#~yr%b9{kyVWH-0 zv{(dQuVB850G~gnl*~=@E#-;$&+kI50`ZM6;X9PL#t-P3oleSg(0qyc6zWmn@ejPs zdfa>nHJ1;)jh|7(nSY@?RoZ})#q?~Tar%ms@%YU_=w+0dORq!0*H5Sqx*6x+WyliB zA97-AuK5qovxz@1Ldq?{e{O1 z^eY>BTbYO4{@m-PHP%$D-ZD?~%}ahUZ$K}WIL2a6{|+!;IREA}^Nt%Y=ir0Wt$bg> zD2kF(o_Q*cvGpqY%pubG0XSNKZSCUySn9sTzg&3#5IX6+vxENUz|m@??lNrN;hT%G zevEeOfLJf1&uspEi=77(yLks(tjg~%W8b~t`6AEdalsg3=}WaR!g+ zIrP>5AN6UTE=E@8@ul}s{ygOki6p)bA5ZY_5_rCzT+l_7HwN=o(*McKZ9726pX_AD zVj{RVmHDDF?=HY24j`_%fY_>&&l9xPnE1mAP}k-Ay~&9)cOGdF-sB>@(jDNrJOBF` zp0U&S0J2Jd!)x0Ew&oHA+Kw+4Nxz>YCc1}H+I!eZYyh7JJ>`{|ePW0|)K>fCL+t$m z=T_Fz_Z%X8U1;M+RyJ?a?=x$!iXsX-(0>#J@o-uOXp7c<9Y z>%MzYbBU)PF5dAi)^*9Qpxnj(KT+TD)Yntq70Itv+i5otT;0K^7yE}_!lwbJcTzt9 z-2M*TmLng{`9n2iKkc}EpY_ZW4Y0Z^;Cy-P>~HlG^~1>1SdFx9b8clVPit{LW+9(V z$*+3}4U>1MXHu&V`-v~(6)xj^-{;`k6}BXP*0}bkCcaj&!16VRo;Z}I96b9>!1IN&+DL*15R&7PUQJh?&7rTm@iV0 z%U=B}xO#~5*;^=0I_L^gd>{pX>p>p8jW&-!YRhJVegXek0cDmzXbHeDVf- zWm0mEbwzC~x#_;hvSe#lG%h5 z%kZnerPpW4%iBPCCg-%bF$$`I+Z*7H++*rRQ_|1;g&adT|NRd3QbD?yujnWFiQsYw zeANe+o6zDI=(~Ap960CVM*YBdPNB*q%AFh@H=oguRPcBmm=^@@x^WjHi_JLX%Ulos zy4;AjSr-Js!8|jYsQOCq`bDg|Ir()?pV8#w4Q575|olM8{dL|4D}C zCTP1|#dXk`3*GC`e-+&Robq}^G2W-w&q4PLt*zqh%XVm=Wl}3+v(j;+zoOB zEhVzkH3{7Ofmz~Xz8?ag|A7A#+P`eK$fN2j_;qK~zmPrjGmf$OrhNb!{6W1+?8|*m zbrUUhRlce`NSvQptO+uIw$`WaQJ=tQ;!{nmua^Q|M_>y6WGW5m7jjh0IwpuGFDdm z(M34%iM>1zBo4Bl^;Rr?^qM-_&-$A+aUz)1FoJ_&zL(~r> ze)}iX0-%3|n9&XUggU?|Am5gHp;%0Jbpo8fVi)k;;q!cY8cz8XKI<}ny-c|`_c-7xdgd!e`76jj)Ex z>I_53pz>WC zqGvEu4kPbw9oCBfLENYvUO}i{LOXeqp|`+&oD_h=@leXcAI+w{0{s(Ql4-6_&`Kcj zldtG^u+-7pd741n<9DQfpZt&y;ETvP_#TJFcG1sZy?wyr@2D5S=MQ{V;&TP_S!41u zzC^y>@`!f&AfHoT@{FT?dOvo2&(70-QXUIFAEvw*Klw+vDg~EMAfE{GG!9W-Nj}iK z8I|Pe>T~cJO1^bpo({n;-iI}Xk(d1s?Szs4ag|KkgXGQK0c9@QUuegVcDHFS>~%kJ ziwoGpHG2ep0vO$!bfKH4UGgX0%!IoDXCF&aeIuDYjuh za4YiTE(>tR$tMhat%bdc*}?O>Hr;$r?PT@~H#6>Qlkf2())+)Q;{g5Ogf+PC;uS^s z$th4MHU}wB`J5o{&y3Qqz8EI>|_+U7|Lgf`5mn@KFx2)AsF0t{#O+K^iQ;= zh&%sAe}mZ<{K@m)RNARbPiL4t)DM$I<|7Y3E5J*a*}lf?_29q~zPt0dpPIL^*B0`j z`2#D6kQMmbO=XLGW|qNkU0Ls;8wpOYg1gCbgt&>Kf8W8ki+da~f1{-wIY2#MJn~cK z9P~M5Xa3}Ab@2HZdWr;}ukjhr2)YPYDV#++%y?-;&o99(@gup6{F3k>E;y7x1E-Ns zCQmL`c^b_KIR?E_G`kh6eVMWh)rI`5c5)PX)yLELh0!z{^h;M+@)-E+=#=6m|48eL zp!ASd7lY5o(0>Whiw~gpF?f6uzP_=~z@Ou13}a>{iywZr$XvoNkcqyO(%ND7dtP`N!GA6Y%sYD7ue%%;g9EB`2sK$I}Xo%)VtU%JFvu^tMEo3IOXCM+3FX5va(XE z=zl9d|A7;BR`^VXe-Cw! 
zS`N4G^HwDHh1`M41McOxi+u)hr-fFbP->`dAt&NU)OUGJb5%JNLQ6f>1*|p+T)sm& zXL%_P<8vl>T$hrUdGB9*_T_~6Jv&F8VN`k%N4`P1ANkG?urbX!zf<&Ev0r$}gC`EB zUB>-Z=(v66ufg*WH1P%VMKW}+fy>UHIS1e67#BO#k9aHT?B}_8kre>+T|WO|{BF0N ztU`RY5Nj!~&pMv za}I9{R_iYndEXb!G}Omwo!o!@8`9zKCveZLQ`&)#Zd|m3AD4I19=vvbQcwLU_E?|z z^dY#2#xMS!@?7E^>#>7w#4G-$9YdVs5Y&?Kqu)giEy*YO9qaKXANmShMick&R?fd# z3crE;zZe{M=Py=)r?IsA7P!f&dGPr%+$0d!c#_X1oDbZKmWe`A?)Jg|qZ8=warOuA z(+($v!2by7ey6)4t#`?fyUn=K;IzYuRPs1nHvCrn8APe)~c3q6tzyNQMi17|A5)5u7)ROceb#Jnv$E@D|+U5Ct}%{A{uT z&NCbB^TM8NV#G}h2k09{00=ODY9OD|&N@rit z&r{x0CG#HfKIpcDuO2FZ?;+&dC8+=?wN(!pr`!ple6f=V&WNWe=lAvmN1MR!VV7cgj_9f)2%Oh z>6^4ihCR6NPHc67u8UXn2T$F(lor&lg#C>ISDl><0FQH#@F403Fe8isKb@aEUyr7C zf0?20(at$L6FTmE+)`bKahJ-umRRKWyM12&i(~JA-?30$#k`RTg*Ey*T1+Iaalli) zNgwpg=N~X{WHIW#(o6W>Uw-t=OV)l%93{^H4aWUoTS9$U}PC~yN=URNglSIy^JVXl>$fLW5w7sN>u^u}dX)5p@kqqOn zTn0uv${qbtwBt@jS2Y^kC?Kv8j6DQ^&;Ha82cH8}Th;~RO$fNhc}eKF`|X}GO4Vds zPo>=9`66((GBfz=%ma71!DhV)<^5<`2|ja53w%C`_m;~2N1@nHJb3&u*4dl$C1=4( z&S}z~i-Ubl93=~jI|BdLxp!qV`0L_A`2O@0#zY1=P?_lY7s}}E&}A3jAU+M#pI2$hHs2r zrQn&l^Gc!kxqa~w!@&B4tw`4HUBC-Z9;IGhU~jp^wo995vdKXa#L zqH-rg7K@9IAD1~qdh;mf{E3P}pDsIO9H%#ygOqKN^crGEF3#+ByO&ExwH3r<^1M9)Vus zsF|QDF>gI&{wu~F?^EvLC-;#{T`blIoQy@P0V)D}jwMecUU_a5P}iZ8%O=dLz`Z(@H&bDh2Z6^Av4U9Q)IPMsT?WeWAI7=t`&e%x z@eN<B=>#X<{NoQDi%E^k@x28IS70{KrfmS z8H!N#v5x;!z8`CBM!rW7c=8+g{BQktIXmsnu`Oplse$EkFNcTDdX=a$837xq@9wAk zR{1l(gc9M`swWopm&#E0xC3S{{c!8Pck#cU;eT)KOFzSiZXBz-oz6JsMk#PF z8@iQsMao0S*9b$O?tTlyxDH}pF&h3k2LVnyJSzbAUEWS#T@HPPgU=6Wj|?N~a~m7? zf%##0!1c5xoP%+(8NrJFpVon1c!pQDgNF!^s=_}Q3qE5bPOB@Pk} zoe<(0{>ZZ_=USq%^E;eJybrF0a^I-4yEOdi+v4ug+@LK#gNZX=fgeBeJ5DghGKqhb zhYt77G9NT$bRK3taHoXN=tRoH$n)@Le75C0;YF+`P%glyTX!APfn+o`0LL#fhPmi-k@(GL7#9k^_$c^XpE!j-<06G~ z**B0&1h{*La^gYYyUUCDlz2ok?fnV;tK19mmGJ{tL)a(0NbhUOAIu9Hy=*q#(2wH$ z_H}y4*?n`LRynyue;uA*HBD%DsaTVNyoCFblCkqp)=;irkFZl6iGJOk3+dRIy8+b? 
z`M(4*XJE&FbMsgxbmFj#tN4YV(}N)VXLm<$p2yC8h%?N?SLEI!JWF@xya2h!(dK4( z8qCWAcUxl@4Y@_eohvL6cgil9HZ5Mzb^PaQ$d7x;h|9LchvnuEyqpWkhrImCfyuxA>H#GaG)u**A)U_UZq_j6}jRpcBG z)|N5aQ^^WAPW!|`!GE`2n+HcTsPhGURzU;X*pYMl0;_3ZBr%Yqc*-vC6feRfhYjrN z4W`e+I^3Oe`;+;^;rVvF@WIH<7o2f7Zk=EriNwNAK({mN1y(KeCj`zi89zx#EDAZ- zrTh-E3upGJNV}25H}ZJ?kb5@*R1x@+qN;fCszjwS9>eKX4b=j9R#7pu%iVnV!%okc zszjAw4^31!bX;Du^PgS5M>TMmGk_|^LuWX6=Z~E>=F{bo4pZ66?eC9O>Cg+|p1VBw z;yx4japQfFid62z;-i$u;lHj_HL*|fzQ9ki_{hI2rJv=&=MvAjSO(6M?Ia(nGWgg6 zKL1<3lKxa-T(1S^gYaiIt0E5`uBY7`?DiYv=JLe8Q-vP-U#e)v&p^4vd}X=kYM-hM zKe71FK5D+KkP9kDxf>(Sf$!X=2aboRNkpH*bSVBt2=V1~6@&fk1HWHkXCNBf%fmYT z!C$u?xW_!yQL5pG-$UC4#GjoW}^_kDQ%DFwaCn z@3O9mJ%!0`=+$!a^T@ozQvJv+W)z-LE45_}F-20LzRA{qbg9C$wck0kC9 zjJ)HCGy5a2EWYQ_|9I}vxXQSSC%$}xR&&AOOz1_)bNJ0J?mvV2#o+ED+T(U7_WQix zH=MjWZ|dccr*RkR6~XIZ#&eYH;M3hC|Dnf!PREZ<0RQ6fd&AUCZp8f>ejI*op&f2G z29MoM5BJHt%lC}W6UN0Yn}XXBssZEah$*6-1n{^o`kli)qZL&m^1WuNFpor_>C5mx znUjVo=*6vU>Ont}5waC~{e$ndq5rcxneAz}9QfNEADY}e#_x5`n&GLzpW8oN4kicS z2@YjlRv-=Or(3CR1QT7%Sn#{aoMxstNKA!|QGY zKF2dF6nW}5QO$XdM?$^k*k?oV*u|^6sg~#s|AhLj7%!bwb?QaQP}Kr^@kOik@i#cf z2JX7|C#v??i$WWX!L3a2xss>;B5*K?)lHddhaRJeQ|D4I2ERI;{4&P?!&7jM0 z@VP2-cKLPx&c9D;m+vrw{lZf4?T}r7AM1sk&L+Mcj9e6FjrUhyUFTOEzTBG;&U@Z+S68E6v;>(N*uf$0K?+eW zsu_&G>ipzf(}2$(ZIp>ZKIMpS6!1NTct#d@6wA4kROn}c)Ac+!PzpY|a~ds88sjKY zn(@BF;U4&x-puQ*!Mg(5FEdVG5i-J*fZNCHV3Q4w1#`Z;fd@C{!p}q2UE}amOX$xe z<8UU9yk~cF0{1XNuLQdDp%=wCTaMrD^81%S$K5pbDf~HHf7jH6o8L3|8sC%0(TMLtKD66c zs3teeAf)&>_awDO@AtT$KNGnUmt()rOG+pyLJqln@8Qv_#_tUW<04r(d$B$hJTd3D zJm8GLBzYB{+Nw_MyQPsq>-<0q!Y(D+m@Spn5bimsDjhv?a5wEQX0#IHSMKJTCeqa- z|3YFFqh$y>Nl=4my}h&pKi%2)t>9Z%WFM~DgHL_I$xOx}w~2!SMG9X=l@fm&0fM>v zmqxNv=3?1{@lzXckNRlhDn-a)HvBioYh4DNBJ}wT9%)4oayfa>^@&+8C*IKk`MtrM zk_!D5%rD)+=5_e9?xwY`KsL9p`Zje3lcTX4{Lf_;+DQhn%V%2;z7Iz}e?vEi@pXWc z)y@y!2{QL!#c&CaH-iy)lKIQU4cS#xb%=lZ;RnVeRqkv=-rQ8ni9r|FiN~M3P9A0? 
zIP2!Abo}Eua41r>f)4kasJb3J%mu%lo-)8`hyS_YvBTjeY6SMnO+A!XL5>YnC+bJj zi=iHztF0P1+3~$EdJL73Y834-lY__Auz-oGBjeG;m6_gPpW|TxRDtDK2D1H-j46la+>~y;)i^XJnA8@Kfraj+i}@b?$7t; zssu~D$onoI{6=R+ArH=E2Wt`Wo8m554*x{G88wZrwdyyEs=j_6uvMs*LOZ zDW9RekV|`Ty%uub_TO?p_WPQGE6MUKpGC<3Bi#o&0rHD(smjsbD&3gzfxYV99y$L` zxyyTgM|Yw;fWHS*>anZC9=`lJ&(fRn`{dWv^2q<3?g(Cy=>+|1;Kg;_8T$z#?%aX# z%HNLw=hNW(A>|GygTbe0_znU8va!Q3X7ti6|S(^gRxqJ1aOef0YIcHLd@uArV zETO+i;B|fU%MHx<(LwN6*A&w(C)U9&x9{1D__a6lahYkaTpoTa>N`Je6!>Vl=dqt@ ziTz))-DoFCs^Tv<0>8ZRs~ez4cMpfN+Zgic8X?aV{!U6`>T?r=ads2MK42r-Pvv`i z_>YrywD01&YfMx4567SGhW%CmpBuwp9C>u@s27Ic+zFhzY}cB$@EHR>m!S`D;vBu0 ze|Fgo&~fKMjv5y&|DO9qr+^##>>=hUmp6OIv;uX;6OSHCe^1#P%p3eQB+4c;3%v@y zPNsf2Mob~|QzRb4479KwiyMPp_Yr??hdsw3`L1}N^XWqe)t6mGY=(A=DDTLs{%OM-9uG|{wLseI<0<4 zoV^36y`3Gi7~&_}K)z(DPJ}0v{=7!?xh47&;v2ouON#6z0^D3C;%%3K3tsrm-T5RB z27kF4ew`n<5`czog6D20ojc}f-~Anr+Uh*#FXF&QhfArdGwq~;&z*T*6+9k{Jw$-p zov=GjWP+z|eV3z#fD2))de{-g|~aC#Y)B zt0evKM_pWPoSMXU&hD}novMqqzk z@SppGkA7IjSjwZp=b`Y|9XuWeo}IPt@xJ4KBk}Mk z{G>1*e){kHauB|g;A^YuhfRcm#~nO4d{XsNZ8~w7AjUVgI&*kU<{GI^L4K`BluG(bq+Tm$?{WO1lXN}Fk z|Ln#H$=BUIdh4P4gMX)NE&R)T`rktL0I$N??`uc-E)-;Vn^M$w-H_65gM?uQ?p4{o#tho>XQKf&RD%ctvN>bD|~ zVS*JHK4Joh8`M{%aT?3qEM;w2w zov8(Gmf{y@VOI$<*bGs1!0n;nwu=id#6RZL5$6cILMKw5h2In@;#^8y`kO|aqZ@k3 zN)h?B=^iB^nA`fWZgk@6B#TA<(~ncpF%F~ zUVXPtbAQR}1xCNi{eA)r-OUXsc7t@b!(2E_#0=r`LZYSu8Cj$0+c>d>;+@b zXV~Fv2ClZD=kMdSuR>;>RB!g<3P6q;_>l$dsPzLCBEZ8D&}qVo+~F{HlHuiyMYfH> z=aZhe2e;yalwZmKP@o99W%$oy;nUq^ItUN89r5as`0KT~7knn|yL(O+5O=5v8k2R5 z{T#DTdT_oyxVn?k)}0-sKB_wUS`EIsa~_`)%ON|JT#bJCkKTCM?a@a8QH)^px{w)f z7;@K0q#gCEgV!nG`$Brr6FlHvV*Khfy#EulQ;7d_2VCZqD!pk1zBVGeqrP(Y8TqOn z=rIGlZiqd$qh5XHw3<+BL;R`=s6PZcH6%%m$ByzPOig6oOk`G?hdqRY)8pwMH=5x8 z=Ah|{>M75-XhodE3w)}<^IWu98#?Y@{Xwdhy3764b*b;p!8HeeiKZg?cGTy_b^KR% zDzvYffKGG3-5!i5cYkR=^v+JO;%|KLd@gwJvcVryqp=_E1y-}5pT>Pi3!vABxb!pN zGo50|I$8*h^tt_RXCnQnRtIL1j4{TJnRi5m}vZ~nF37ceun;4khOXYc#%vT`7!E8 z;U7DEjwYXO8a_}U_lOQdzTB$@p1b_v?CC~SnXV0G5+1by|&d$!h0H2r9tH=?4E4wt-`iH=kFXegOEe*Rj zwiZ+no>XgJ-ETn6wM?5ggDA_R7aw=cV>&Y*DW<(<3{{j1gRg)w6GfptRez!lkk9Kz#%N+3xd$E*{pjQ=H>tX*5 zWaW3z&-=jVlZ_kkyDZ4=aP}Q`axalwABj+?M~W-W4;|>)k4Y`h&Ewy=6yGWQo~gbO zywPzB;EqW}xE1TcpLv2~KB}L_xsn6;H%|Avh#z>_orF82BYmI3emeMq>M!Y; zF2RRPzsmQg|J~tY;Nes_TqTir#`jo{k4;Y|+g7;kmqwh^5coa@Zcm1Dk)``E{)IwZ z+MM)6{Z$`M&!;B?-EUi6`62N6q;x!a9RTlx?jBctY~D*x(8rBPZwWk(eEhLLxv$Tw ze12P;ogPRIgV@KPYvn@m&4-Xf~23qPUDD?ARva?v{|N-0|63y)lj7 z`EdHskstaZxZpN5aS7FvJ=oF57(cCjzY;FgO4^E}s0q(U!@Uadx@~p={A$2&F_avB za!zou{%y){aTfh|f0B5JI3F-4I~%^b`!hQSU1mCVxz&wx*(aruxo=i>x&EllK0Xs) zqr^p@rCsg$H?CLSy@xFXH{UO$pAFg1C#rWE{0=^9C8uVW(r5V^(M1LG zKc9iGAKgEDHo3VM4?foQ+y!v9v-jR)9*K3$v*wEm#`CrCx_8K*z1)Eu-zn#r8vGqg z(xFK&_VY31+*Q1D_&qwwH@Xa;wZ&(gQNH&_a*1;_ucJpM)9QlQuYxc*IpWJqKT3!RJM3Pvay!uqWH)@17iv@9){q z^YJs?NY2uu)Y;UCo&e8BC41@TIZ0oB1GnMo>t*S#)^XcIhLD@H;r6rlC9D0|8{Zdu z@O8(!bqdRJHdLCzuhO4>YS`Vj|THT}uAeMt2-`s3sVi!}Uo zjnjzRmIExkoE)71gp09%D!PC^Oo!W-=)dN?^k>4u-pR*G_N?err38+kV_rT<;!$ylsiRrK~H;_2%1PsbcR0zNO) zFY=?OcbJ3MimzLuU*z_;A6dEfp4*cW4^iD^O0})#q4x(A*uicazov$@I_krhk826Fq`3!p7F4;Pd^BJ=} zcLlwwCa=Z{?QH~~yU>Rn+0U22&yDc;Cj8YFuMv7ROI%*)+n2=-`8CQngwO5Baa1x- z`Em4T9=kjCv(DmYsYrhh&CfPY3-Ql!)5Lh^xhJbn*3JAX@d+6}^un^=@8IvKpnd1# ze``2@oq8IR)BSn)tT-N)V!|M?qv^|TTD`ih9{STC{=SaeOPyP} ziodIdxV+7J^`~OPk~{wPC?oqu+$~t(hn@UL zIbMEa(blj6_<+S2=jMJ8ksJFdmD6_Qx&v={ly|-dKS6KAzO4c`HYq4xIcty+%7~Dt~_(8R=JPN6765^Fu9if5rDP^z#Sx#d(C9 z_SGs|e|)I?F2-XDCnK(`Zs~8fQ=HyNUz|M39w_PEv%ci72eZ4`ft!j}*?na_L$dCE z7fGJokN&&+Ua!YTL+7}Asehz+ySvJM8J(S9&R@G^J=08ojmhrRk46rBihP&z)n5sH zuJ2ay?pu9AJq_UR1k@bLR!mG!GB( 
z+!gd%oJ{sui9dImH)rE;kvdC&uZc*&*cA(z(K0(&6NAPxg@aHj?*f0es_E zH7`V-$=Bh2#8<51->FLe&zcV+F7AKklbC;g%#Kul2e^I%IagsX-%_@>BD*sDPOG$c z61`mu@7pE)l4k78D*TZf@XxDP)W|P4pE#kGw@edyULn7WerD|DTg>n73C-J(_x6$h z4!yQ}tTyZCmbr+1c3-FbMz~sseBgSqb@S^l?ty)()Q6S)COP}!9+`(n zlhz9?z{q#^IXk|(as4=)jT>-2f)wF@`;C7m=C$|W`3_0W-|?Dy-NkD^EBu_Fkjq^4 z6n4M@XBwnOqIVUwcz20owbGZ->+$d}pl^edD(M6C(8?-(zQom0{1~znnGGK)@oGRC z?{yc0_>3CtjgIM)#;H86>1(CFZ4Vcrmfg;$$g>hp(5v`qXxBIP!Db~R(udN8=zFOr z^z)c>TY52>{rq5QKOCx_@avv#zp$qI4{}554DDK~o_Oyy_T#FXRgc1FWX8NCeWNUY zz5T)(+PfScP9mS1($~xK&+*?>Nro$T4}VuBz4B67{uTR{vWl3c?;z(!{Lyb3hqaQg z(-YJa=T7cmw{-OVeb3imKQG4b_~cW3jxpYUO5eq|j7-+?RgKe6wRcx`T9&<9j+eUG z>F906QHMNv-m{m>@;yttaj5)w_pvW{XtVdx#qaa8R!e#%4YS+vU)R3hnw01UohIUhI!P*5|S;~ zzqn`Rl@fm*GjBwG>MP6xTbZx#h1;?2dyoAb>%JS!J3AWZFJ@00N8R9Y(0hr?y8|9( z&JjLm{%y~{e4_StvEO(<`Dgsk|78z|?9aE>?y3ABkJ5+r#UDN+Z)aWmbv2FR4cWu` z1+wKY;>QVwd>EYDk=^|$xjE5X(mTqRvAyyl?b*TGChqpjUoY*Y5$dm@{=V>UQr7f6`UHcY@pZ!}Z9I{&qgyd*s2*UlTvPvUtJ#5O&h4e5UtKch2xmc8Hy3&&7J@ zk9>m9+3t^h9KEsq!UyPiEu1~*bEr3d$gcgnD2k8O6Pe0x)Bl6)2!EKyO=#6boW}mF z6R2qm@{~ayUTzfs{I>q9lXN$7dn89|!7KDz8%Kq3}QU>Gy!sL*RxTQ1fxZgZz?yZ^v$a zD($YnjzG7&?=It4yf5ycoeKfRY79{8}5pQ55xU89|0m%b!|ApDc7%|nrg^H%t~3w!TV?ThyQ2(juK z2hZVSjySKikS63-*}6UaT5oIT&St$$$vfoRn0*}UvsU6fo;OZY-#=1MU3k36`?q&m z{Z+WPspx_p?;R~(BF-;Vf}4xk;r-M1l#BD-^ZhP|a{7Y)tqPw%Dfyqno~@$S`lTHUR+8(ldV=h(ymX7qu^xFPxoG@eTy4D;I5~`e!TkN4(qCd${&9wCqB@cC==*J}4`zh2UNW#@T+@8oyDD!e!tacT{agNa z_IRxO4$Hn^C;VF62A^Yof1G_B_ZFU`o@v&(4|#v^^RD?L@*|$F{CxWSTJ{2aqiyoO zc_`w%9?n*gkNeHDnBN=0IOvzJF7lxe~Ulo;`1}<#Xz|Iq;W0ICVxjgec|av`nQ*S9?$BRy8O|fmHZ#Y zuMzz?9gfdc&#`&T?=_uAIe{ET@o(G#dvkdG8G7LJ2jtVAhX0gyRX&l)JnZVaB@VST;wol? z?E7?pKhZ8H&QrHT`~8=+uXSs8coz0dSN@N-P`8DV(@FX6|CZm<4p3wta-xX+Z{NfS zsROSj!o`koU@*%i?*EcGh&*FQ^(A)0&OA3&vbeAAaQp<`&SqIn@t6EUPrfWRqKCavJ$>7I>he?n!oK~ks0nXk z9oLqhwVn8)JpBSaP`$P6 z|GkZWC;rlE*>t!x8%_p&0e&JMi)h|#f?l7e{^-ZebUPerkq!{|vNZjkURB|zI8Q(9 z!<)2*UTu{3=!X)ou1asEKU?ws|70BfP%KA}`fufLXlS)_Y`RIm{a)PWx$yTs&u*^C zzC9;hZ~j_aJf(chgD>*e)M5WTsGfds=C$zxr*NUspRwf^jiHa zBd>aU)BikMLocc%MY@6?Xog<=hM#N``i~{O61=Hv{BBCu(chnoCfVmD|GoGLY9um; zryI;`Yu$_WSvih^UNh+ihySLxpE^ev@~O@r{Q!9nVt@Z`9981)2)(KSPao931Dtc& zz&=d)qhs7v=YPImJ%i=z_>Ep|Dh|q4m;4``-KCzv{2)K+-=B+P@e^`BJ^PRxSTtFd zZ3zdKDIfF2^6XjiWC3ZgL${;}IBU&jXPl%I;n z@EPlici?N3za*=aZKwSEWxE2mLmxZB^?%at_SsjJ{MSgnHqX>%pE&=mUp^|ngWoaF z*3KXE*#yv451+sQ%ZQt#V~JnQG5;M00AKZkzq$R7VD-IxEcRsMZh z&oKT4S^44n59W*N@H*sHgI)cI`uf4~Kg^FG6Zvau z(zD8UmB(X4dZ6)sbp922R^^X~b!S)SzJFyO{$3oHe_iUsxp~;1BiY|;jHC7T2fr%i zd|}D|aQCHrZC-J|Qr;*Fy}Dlgb|st}T%|v2+6Qcyg*>Pt`LCB3L@2zPD3B28& zMpVz*$&D;imGZH2`5InNLvN6EfD_Zu!~ZtHj(cZT@>V>9b+vz%-@|^GEYfxXwA|lH zHS(hzB(qrL&DbVcp3XzR&<^|R;s-d~EDIc#Pg=j;1wXrGPL=R)G*>?C=STI+Uh-#D z&Eov#9XyO7pBwoj#FDUwYh+#E%w~FZ@fz2dUyc-K$0I2()Bn*Qg3qm>{|z!ZME>(f z$szD$75t6(`nz~Mx2Koi!s)=z)yntaKUtY{$~wW}pUF4u-ftjGr{pkkAAi8z^~LYz zrNH6p>7Vqgo_OgxS=^FPJ^dTsP5GN|AkU-uncHOb+1p*yoP1b1?ClO#3w_fj+ViP< z2n)~GVL#tv{O%|YuTnN#`5EXj?@WbLp`SgKAI85l3tom^?GOJN!G{rPx2z{V=cXG= z{Xd-lBJv&Wl(y9Gf574T^s_R2e%SoAkLMbYr~BH_L;ufAA6I_&1*qx#$hEuI4}I1+}|s@;y7du~}Q*jV$#&&te=^)=RIVLt6Fp)ZRR6h)c_cR4g6Gr&!3>nTadNJ|BvqXu2|9!g2VOEcffZy z@;_ER!T(9{tA5gjAEcUkzJ||LvWUYt9}YL+7dj%Vla1l`yGs2b|7+RDqAHy)te1_& z&)r!~@~O`MyvF!#@7zlL%=v)g3GWSkcq{u$zjt!3rAgMG-oER-C2qjqI6wCqdQIiO z%l<6&VO3V>m*)H))#&GU{LC9l{iy?A!>{sm)-u}#dZh3#)EH-`hC;d?4MS7UF+ zzCdH^^)~b~%Y*;bMfbLBr5zwviD z-)*V>mT;qEUWNSMD(TU#!{DrFcKe1Ml&{QAu35_Y@cb9_F7_2WWP|bZfAn+M(Y2GI z{Mjvy-__`~%JR$6WsUWG5BA=>?zj3!|25>7exH2CiI47>wPwGqfWu*@-H@*|?#99A zP3rrNAEOO<)=XYAZ!|-{Ew9hM-B7%eH_)!S_6%EaF=jF6J>r+l$<(yNK*(-3nS@L??1#0$z 
zKO3C-?k;mo@Y9?hp%XkD=0>)^#f=6%q`OT-^`qEl65t&CJWY*Wp%o`v6L6?#wfFSWm>d0dth z?XAaO{c0Mxy^#g?IWN*W?TGlFI%!SMbrsjqi5x^!!Nst*+oW%ld^SqkYge13OWHaM z`@Bv1dWqXk(`MhvKRFD32Yn0v0Hr9b9<--^qAQoCXvf6@FCc|u=Ldys$1 z3)Z#d^V{@y{LJS4@24L-CZEImz@_im!A;QrN;_m97w(tOMi^gJ+0Vb>XQt2Io-g3@ zh_ZYg_GG){Kz@!!+7suxd--(pIlQXJ57RW;k{n)v+d;3c_iE_B9?q3?$O`$#`eohA z@jC@hhyC2s_cruz^1t*s=&@h6mG6;vbgp^=Z|7yX@`J_Swa)B{ITzTq#M^_iG;64y z1zCUc+)x~y^(g7bDc=bGAC+~>+S1#Tv)(2BB6!&fpGRjS%{#voS7)uX&%Pms01sTbBLxhTmm5HEBm^M z=X&sKv@PZT4LiQE^6zCm;ok4X-|7kZuglhZZ*TbCA=|>bDS@LwZ(tqY0nU7*{;u%0 z0{b)M*@zw8-ukXyKEiue747oYF&M zkjrBnIBA96o}Q2OzPiTy^pai;J~trG?#ZOQT^8{u$L0e{eK?Gr+W>m)Q{rlyl3u~Q zc@8|SlMLmL?#Ukb13nKb%b$;47e1epkHg=$#pUoj^!z&XCQ$39d@y@pO>u3>e|>g! z(6^9pw0E|#_@r>ZpmE(4J}3Czg1@A%d9ay$qBZD8WB!YRK2P9h?nv+dU_aMHZz8Yg zm)^UpxV#qn(Yc?ziQnte;J`oU!re&U);v)m8A_iT`~OdAcXryZ#o#RBr5eDoz>Ov> z4bihk+}P}&;tx1Hv(yuJ|Di8Rwt+kUEmoEE7>{Gvi*b)hlVlv6{@OW|cKC@*dTaT) zE`bouGreO*98=P#u&d`m(LH(c+R=x7?92q-ZjBx|e5jF88$KUyCnolnxA~WT99(V& zS5GO+H;07ZrX!Ne#dFjnr#L4#s^tG}JNRLLo@eAm9^HGbSYv&04|+T8dOR7K{ZahN zPM$(Owf(-1J{%?sXy@#W;xqn@aV7qQ{oFG-j{jpgIW*>f9#hg+|4U!PV%k`IEqnGB z=;5#4Tt0)hD~oRm`8TvH^3+u@&gXf4NH)4zJzt5cw<}%JUx2&yjEmRP3FKB2 zK99g>HTH80`Zvq$Inns(oqVO981LVw&5iTfdS_VnqxP;V%U6fXEzoDMqlaX_xwo-$ zHleJiE<9<=4^|sqZzxDU+rF%)9^8xD%|^npSL6w4kDx5ZD~m%Av7c2xD*mHY=g zUl`BVhR0J&`f=!u(ND-i&h_~_CKz8e_#>LboiT8DB!2pc-xyxfFUuMmS7V${8KwWM zx4kFUFAu=SxbNsc$`3aB9?6E5{rg%rSicN%US*qXXp-9>9A1|Hz&y|_>FfMTeD>jo z2)R`kU)Lh*?E7cRH6@R~y(jqjHLII-H=q8PZ3R#MDYATIDd&dl%DDf%YCg%lmWbCF zUAC)HN#B`YV%zMm;urWlpwz2o?B$MdyOZzh=)+Jr8Tc@oUD*MBTt3!~0&W1#xAb0l zxl8(v@V0;U9(!P#_e6gk=6eJ39}WNe>6dx=FmkGgJ`BB=d>q5!e{b>56V+2w+*~L9 zGE`jNAo3d`-fnZd&WD?I_-`-G$K$_Nas~cF4%eXvKHunliFr40I^=wP-Wh$IIF8}y z8`;mHAA!$Z$Z-qztqir^?C&0xe&zUmUHN9}dCm8@SN(mTabEQ&_#Js=Up2opVu$>i z55#{1l8yOnf5;i15zjG2Jxx3zBZ2kxnqn-R?2MC1-WwUl8<`i6NGc^0w6}_px3&7? zCrNBfep`_U6O zbHLA-kJ_*+JCJ`fIGb3fw)eT&e%?%%)hK)4`lWxmb=falus0ji=i%rn{2ZT7E9E?e zecju}7xygD+{7whZ~PDZzHir?t9i_@d<{ZC={K)=-x*F0VOCL}ke zOUU&T`H`lozouQ_?Mi%pB|XIY>(}CqvV7q4_!6I=Nsl1+O~se$je7o%{%yM0{P;z& z4!sI}FSI}Kc};paK7Qi03p$S2hk?)4;7Q1*Q5O8zXJJnUy>B);`w%{~Q$GBwy|Z0> zc4Mu~%zly=Wgz^FdM0PHv}cgjtkC zHVMD=#czyPeu#a*N$|M8xVdf6tvum!H>=z?$*r4y`y<;4pS93MAevAA(4H8dX}%XZ z)L;kq)UR&L$!D7%Wsu6Fe`zlJIB>gTzC-q>bzeI;7wu}IeAvl-;p2Da_d)-bADws8 z-oWSSSp|0SR{7?3{U3clc=(oic3XHKIJ`KYfR9Q1%yac;Mfkm?@!Np^IrtkZ{|1j! 
z=ySeR`QCHAC+5w)^7-0ZKhAfT{kuGmaW+Dn^AVY>N5xg14?E#HzXv{Fn;%L4zbS4n z`K$?_gT4hn^!D1hj{Q8Xv;$v5Z>s#AKI2~1ck@}~*@*qT6**6Fu5b$b$GNtAJMU`( zp9dHhyW2k;VcfOikKPuak-u&h`dRGc?b&JZzun4qG-A(nhi<>Su_y8})J}Hw8FuwZ zIDC|r@1y=r;+%IW@vSM`?9BVn2M&J*$Kr;|==YKA+@A7#baZm67Qf;c^@qJWQT;tc zC6D20YiE5l9XY1RAlQaF84j|Ww^(@5_a-3BcKUC+b#HvdFNhsZqOeT zdAgzaiT%9mzqq}b{sw+l7H`7m9mt^}d>#+qj&%}fiGD5IJ~KzZ1P+fcaeIx(s6_t% zwd#p^=X3P9Ke1A}8@u;^;x|IBG#f59PPUgP={S1zMNvPUiynTAE$QbHr*f8(%U`m* z23|NJ^e=rxdNh0WL-zB|C4C%xbJji?1otZPgH1?xEyw9(<$D_!lhb3h^AG;#(9=5N zGlIUW)0lf^KbtQW{7XLqE;nUA?~bXclX{}|DS$o zdIp?-n|~wvw&u#5CpQW7{ zzw{p7=O$HlZQEpFQZZXX&(owqw(I|=w}a1tw;vUs7VWZ~%lG!qPWF6vTDs7BiiZ>~ z1`bTlj-?j^l|RY*JLuIN@WVfp9YeoG^=GHi%LcSD>Zu8jmw0Y)vR!tf{%Mu$>%Aej zMcGmGWk7PU-(~%#jhn~O$=Q+U1I1$;tp1MTnm6m)C0Vq080|b!zts~r7jmw}zp)2B z86{5RSoX|M_6PSU>wg5_b>Ytq-qXT5{W0$iynRnQ;Rb*t@~4ULl1o332tn~ zpS;*O$lQOmTdAKvqj%De&zg6J!jWIh8-aUk)zbqmeV!dn-#77Z>|TzGhH$7Iy!|sf zRljd28ls2&)+nD@;&ZF~Jmca6_gn2rE^f+FzD=@&eZ9rz<#(M=*`wrhG~DU{1%~8j zD)*!P!Oi-=nZ6Uh@uAt*#?j1w=?md(on$H;KL#HDT zJC*M}$Mbc_bN~EU^tgv`7xdcB6^0!!L>{}7@Kee8?#=pqeSWxl#<7Qwr#IF4n?wI= zJAZPB@>{^^Mfl~}#ec+^yq52)zh}Vbh2&9RT=Smf(9k`EyX(if?B;p&u#)pAdy!jH z_&k^X?=5aRaJC-%c|ZLd_`JmTrAG8I%CBMZ9jczje%YnOpRb+tmo2QQksN5ew)E!R z&7;TAs57-|jXX#Pmi^KMy$gEBWHEh<8!Z;2^NS`s>6f#4uusG9du~)dgdDQ&v)dnTd19llC6^+>CHkj$)WIhwR}fmk2HWh+hm|{jvB`Ju!a2K<~!x z{eZ>NPQ3bB^@JXNryU)<_ctroKa1C#PYF8DI{dB3p8Lm0>nX2PgLGg0;%4A9$}fTE z_j~^jMd$Qj`d~*s4Lvy8>B#Hh*n9B#Abi$M#-&G+?~e8Xuh&m*!1jFDOIs;#4iz7L zhjzas|8vw+n;m@+`Og)teYXBuRm}H2>OTUWHc58l_xO*`3eFiGS@N?Cy(#@!n6A+8 zMR8hsSjqnx=*{%Y1#o+vecOxKzfsRk_^4%EU#|XEwCrZ~Y}ipRr#G`lhVXk_2%mps zKOX^CBYxu=`ZAiI`4sQT;qzg}L2XeqSJKY_CM{fC%ICoBYW+5hm2(<0$6N@?l<{9f4juxj8$Ne4M|B&+Wx^+=?E0^;mX>@>{Tzmy-Xt_6LtF>kM+ z(XQW$Jm0LJjo?=c^8Y!z6dq)7d2_!vft#^ESTDcPbKlz^jQ846%}>SW6!!GF-v23l z4ms0={3z|&fq(i2_5E$%F!ahzBJi}8`lsfn8%JL|KN$7Y5x=p#tY;$p4?pU@=rN8C zS8x4fs&go(;lHl<<<0$aZhj?w8^RBAuJKyU{jc%fI^yB3(l0}Oo}=9|QT@yR&1Ly< z?om0L{MHsX!|^Epg7?-Wx9jp{#@Uy}6Y2@My>32eYFymr_cr?F4e|+GdfRg?q0-CR z9qXfy%}asLYx5I*-`DztZC5ylkBESKz0%_g+V1>wE8^-a9mTQl5>|OZr_h;?_;(Ik|G5`rnkN zF3L9)|9rlA_h6R(J>K_O@kGgg6Ml{h)W4VW-&bhQ^X~6HjvO2EXIyMtjFxTb2LAug ziZ7CoZ$o*Iu2g=m^WC?ZM_wv^RZonUO-^iyY86L!Ir%)#{~U5N)20s^*KO<@uF&ph z-C}bze(Lf!Ux<&zNyYRbdi$k3{U>_AdrH$Qv}-=U$hFFSQ;bYcQ%_y~=WFTxPVo6A z<$0;nGfVxP2)`O9bKv)F^yH&rFLq_vUHigcStINVUgx>ji$l_L&}%wpa*O(R zuK{OYFUx;q-iiBeHY#78T~j%G#^+de^=Z4G|H~vC;77I0`CGJ`eh;z;BEWtPUGz6qL=b9FU-h3FrN-!KR=`XhwRRrUeb5T z-mwnvWpx?tda0P^y-|L?@|ED~r0o7|O!8C_cpUW~lKrTBQ+D?4nLX~}2;~E>&dGkp zN0RK3U6svFo+wU-%Q0Rq&tA`RevQM>w=$1k&)yeS*W&Ux275cE&88;pxt$xHZ&d=6`~Wy+Zc zv+MotCL{Bb)AU?qQI8DV`Q7^0jmg=y{3I*c$)_tXP5?dPdw=)-?r^vVT#WLa^GESD zP(Hhx&9k4n7bWU)q6oX0*{!%mKRoao7K*Df<#eyMsk6jRj`@|l@G;d3;< z$Cc*g_r=$RKG$@w;3Ga~!|i*FyBCW6%kupC`2*^o!{2eAardtHy1<{>@b@Lyx4AffMfqXu>09X4D*lblHClzv=9LPg$MDMBL33faWd9l0pU1tvN4?kL@{}eB?&n9`_ zZoC}-wbmVn-Ut01^y#qwApNjjJ;TkzTc(?emFPPue|U^PZ5Ygc*$(|P^j+Pla=P_L z-SiJR@+z|b3gt7F_)G3indF>a+zmNVU`k!=dSu!K3 ziryXl1@v!y)=OWJBWf$VxEqVGQ*JKnmoYYdM~vYBcIGDaC;Y_gi)W12e(6X0%{@ls z+eSNgBc~Vj(}Z-59Il%R_p}yGm7DCAo*Rve-F=_PPPjDvrl?1sr{MQS{GXWq;1=z# zirdrgX^az#%AMf%Q}H#$_+5b>_VC2?N-;K*MJ>8R6~3*IJvHc!(?^vbB+j`JU)UMq z@z$04vnsjDT)DOORWsXOUL20U*4p*0oZj!VS0l<^E`VZ^ejI?0EA0jUV0=_I3R)z0 zy8XwUG1~PwO^%z2IzY2VWH(qnRP3(}w5UX&ykq5q1nA-p^t zJpXR`Z81o{%(r{6T7PyDWwO2bexlFF=ld%9OXwY}77l_Vmsw>GBDd?&&qp863g4G5 zpKTW=&Ry+bcVb`fneTHKcIL&rtlgB`Pc%@(e;;TTn3;SmF79)=Y^o)L>~_vH{?0YZ zd&A*}vQ^|?nO**iaTWG*HCFu#;+DS@wNk@s@%vI94;sxq`Bx$IN&ZzzAqcj(pOaAcQxxp+hLOI7&3nzy)vcC552>j9U(b) z-{kuS|Hd!qRq?aFh^%g-d`^E~Vx3H!7 
z={(t4`Csx6)!%?5|Fh|z;!QPmGzxxD_G}p4g2vui?24bt{y)lih#Xd7pY-+jq5K>@ z$*!Hh<3`xHUuO%lIf<1VcNFx|kDZK@PD*W|%nDZZRM)(U@XLpQ+-+|BfA9F?Qa;}Hbi74mRB0#$G4Qw!%RAn7) zgvGI&)`}n5?J_XyIJ&XOcsmDAanI*R{%rtDmMM3%cRlX+M*d$lZ6^0nB_8oiZqm-O zLCaCA&2ektWSDV{qw7}bOXGNP3_XBb!5-`hcCRd zP_a#RF^q3GH6*Q_yorx9>3uCYSr2_rQ8hzFczr{*XQ_KUoN1!|<$+)7`A0oxlJ|CS zB%)`Qq*eKjTeG{xAd}O+@HAF6{k3U)+Fm?!6>|8CKlvy6(bl;ZQMe+`rlqw=eXV=C zf4AEugEZc&ddE@f>x1`WeKysX9pU*`*4O{RM+4uF^IWWdn|kg~Js79YoQfTCh?o1vczCt=Z~BF`S>&zyN#@%wiEh#QZ;JcUFN-PUdIx=r%%v@i_r~7)Fu70n zh8OkIv1to-Z|nxYSiGxGrxt@^~JncUQyhn2#E+&;r zyU*d5j@^fq#oBxWxizKs)yzLD^sqZRv@1)li(PD*{;c=TrrB`{=}$ZJYeXwe#ZzJx zT8-qw+W0{ok)^1s=QHzZtT0BPt}^$2!oJ>vq~(gIQ3uhMulP%wxcgx~>s7`_*xi($ z!C6p#zinXmL^DBGb<8wlpPlUBuV>^(YN9pGAO>4O4_?Wp|D$-gI0Ux?$!EOY+{;M0 z#aQ^QxPxE1wNh>Qsi(l@zltYW4;9$Z=aa!ie%{3_>W7o}iswkPMe=EJr+<6-d$uhlA&Lh<4ub;muK(>=Y;>fuMvrtFIM^v}wR<|}A zmo&=aG|mI;{P)e>JBt$+Rpgz0*oXfVZxt`4?iwZIUa8wl@V=w>CiLMG{4AxhgZ#cq zyN=Daf+n|$4{0nWv!8i~b(hXguFJN@|C{o7H0Dd2C@yg-Ur}4}Yt!`B-Ni_Fe=z@-$J?R1%#y{YM6_`F{n^}Nho>2$Y!Itk{>t({NRyPp*EvtD}Z z?_zQKfwjp8&PPunjraIN+R(M?NfYzX8*pGlwxyBzc5z35?gjR<+D+2bLK}WAeBi z?&qRb9;j;G$!i$%W&(ILO@DOD8~GOJP%3Lh1#ukzxcja>9G43LeS7#9_Xu=`pVN%2 z@p;_lS&JVzo|3mQucnZEcg|J=a|gCn_Itap%HMx3};|SA#kK6zAm5MB@LZ{82e8&QDJE{8;*ZCqBd9 zdt?4QU+ywE-A1pzTRfF#W%+ya&#mtdNbbsGT)kPm1iz!65At>9kD2f}aML*~^!RIS zz5-^RoqV1*#Pw&zueqE(_(_vVqFt7oiH4d-K63--TjsSkdhi45k;oiH7O-foJG=%Z zZUKZXk&nAO*)`M7ihQG!ShsP)JL2Rfla5;x{7UDb>0f5SJ;`kk2vyDe)edE+(trIc zdvsqj(pYc!ng4l!c65P0e4gq%4>$G5VK-wjMr_1#Eap$X$;^=YWg*n>f!>-;axw42 zv&yYClO6$EMyh3$T9>Q+6Z9{%>JqJ(>0NvKy6*(bDp+m zvAb`AF7#y?^wX+l=`qH9A1z%gZ&DRj^fty)UuXYzmzQ}Y%l|FsUp^Cqawds|ujB%D zYajf)T0HIT+iT5ex->04m!F`!RovG_W;{35#-SwGBRx+%Tb$zixTuS-hUWFX@LfCI zk$<8)i}P`L?ZSJvjd+w^@b&_@HA%1kx9FC>Y!15|c2wbWxlf!#uRx!o2i%)X>X&QH z)b!Li&u6qRh4=kPU=%<30CPxJr#_3~JNY;lq(kYejJEi?g`}Rr$4^$Ok&Q9({BOrj zexHBrDST#Ra}2vYPL+;HQ@*MF?c)t+g{<1G^IYCAZlZelI<}=$UTebme;M znd9vy?3i3sJXm~|UhFJro?dQ^(lMBe#r{=GR@+}olM)-!RPkrjr<5ky>_`zmUIS)1|K> z3hPbu*f$%(>$!$yTMI5$FfKpj?QhFs=xg2JE;jgCM}P0Y57H66DZlqbxx)T-zf^cd z+$ZL}vT7%7^Lb|LkBbp`Pk8)|bA_GN;v`ER<2!J;8uS|thok?U5XpOaE_L5Z1zIP+ zd0Aggc-slyS5ZE8bDHq8#H~`bl39L__2^)DJpvBR$h)D+#FCe{-ScC_Z7+WnyP=2O zySwKa`D<|=w-{7&UrJvizO{2Iq3uoN=WnJxP1wc#lxijpI{a?^e7choj-7>%$h&cU z{u%tc7=AZ3I;+V`*A@4T+0Bm{XOB9c64w6r#nUCdDLlW-xc`K|!cDC#k(cxC_-y1{ z$_whbK^#X!RX=g|8vHtU76jn(v5+`jeuBn*6`kn zbhaw=_>;9VnC~@Jwl0ptdTdkv9kS&9Q_;l>3XO)PEdWoUvWH3sgd@? 
z&cirQ{lMn`m4D*^5{T0U-Dtv=#_Qk3RDIUed;5`&`(bf96%zd^zAkPftpOAJvWZ6< zQ`5btVsZ$}xtaIGokWXh>fKgBv0r-}tnJ{J@i;n0**A^xf7pQ+z@!eIJX&iGpeY5N z|AB|$Zb%#VMIDQNuD_e5J>X{tYtnH_x8vcTEGPd?d<~s>_w3i2XX>NR=7oA)9qrPA z?5o^9VJoA89p(4$yt7l?AbEy%S&4sHnz~Jc5K<&d>mKt2kQ)mlrkih(@f-IA0aN~_hM*zxj1Bdl%iy&!rh?%Lk2I<$jF}byV>yoIMq-K;`Z}%^uRM|+~qz6 z52Mfnw{uv(H9tn!{qnPtM_v6iA1)uICGW$tn*0Tq;nyvz?#F6D&h?TiSyiiIJ11#B z_Luuh+2_9(FNs%n=FdodFzrR2-Ni-6-bDr3Rr(rTquA;FNv54sOpzt!=i*nk>_1k> z@A^IR`+S#*v1PAR%62two0}g7l5=P0T8=eCUSQvD5w3of7rHA+kK~WvQ~5hYi!SEf zdtX+@=gjN($OjUqX8EGBv+Vj@%nyG_Q9Es(zLowD*Yd7pm*W3s^Ty@;H9wK#W5w%w z`{8V=mEiB*(?H*Dr$3&M3uUc*6z<|NkH=}OIA8a6TERMKH@oQrar-NKcns}qEo%R3 z_VD52-Q0D;hFO@!Ee381%Gwwc`h~swHEU*J*2st&X7@Yp#*jxibMlrK@^sI2FeVmc zk?o?Re9d9iHL;r=x0-f`->qqIbGX-+H>ZtV^65si$e*k-ZiljGYqQU{_1sMDude(D zEVMb|DB?Esy_4^=Y5LtgIo>D785|Dpoea%zIDDNOvwgMVxMD^=4(|L?e9d0(PyXHW zv%PnD(Fd;8hX=pG|MqmPcYcu1)$S#1>G#0vt-LkjG~$+H9$~mQLk)fNi6wn@KEcR_ zMEKv{`-bPEe5(V8N7JIl{Ko@mVQct2$afJV=uNeEd$`sfitUz%eb0T%8~u{HBy^+p_h%=cZr&N7 zC+<{Fod3Npe}tdx8#rB^mSyf6oPgfU_j|2t*YI!H`(b&#q+NmMH|0;Ge*~Y`!_@DK z_wzCMY$i|Aqh`xz>>q};|5ov%dTQY(#Z6`Wz7L-V`a3oIh99Y_u@?7du4AQ-K}q3X z=H*CrMISMxyN{UT4t`PhTWB=GNA z7}d&~Plh4khuVt{Ooa9K(cb3jJg>y_Zf1M-st4<;%mPS zQp2Nc#V$Cz0OIbA=c;Kx{odSp?j4oxs|Az2>!+el+RM0VLE>BcwWoUkrg)~3_=<>j z?adOKPWvJfyCpx#&itKq;M6E_cH8O~c{`P=ojR?a$~B6|I_U)Z*@W#s&>bri`M$+= z@Cr49?Qt*Uy=3{c_eD0jzu@XXc643u{V(l)8|M??Lzfwo%|SV<77UYe;mm=ozEY8v|hNbtmkZY-bC$oV^8YtDw4Pz{`N`c zIHxk!(|yF-bvK7KfT!I_bGU3M(|xuO7d=AHJjae5dWT3vjf4uwA>;OTB!>s)Mlpq^Y;{OsrCvZZSKxmcb4V!!mVTy%3z67oX&-ZZWBKIxZ+wo*#;LzMyzPws zkNg^a^ag(^dTTA(lE1h=`{|Udzh13xUogf|OHoo?>BYD5=6Awx5AhsvTVE^DNZquf z3i~wt%;6UvO&eP}2iRNrb;V7-hyQfB`Xi6VMOjDVu8C+QCv%O2i?d07jr+3P=*i>s zRyN$2?c(0pskm(+N~&LS7|qM`-uM{FAG5o$TTR}NPm6i_y^HmA!Bg~Ywm*6eaUDBZ zXU?+^*W1(M$wfZ0eRrp@7;57I7_cJ@63e5uc{~fEi3Xj;Q4yx!a7?GpSRK47V`GrXqH|rzAjG9 zy;OWpK4Ez$o{szCSJUxf^ejydMQ?&Oo0KCmV-kG}TSx8!?P||%jQsbZt>d()iSIF! 
zcQOXL(3EC+sV(`p_r0B#4pnR96Cb18CNsuxr8?`!X{6ZEU)w0(%m|(YRpP$8-Tc=P zAJb5}c>j)iC%hX+!}iE;vnRak>y1a22=DxB;G4N&3Eb+ghLh1_zvBd)PEy+yq}Ec2 zGmVl~xVTgsy5sx;<@Z+Na#$I>pGgLDeZR(kyZGyVc-PjO&ZOl>>4)#&ZeMu$ykFb< z>m={FkX$z6yNBOCz)>qbeF*;VQYL(zowfN#mU%-xd6aryC+oIwj};&;V*xogveFsp zPL^4G9C6BeS9_Q9$-X|?47pB!wWh8MO~=@$+>aL3vp;!++Mg(fr_E{64Dob((%ugE z+bKClZ;ejtYu(ZC^gwd$Y2`f}XVuy0-N;m*!Rul8?INCH0c1WFeTKPaARL~L*DcZ) zj84&ssXM^UQ47)glhthdklQEw%cIdx>xa9IavS|)t(|uFJ+ie7*7`nfN!U-X#eG(j zaeE)Xz);klZdr)zn$?vXsDFMco=f9ap^>s7Mi#g#;+-F)1(zs4S8sRauQ1m2=WFTp zdh^y|1AMM${Qi*6@V0*N`54bJ=`FUBOl_i&%p7PNA*xISn1 z;Md@MFuSF*=ce<+%wga6(7$hsZ`+DAojb}}I3w_ab1V~iFJ(ScKJodm`C^&T^@;i8 zWNY5``4n1^i_6%#9C4N5>Qw#L5q?&trvu?{Wi9BNhaEmhe*WS9WohMm(w}j6uG?f8*Zn zDdZCOTZJ{=(m9nuM(z$icZD~96ldZu?A|l-F2=|H{LI^_e|2$DzK2#fkw>E~)OwQr zyajEk0iXBv-p1lL-f*`_yYvcj*adn%mhV^cw<3QBZnsRYX8-TOetsD}?Co37XY)|r zmCM=AK7Sv7;fGn1zhpJPLY})$v?TTiw^?FkFo;ZQ`mvYmb3gY?^&HTw8!Q=)Zf|c{u0B~Uc=ivZrShNC$ysVU-}`6a=SHYhPyMGVy`NuiSAM8> z-VN1a9dx1h&S0fK&#vx61E2F>6P#XXG@gL>4LBX*^FtK3liKg>D>u{4{0A{toeQ5Y zQMz(+HS6GtO8?4xw zTd8|cT0?*D?tYYk-rlc_EjhyK-~e`GoWnhXT{}XH_e!^bYe%z(BZg;)IKGMHn&?Ql z*NNO8PGd(@Tt0m4iH{55ZDYOuDE{K!nWxgNQFr3UI5FVGXY3aZl)Y*@IND1#!KH9J zcaO?^{q|Y$W4bFHngM^qJJMb~mucs-?B^p&xxWXW+j#G@eD*!83pS)vXxXmfp%3*r zjs3nHmk-Hvw7u^#f0Iu~Bk_BDHsPt*B0Exh`@rqFaOj96&Ej-HJ@(_l{_W~q;TE_% zl)ra7+_SB-#n8-3C_BLO1NeJ)rf~^7cu!-ax45@u`032gaJp99BR(T;8EGlrd8vMH zg+9+`9r14NOoexgvg1lS>ac7da%tiF9PJndpO5psseQM-=uK;%+u^^LsQ)89-H3fZ zn>14Kd~@*8NBrFuYVR*tlIOG7MeOGHF}{n(WtR`3qbt=Hc^)pxVx@b8==@Gb z%GUB`>_*N#+1bAqm&*Qfezpz0_(#0*49_=nDxxLZ;XUzr`_qz5;+2En55-rv;qUuH ze!Ru(haUFnB71K|=TYVvA+y|rwTCgcEx+-$q`(^rhhsgsA#>NF_IJp~>u(vG^TWug zw!9m2wW}{Xd7|3tz~>W<(;D(RpJ_BZkqOuO@E_Uf``8`Jcm1sK?JS54|40^A6s(0X{D(`{ihO-5dIx$nFmR_sQ@%_BEDj z|0v_*lzfi);``!~{2))aaE`DKH2IJ}WG4tz6gTAuzU`8vkki}_Ys@hyCw zjsMsmJV*Q5!slc4W)E+^Og+uL<5Yab35c`wW?emeIBI8YKLM(RM`F49tHaJCjK?@( zexh;En&eN@?#ljKrj8agXbE~3Bji+1$9a{r{o6uY7kOK6I&q?3V?>_gEs+Ur7g{-i zG&*C{N^k*5p+A>@Fv-YF5m1{dXH2jdg{LUr;&XIPdem^Kp~1BO|`PN_r(e ztFuIQFxOlxer;Q`xSK=LtJUxJ1D5*@YHrV8P?scH^HIyX%O1YaOdy6FP7X7J^|t32 z{;1fyyPYIQ!`1M74iq~P_g`${(OFJjyR&y2s%cVsjM8jod<-yBPKKXuLG*nO&uw8p za=KPjPR>pDA>F@=dFcfGF*mt3jnif+e|2PyugWhyM=jgV77giE|FgU4}7JI-+*K-ZeW>r3=bRXE>?7WRd+N6{_k*wVe}={Dm0 zrt9x+{L)uJjr#l}BaNTq*zr5ll&NKT*>m_q##=4jF0bB{QuCT;hpBBx^dq6hzU*PK z==63>b`+`f6gR$y-n=Afp3Tr3KNb6Du^x7VYqo@Tj+X84aN6@tu{c{uUVk`0cpg2e z54ZOv#l-o*S$eO9_=&(lY_?)EDdf`S4g&I2^vhdm1@) zwT#r8J7u*WZid0{E7j8o?jGd1y3QFMskD1CvlHomoadhAeeIm?TB4sC`@Xo;|Esc7 zz5Tc1PW9Bl@m1MQTG`*}sN-pYGX~j#xa|O+V_u!^9Lx6f{6K!^gY{P@<%jB>CFMTo zNuJB|(C3D5zo(IOmpJJ&jD(3vwfq>?z?$MS?VE1(_!2%&GPAXWuYn&a|3&yeE5X-T znKXoZ-Su08JnV`%-+ioEr4M^}I)1x}%ecUEb>Q;RTGfePWuE@6m-NXmQC>WHeo`s7 zUY_qx4~MxwHRQHyew=^BT;=D|ewlE+H_qkl>b-HF*fu_U=z(4HQ%X7R>bb_A+)sVY zM3BwXj`}2YI9w0=<=FhF61PvzcO{R$@c9&4JX3t#@qS$;zi8ldJ8^ftXB0Lh#-4J`8w#=<*^>`44;?Kzb}f{yeI5)`R&N}OXtGJ==FNZ zx1Ni2^v`))qo9rWj-7nVogX~h_eRFXx%llN(&Hj>9jGs__r0~cuJQgj!F?4}Y(th; zkw)yiT}_%Dlv|Ix@5Umf&BK5?2=)y? 
ziumcPyuBT6-GIwme5VrIDfg86!$Wcn^jOMDeuTEqM*mRBW+d|{EI5jV;od^yzYhQN zCAjYdhv(7vd)U(vTgbBxhhtuT6Fsti)MNjxw;xnLy$wH2;rl7(q=o#%2jH@+{l>f1 zCgLf52oA|In1+9_zqscW==K!T3(Rdj`H>Hx-3{UMfAHU&|N2t0|H?hDyC^00kUbgq zO)Tag$k`>Qv3K194xjfney2HSvH*$}#p&rHEw>K<*9VwG&h|adb6pBQ8*1rsaJPs4 zxWW4(|Jv#48ES2Ae=wrsd-G?6ebJ2FeX-f4F+awIMom>XJyR`heLqY6HGQ64j-w~w zepmec#?JmkB+OIzXsiEj#&`J0zE5v55AZLQ^MZSW%s)HQuLju-tekQD)H7KQ&x;S) zmxgxccQ}uZj&&X_biiWOG?zY}A9>OZ8fFc)V0Q<9_eJpL(*a z1TJ(?{xq$t;P>;?(*QpIhaBsc<>UU=qr5lp|Dr5z2^;1-_~|SX`w-bh>Zx7&Geq6P z?GsCnqd_3$jg--s}KJUa^BHFK_InSZm&hy64GeNVnQv8(2z4&?WtcG`*N-#Cfh9twXCMXe(5#?|=lhu_LBcr`He=C4Dk60^sEPbzD2)B{*9}>zb!lZ9JR)I z@9VUyuKmL+l

e?|ii+;_$8`g@*8W1v$6kf4&HA^z}XJkND<0=~GMoj*CdIIluF* zr95|0K2T}De5v36WIvy!)vegmC#a`ca$0_@*;jrUxEuGpoa*_Am|2z|NuG`QH7@jY zJLd*xvbfeI2f^*h>c1`z`>nOlen!Wzbg6bsG#@_6?vE_Y*X5)1-=OqV^U!oFuov?) zw5ni zbAN4pfOS^In7CbgThWN!;ny;XMk(vLE$@wR!} zQqTQaT&><%kHmSIE6DR9PqeZ#a~JPg`0Gy63zzcNI^T`=zvUOb481R#q7eynOCL7B z>>?uJeZRX4*Q_#?gjUHHeZBT=EPgZ-G=!V4k-=fC^EL2%wC@|qse$@G(|e29?KRWm zJU^EI`38K(dG8sJ`*bsK*ui$*(_8fmx$34kd!*{BVXei97#{}9-eS9BK>HTbKkxN?p|P=)#b0g6*?BhBKCF* z5=&RH>sHRoM1?ylKYQ9;Z( zN6a~aB1V#8M$Y@K2k-yA_akRdO`q=S>gr!tS65e~w{vIXLF2Pl{PRh{jdQK{h3E443s z2wYu6KbxyR^t%&(#3Q_rE%~J{!NoXs_i=P(MA9RB#_fpq{3GM6z~8akcd_T0B_GiO ztI}xqNzQ~jPdmSx%l;S%_jbfxysGnvRsEEtV>Ur;ySVok`;lE`qkX{md(kqxha5i= zhjFf7xv!QzV*b?dZ`^D=8uB+EPa7sC+h&jY)=c)nTh-g1-{TrmBj;>M!T%ii9ueGo z!`Y4~yJw4vdJfC>^?f#+K9}_BiNC%Ny^4SMBK1dhv4!@KE#dQn>Mw`em-*`ecKE$& z`>S*{J_Dbx$^t+7@E_lU>yP2{BGRl#7Q@jdB)tefac+KZc9+ss{LpcuOJrhplX~lk z2l`Bk+us27k zu}=Odew=aV4{NUrJimlKG~nlWjCRq`{3)Y22wq=jy=xAiA4V0qnLmr(mi>IY+GH@v zAIGI!I+mGi9)fVy4&-k~RpLr>INB6t`&B|;jFI|jR zm3!Z3_>_4iKUc4t!q@whhi-Y;cU|PEJJ;&inqT=&fA7x!d?_hUg~zA(`#AoK`_$f9 zT-|J=wUT{(3aLzVKjkL;@PFnL&ARF70{!Z)z9+3iaX$B-d>h&{IK3(Vhi`S{qXc%-&l8IzI=}lw_AO`&Md0-E@7-wk@@;s;~ANV?t-HO=+^6g-%{C| z{MQrj3)R!vZ(h*<9yq$0)FZ;?Hs8X>y2$8`hFDh{&+h(vmY$dM-P{Hj`}^Nl)9+pI zyxKf_M;zVdsN?9~1@P+(HLcN)Z&^$i8@CB;)PMNvWn}dOTlzipOVB6sB>o2%_(GYGy#yu`TlF8yw!2iF35}j?sxgO3eLa4+G*ro_d^99?w;OV==;RHZ|<$!_J+s@OTVMG7l5uI}-rrGX;l8qC-LHJu*H5F1FUpQKs(U(TypfNwQe4H8 z{%yy9{EYfL^OJ-=SMztgtA6jI`+eZ{e82Cjr&njMpjX1@2hjV=v+<(Ze=Xgbz4kXg z->aS;aQYE)66cQ|e%gDj7ahpu-Rupx`lBDTX{&yAM zahf^2uRI+0vroP(t+8H&fAVL1bw&B!zG4&&*_b_B=u<6t8D7gIzlGkq^9h$@A6Jz> z4PAEUCwWl6)|I-#;a23(E`Nv=r?IP_)$8xX<=v0o)jjQJ%z`0s`+nSsJyxy;Wsm$5 za`b*FJ3Y=lcg&yAPw|OxxdLC~;qqiH9;*IWrw)g6;Wyd?zPp9$y}`!|Ih>e3pdXR9 z?p~NDLtKI9E#$8|oP4eI@Hb*3ZnZCHf`jw&vv}424X^)6tG5(i7votY9&es`U6VeR zUu7JJc^Bm+^*SGC7f(w*miPG>_Qojjbk7>aw#ir8@d^_By@%9x@%gg)TZpe)thPbk z9eh*0jo|dl{*8mQ5Id`R7Ks(7z@&Qu9+7l2u}n@1Mfw_wnCB`)f;&%GUM-e&cTXW63-E z*~;9!L9V&?()X2<4Mxt2rG@+tWvhR0H|<>%?R;7K4F1Qvzl&-2_54(yC6Al$KP$7; zd~B=rbFH&C>D6L=ch6P%Sl53}WTN(89d>QtLU|hU?*NDI(4TQm7_QdtH1_k`=xvZKo9$6 zO8QC26h42dd}IFGkJVPD{&jptKbKBQKQs%(+bY+T#_W`S1?Sh5F7Q3@`FuTXhJKvS zG1jlE(Ia2(-RU>}UJE{dL<{95CaJ*f2h;EAoP9X@RP*IQ^#%Q5pWRslkD+%qBCo42 z&JiB4K5R}K)~1j7>nMJX56y$7{Lv5k%$)z-V7&JA_jj~gBme0C^mlowGP}X*wT-_& zsFv>HuvbFmKS~wZsaDGU#ogUYTiGA{Ml)CiO|#3j)6Z)+kK(?S{75g;^-6a0BEOcG zDti@|BF(d}w9=UW@iwV%5Fc_EzIe{zdn+2dV|EVh-3=Z;r(AQN zFZt~4^E&IQvAn{B56MoH}^Y~SzTJH zorc8J`q-axh$sJj5gNw~ab~X#N&C zl*8??zpB~OuWBvjcYaHM%bg3pTKsOlcKYb)srfs~S9<^T?t-7|;AKR%-;%#u;K1d+ z$9>!-`K|PIEAJ4#PYaqS*Ri*!B#$N!D183HI90*tpM179gI43JgS%*-n=hN|`5N5vA0=za zuN@0wRiK0s_=bGC`pYWxh-m+%z~?T?$3D6be2nq`yxGlJ~@q7;0x0Ds`RA2a_pNT%IRm(zVGt4G=)nqSbqXHPQud?DEgjtEaC${ zNxo9P%D+$X7#*K{$}S&nzJ8Aq@y{FB*W0kz*6^ncA)O7{2|TZt-cE0w!16oy^^3-E zD*P4Q)205kvr2E&|E+y~i4!L)K0D%gsQrI?HBL-_QGfU^e$#$K9DNNBo!tFR-#Pum z_=qI-eV+c$P5)FXaC?JaRmm6fyITH|m*Wrm*+fo@EN!Z$iRrK8#E+zW#5J9p)=De1 zv~#+O*0zJozoWa$i(cvTGqUF@{sfo9p7|c%wakkb(pOk3ot@{!x@ZTW z{)p%QoZTJ!$-exv18Ga`>|6C$vy+#xKQ@+H*uT#v&xYAI=vLrtHMw_6T4dMK{~lg9 z`wE}l>B!cZhnahLkJ4J1p#8pqk(rpS(7M?F>@Cuf`w8yFK4k`cjCnBwKM{B0wF2cj z$fCQ1K91*~{uCEA?BW;Ed-8KErU!HQD?Zas&d>3w+S;?DZzqST;z{17J$1!Huft^x zzen9P?5ls_H{SiZ435YBiyPr-OYPnSH{F_t&ueib&v|yb&&lowzH5|uA<^R;{LZY6 z{;qI7xIsH^3c>&8>U}PoO4P0L`QKP5e@$&JNa>ZbocqScH6PP|BL(i z@cBFK=WzE!?R19QFO%YE_VjZ4nzEah`&|oukI&WXRZ=)QSpC)c*XVNcz^B&qs|)+8PYFX--3!tJ5CB^~ZVNGwkJV>Ukso)VK2F!F*N0 z=a>0!xOEde{{bKU;c%@q{P*AHb<%jRc@6xI{IaX_CTTsAk-*2i8^Tik9bf%@ua$;f z%_d9ZeD9l5?Q{xGb35)hA7m4xagYBm*E4YEyOYYt?=?I>O;%7>l26P+5yU$MXP 
zjgqVRtLvp(da}8X(2?5ihaD)wp}Sw7pZr&E*7BnNhUeFv5B`dq9=yOgls`)} zzzR-5*T_+8Eo@cH+Djw#|&HED>H=6Uz?8Q3& zi^cJcK8xemR^ao`5O^#4{vP{#TN?fqO6*@6z{er}?mcfc_EyUV^p;xxNWa87uzLfS zD$-J+&mG`aJ92HqZXRf4I;AOoD&ce7JZwh_Uxn4dA6=U~n!wZgaMjCl=})B9*|}iD zg1&Rw-tYMP)H@YlbJH^4E5zHiOXD^6>FGxER8EMr0X=E%j(Ddu?B1Q>=w$q!2oKB2 z;ZnFrz`-68ow{SD0UFReqe$9c!_ z8|bhd9A3`bU|*=e4QRq^=}Ko6!}x{&qx>}a?aIidjV0j&EAuX1*?CxNVvnenq^`sO&?i(2AeCep+9D$HT8`<24#C#|^oWa5MH7yJWv; zPlP?(j~iP%syF-OOg!fUt%!+U*{0uw!{Ok~=_e@UB z{?yL*rPI_C_v8MRjW+K(y4TW@+}Ex*Qx1^uDy zqhdY3%ev9cta~E6-1j|`SBiD1RC<;lrBi|9&+-Fp!7sgz9;Wgz{igpd^Tlt?J13BEIniSCyaD;!Z|C39%hB#LE;GO7 z7|YAxjuV%>wtm$X*RfLlwcKl|2T$6%pR&;?)E7thCu!G7Cg-I>4-d-g`yEe4-YAU- zj2Y-r|H0aco6-IA3~mhI*C?kCW$?L?^4^2g-gNtdBk~Hk=f0$RBEQ`kd72J4ujiu& zo?i-Q`&hyMV;%TI2AjosQ@^i~KlEjC84aJ8v)($&f7F!zw1&rvXy>l(mwX2+-P6=w z_`#myN9ji6mf?TByep5u-|!24U|r}01wYj9ME`!UF2sCToxh38&Ea)z`d>%$RIkb<=J%u~ce~&uS>vI2~x^{GwEyj^sy`i+~*4lgzKQSlp;R{3LqSfn^r= zm7211cV{CugNL*HI~1aXoLlin&r9A+`m1k>c43F|J&f&l@L~bP=$uaEN1Oro`kT^-rdr$OAdge1CxK_sJ{NbCZ}cFG;W4m zmV|$NMRGPzZ$EVRPtt1R8POq!yS=dhQcfr7TlkK`&m5=S3%mmLB*ct-!q2EL?AtTR z@-lV2r(eC`%-cK=vfuKa><0T2=<|tEJ(AC1c${asajzGL;Pww_lK9@-xU^ETP1@VK zwbA-Fm_BYm5C6#qav4Ko>%mE{1UV;cPM_ml!q#b#Q6OEfp7vJ1HtM(EPOJ5!h3~b~ zQO2ncdf?<#cv?^Uz2V+y^exg#^d|6lBQ0pjPhC$tav7w-zYNlFx6JrVfy;42;$Y=t ze|WTdBA>x7zHg!2X=!8nQ6=BUZ@!63N?RJocH-VbpS`=pu5D$9cRaqw(~ke2{sC>= zLcB*4c=8r}-cv~Ake40ura$WQrf+Q|CSl!re{z0Ht( zl6Fb|lmq5n{fK?v?dFj8JNHu*ps7J9N@VfS|9=ZN`K zDbB7{F)rt37iwjWEa;)v8{u;`Ih2ckd(^*^+*?V^lLq`C&D2xDuhED+r?}TrL(hMA zA8`X*YnnWeh2C{yPyd!g9?ko+l7Gwjnb+Z`q4Pg$&)9c;ku?_4I025YHiu>W`kVhX`ajgXtjK@WPM4%S z@1%U>53QF@CiiOew&q3ayq$U1PF{@+u6XY;Uu)JjVn4S`tIWq)xyitXIF5bXNk6yB zThl|YhQQIVF9zj}>06C>jaCKy;Jm+2`O?ti=HKDI2i_i<_f+034efU^9!KW`jcX(4 zdLL_`g8w#D!n{q6O7=k9*KL91>q>2KiqkNITn z{wpcvUC1kuFR48}Z)y(}{W{i3b(QgroC}@lS!6(MNd8wr<}UR3GnvP_s;3pr_>o=s zupTsFKSz$8IR?aFh-ow*QFjdr%`TV56g&qE?+)G2!$kTTSOVVzG z9bJ?5;0Hd{nW@*I%+deR%iqo5h9Y7=kTb16Ik$$j)6Djcq5@WEpC1(d$A0ZR9G$An zWBBZA1fFps7v};uYvCY1kmW4ph&yFP6mq!Vsop$&PVB?up4-RPlfd)mS>f}@tpS{g zj9Xt>N2+MpSL)rte!;Fqe=2a+)%VQQnQRbP#;$ zkoGO`YDC)6d~Bvy!_uwDvv1l$yK?ifhhv<(rX$i3^kH(^Og$0T9QJd>dklfY?f|A8 zih2%!ca7-jB%cvSx_{a|jr0A(($4Cukms%+zT`8(Pt5mA)2cLXEFO>Eiv9ZUbU6JK z_XR&i@rl11tDm*u^N3A<4?$I}|~XK(MLw3f-~kJ5W-AN;grKi5moPp z73tBuPnB6y_C^K&a_2O%oWBRZuW>R^1NZvT&PMPz_5*)fhqh0Ku*3f}+5`BXV;ozH zw;QMZkG$JmI~`-3`eg$PIfs25>(0omsu+jqS)T&8_f}u56GO93={V(ggXbZ)eX@3c z*H=YGXu5fM#@;_Ytf#>ICw;I*i&Hyfg+7DcM0Dm$6uF6^)_ge6|v-#dAdCfWz=Z|+~9n+EI z_F~qvDF2~#BW^joYhCGXi+=${mjNy)|o2xt<8qhr^@6_^^FYq6>>}1&yfwc zTHd`8=>QVisGec!D}$@Ce`y16df=-SoEvETTIFpDyJ$4KH!?2`f@_`e-yfcaojIkj zZ!`IxgRd6wIqI*1&rQ>u9#7A!%>TFtzM1}4@q3h$cWZv;M&#Aa_kQ}{y6|I!pWsyO zMttG9c~|vSd!IWckGklM(!p@|Qtip^53egx&Rr9D)tvo0BPcoVnD`&2C;EAW(AKo?rnJF?u5`!@RqMm29wJx+Kuv$`TxE2_-jbp&-eG7 z+{O4lCz5V&wRr*qZgu7T9?0Hjzd0eC%t~*PCdRp))n^E~cERD#NyHa>mz%W)QPNT5 z(8{VbJSA(R(wmIBz}>Nh9*#;UlXoSndvo~Q+Nv<9C_g#f(%)Gm%56zMC&J_KSI`oPJETLEt4{Vyx1ujzkVwa~H{4@`yYc&@((UL=YBVOnt*-F7cTxUQIM~ka zW?DMKIM(n}_f)P0ySlUf%;29Mr$1p=??LYax7*U&9oWG`(@(vN{hxF%^v3+tZPK_? z`jGV|aF&>ndr^oYBze`+PPl^`3@J?F%{n}_?hd&@saH2 zO6?4jA87=+ba2l*@PDej%gyL#9q$qLP`;D6y_)nWJD$MhxH(e}C&Rzm3O)^1e;4$~ zzuzevUyScW_*89Q=k-#5cQT)iDd;<9-P5>tJ1T3z&S)Z@V~YBl@gq-3BMy9w`bL-? 
zyJg$Ji@0a9O)*{*wYN2W+Qaw2w==UxpvwX9xj#8aeBGw?_w+2{9S(F}I2OKrTDpK8 zTZf*W#lKz7uD&qqrN67apAs2o%i!@aIMKMzFSqcyB-=uN z_hLS z45B9+y|+?Dk7g#z^AV)+qBxGtefM6Qb)W_s{+#baiYJQW*v7d3DL;Q#a^;tE-d86X zfij33dip-M7}t#5+YH8eTakXe1#V^*cFSMx2ppYUi>K-7Eaz^WM9_vESc0(s;%)4n z^hx)X=b(SGeIbX)w11h|%EzBZ=9_Ywn`6>=c_BAGL(tWhPT`~iH#@+WP;oEGo zZ@vonZ65?Age)_5@^$bJbgwohL@) zP59r_D)h1l;)rKk4BrnRpU>3W!~FWt&FZkjUxv(c?HJauc)OT^E3IGQH*~6)j^_Vt z$PR2xs&Y1xw@kHe42EB)HG6WsILsgx@hvUEp-! ze@}QEI6OH$kp9ZlgWi`MC#18KuhEPB3Y;62P6*lgzPoX3njDlKtQ{*kJRU*=j!L&T zAH(0gr?G{&+8qNYPll5b_j@^f?*JdRNq2%L;z!|X;OOP)l(fAa$$WU<#{54!4f%vW zWW4fo+0Waif0fpIe{eT^$S?r6yRjc0hwByQ--GxGe10_@;CC_-v+r9agMFXjdrk6< z{xxNXyhtA7$m@N$9;Y2^3wk5pw=}<+xeqwNe75eT)BH}pOY6cA_5Q*RkABu;_r^W& z#_Yz(WLTMP4yQ!2XWJ^jUc5&y`>YH;Pry%W@fQcAwdJYl3O~oelfhZ6XX0PrVPgJ| zXQy^Dzb9uq7V_N(J^nrl4vsg|cFks*UrpTKp62gW{2)`5%zH3n3%&Zh;H#1QEW4s_5MMX4kpK7D z_V7xillq6ii}l$K^rwQ~d8Ya5{RlYU32tpf51cHAPc`_ifPdZTVPzilmh$2S?tNG4 z0OumFesw+zy|sHLz0HrzIp7HWZ3nN%8($fi+0jk#HzpsfT{qU@R-Ak8!hap_S5C{f zOCt_{YvsdEI~YEe_-~cmtNrln-jYwje>3*<9Qs=yK2O45D{*u)l&fSn&w^8H zOZVi{@!0}CPct5M`8js6{=8ngUp;XjWO*Lm@)zY3Jq~WXSNhaC6>+n_=UYRaBlx2a z)8DU4KU+7vhJ<#ERp?sydW`b5%$R}Jrz#f1sYYQjjXTu1zQ_MOgFd$+`PnSc^Yvk7 zy1P?FI-}h|=-b=Xe+C1`qzB2E*e%%#zY+Zqmg(izqCx2#>;6CyVMFZS2J%bqmmHbO ziIg4)hh^I`Zd0rzOKI&qGV2M?*NVp&Y}}@+X>qcL^Scp{N?ujC-IRTFIPCleZvV+c zvzdAEn%q8h==-sD3a_xm9}(Fx!hBp+It}iQRP!xj>$>9UCVab3FTSoP+}+Yy-+OlH zZ?UK6rhU|ZAKZ?2kr(o_&xG2~J27j)fAS3EYo83|;c2Qr*YiG4HGT>I#;(StL-H;U z@RrF@tnF!Vp$@xwC!gYA*vmuMbq(Q4nGyO0p2tbL9P`?bcRWU=Ke&I<3O)udbxe2A zPDl8>gTMEO$MfhzbGSW=-no$pPh)@ABi$D+RND185%f78K8GDYg1s91j@_&Wai4rk zc-PPGr>DCX>)?^;5cTy=_F#{OU+wVpXy4uIQE%)I_Jn&~@O4spobRoS;6C`7%`bfb zd)xVUdW?Rxkq_xW`nMzdc~AJ*kY*f)UIU-^!{1Kucz<~IUFm;*AN)VaA0F?JF0w8( zv!8mpu-99#ll#LH-Z1=hFdtt1dwux`-|g`AWV#2PsD#He3mjgV?u_rw;-z;pPs^Pj z&QYJ2M6Dkc_**q?LEkj=84igBBp?Fs+BE{$YgZl?cRWpm+GGv|FXwBG`b@5@do z=XaiKzQj9&+rTULg|cJq9M*Pd0;eUqw90%)98|Q+Dp*LhlZw5D}!{JlN z@nbl=iGE5pSoy8Rah#^yRsWCv?<`&h=pjyeHvRskbi49lC%=M^de*fC_>FUfCE0-m zt}fMHr2 z!kih-3BxAyE-yQ_8$bB=`4vi?B){EW=6UXYtZ~JB*(HC>Hzx-9{^>yb^vU@|^giwf zF30sSIPFB7JvJjB2{*^Gi@VXQHr^lHHysVP_k-Kv7ru~v?ADQcgtOICm0X114#^<* zQpV_SCw`7O#-*!#MhELp6Zg1lYo~+!8G9CTydyub(389KGm>7ga(R9-xqM!_&)=KT z%g6J$zjrpf_z1uM2yVwZzc+vLV71)`pZ`goc9!{0=2s`>0(~D!{>(2m&fk|d+IR47 zlEDHfe-3;;gIpr7e(clYC9f;Y{)qiP35LEY1IiI_ySegrn5D7j*qOcYoy?azllO2Z z@mHwn?$lW@eg4)7=N52vTRp#n)qW$H%}RH59yb7T3}Z(QaH6&cRA>$l4^+=&JMzF` zImuW#hmq|h_`I_4d&r6lDIZVvf&zooJB|H(7(OGH={_vj8T{1;vDZh*3x1mRBhSeN z{0_sRv8ZGE^F`_O0_V4bt1p{vJ2^qxEgfpSZicUM&*Ndf9MP#_HTGaX)@5aHXFLY* zVw4-#3$1wjlY2%YGn5|+uRFl^3-w}Wcpxt^ySfwW|10ebWdHmQm)%m;o2}vN=At{s zr;*R7K|0qsc7fLu>EjIL##_^dt8H8UtG-ZwU-b-2+NDR(qs@}a^eF3QTdUG}^r0D? zK0^OwG-3btFw(u!Gm3e!H9tok_%zJldm7DA>WRGi`=le&Kjfz#qkP<^xgTT;}xnErF-CX_!XZm;yb3fce0;;G!Un82swFA6Fz5t z_YfQ(Y@R=#9*b_L%-)tI$9=-1;A(yTjT7O9d@^bHBfGh`67^c|@Dn)Gh+SHje>l(f zF-}&<>_qb;;xyj!YmwpE-r5!EmK_Jj+!V<^K_A0!ydS>3EeS`%&Y!}bER~)pjZ@En z*j=KBUvaMee_j5vN!c;!C`hqow!iOjkNfCU6q)Zw;?Gq8`9QBMaQozJ4mnSQw`Y3^;7>TbprD_keE9Ph;rngx&io(! 
z+H8099`1SX!yfv>{gnCSAg453&FSk6_!|m`mt-;5I(p~z9Q~`G+>;#&FWU1XpP+m- z`#A8qMm*igY@o*Si0)6m@~Xk}h#!6tJ?!mOa6kM5CF@V@*Vd>f-s@PW{@BMg%i~;E zE~q>*mp4vo=T4Z7XKUwYtOxf7_rz}__f~eLADbnk@;b(0sQcYVvZKnJ6NdfKPh3V_ z_TxzRSB|$|tHr;uPtop9c}^bV{q7k2tmfy~w7%?>*D?;nFwoSg6{)D6SI-OZ`8;|RIl}K?m&RWID7dmhp6A`uuzPbSa+lhj_J&gv=!v~>fzK_SmmTYN zLKS@5lYGX)=>g<2h{s?zbvG9wum`-InH*f`@emg2u6D`Y;NE2R>OPR94SgRE{k>LB zclO}-c+cCYrU1{yLB{u^(pfyCv7b6szox?Ni^S6P;g`N3A`$R^zjML* z$?^OdrJ<3pxjt1@AF|H!L`S3XM@yGe(zCN$_`AzAf^iXmb!an{&FAovld=xoE zzR}b4r(5!bxQp<4-N!EPs~?Z0m-<}``7?%EFZ!{cPe`p!&I!*oF5UT!+nA>##LbE}>zvCp-Ei z^QWEf*TIFxNe6KhW$=6~+^YqzJ7ibDuV(xf!^vT_dxA$A-xmB97t(+4Cy9rM`vJ4F zi{PubKeLO`Wu{}7+ugXAeMTCY`*z5#F&<6W#~0vhjJW6v^{W&A#!bq5H!M5dc*^OP zU2T2r>H9zFXLI)R>FS*Zzk`p)UU|QYKFimLE-IM+`Fwo+;QiT)$j!TW@Ug=0u7I=M zw0oO%B=$AWS}$su&o{v9ej$JMa%XaUyI5x$@psHmhbMj6&nJ;{5An|7_vk9$=y~{T zB0l5%qTL_JCGOQMMUPCT-|26R*M`ClZ{>HV+qX7>&zC4)BR(VWaA)xw=b7h?*wqIX z?RLoDCpUS{(R;Dxs`9YAx00X#He6oK|8cc_*&n68@Gs&Uwt;(PJfrRME7BeLTc+iw zlSlJ-=hZkKXxOH@|F!ZptIFH%% zXgIu%^UMl%^^s{0oZZeJ9eGP%&Yz+Um&r%>4>%L?8&k~JneL(NZ!}+ZFXa;R(FwHv z20r|fUzpS(hn4wa^7oQhKAwKJ^uNO2KFf~aQuD64nSHwcHihNu$mc9AUPF(*;@=4S zq&+{mx?BixZhLD@P;m))7C9D0}8{d`or|HIj!&W+~(jxdD>-$@fEZ#xA+gcL&3}1r(v0i+|zHjL7PpN0J-+jr)9&yOO zI@hdY9OCZx7Nq%|oqoi@*H2gbHjh;Aq(5$Mut>vSSDi-Owj5yTHRR|9AY6>|Q_%(V zVLIGiVEkI~(q8}%`z52(mg?WmsNF`t>+wciXnu8p&tvg9RQY(}>5tMl^tcx}m*2Qn zGB6pMUPZ6?k*p7G@qfH{?1Ua|lU_&9|19kc4+DqKfWsO7=cfN5AMf*8A0ltzzr>fs zKJ)-MyuB6ftn^}bbq716cxiD4sa!`6&DqbF;J>4tRLN*;<9Ao&FaQSkW?<03zLy3iWDPJG>b<07}e^T-C~ z(<|xS{9T`UC*@e}c7?ljtUEK4W$B&P{hz!8a6+*jHP0H-bCOB#N=M+V2F^s>TDL6D z`*B6{9Q+}(-v)9wxa*Mqk+;c4Ur*dn{p-<^cAs~cOF z=CFf1k78&1a=Yjt%)`9uiZS-pz zJiduM%H^B8y(oWjc7xyV=p670a{Qxoy?WwY@*Z|_L;7(hKF9gK5Z;G>D8K2ZFXqfQtJ*6>%!ym{3i4D z*HR7tYft6I=IhMPdGLIpc^`S6&!@K?ldbc(pRuFgT}!VT$g6R){#L=~?(|_U`*{KU z+yI|%!(S8e8lhJ^h|3FoTT$96|F81R;d4iF9Fy#)`~>>5AG6&Mz`g zOX8p7rHRem=iW+vvTo+ri%-b-p${p>eIb8GE&aP3|69ZP8`V=yPD}Fe$H#r{OVc>- zyN|se{`1%KhG`Z1=}*b@5s5d)$|ApWbM^+iSH;nw!@x z(#7;#oOyZ&{#)|;1kUV0a*vxA@$S{V#k`)wo{sbXp2|%%ueV8WFX}%sZ9?C6C+$Vf zXPv8{2cEM!l;4@vzmR+~Cj|GPyLX({C_l%_d6$fLwcz-of_?-3>zilCrnjKW6QSK$ zPjBaMX<+=%vyQ~eFE^_vUy?>f`ephNa{J!;(3sp`@qHZq{9b)=AEC^-T3!2(kCflld`#hF#FbSP{$?k| z>8_P2T$$NAVeB)QOE<~QmrEovuE0*!^)F=OEtp^bo z_o4M9)}J4;TfkW_#pi%=g(fuZdoIa z;}UZ0&JI4(I_@p;{2_YsUFo9y-}=)`-nrA^Ko@Z$Po+nj@3-Xtq4)LSubm}aIW~Va zJ-~i{6TP1E!+($`yvZM-4=>3tdMn(om%N*I(C)$TdI_9gDgSf$3Ob0-s8Ie$@pq52 zLw@pp_jTHBp8QG<_29!B`P1f0E%x-4^ekSWxl(=MseU+(OzRs(q}J1)n1PMx;a}+E z^sAm7W{i8aov$b%JqcV*wfKVeB9p@y^Z8dJc~3fsJ`8pe6n-t9H}YDQ*a@g-rE|cA z#=X5A7aEqu;ZWeUcT{?-%|beW8=Hh$lS7OpSfe0=OG`cst9qg7@R9G9}2L@nN1;8^4IW%Q;z z{0Gsup-H{;etKwUl|EnK>KJ|uS&FQN4;OefD2;Z##UMVT5qqOc`hMO` zBGhu)`6PMP;R$*bKh2%`#yQvy$*A4`|9)^e9OpWA79@*{Y-y%W2a@=tHpe&$j(LYAdWiZ$@8AQT$Jxq*p0*G$GeYx z!9$z9hc14fpS3~KH))>Th5riYes2`?E#X)LIMgw_gMHD4ALMoQHsqJy6aT}!vvRj_ zZ!JIc8@`t%`(=Ch9PVCA*oUp1BQC|aI4|`yH1D?0oQN8)W3!OI_aoS;k@w<2^zlB= z%R)ZQo%6Zj1E-HeAB&#_`W54HZ}xBUY0Z9q#QckQR$eLa=TYlM-V<&Adk?8UrWFEjJ%x{&g;s|;@`7}j0Ol~m&?qfS@~n=4c+H{uApC&pQ?PvfcSy zZQ%V&`rFR;=L->py(`>a0@ouy`dj&Q?Z|_h zzb1b8{n888hp>~D<=bm_x_gFqvqPLT`(5mJe#s~K+|m27kD*sPFT9_gH^$imK8I`Z zBX;edrBdk=^+cw!JB|MkC&C}4@e*1C5vOrJ>jrAtnmlEYhnE|~Kfh)C%9CDZZtvto zJ$Qxw{0g7nBi~9p+%w_kPDbbx{R+?E>-dv@$_!-<8j%muQRd5R5maHH#LJ|;>G^ia zpT^~<($Vlg&gu7r)5G9~6Hx1M!h`&haqq}(elqQ4ypBV6y6-LHSF{&zP|u|AM2<&-5?)`vb(PFb|%?$82$4>mW_Yt*(80__f~B&t0r~ z8QZa?C^o< zyUNA=?gRW?4(0R(<69p-e_HTAn>}04s0~P8giF;OTX2fs@`3SFVXs3!EImKk{CBBK{vq-$!qZem?mRvr4~G>^I*~U&J%4>GGf}=t_Aq(0g3l|hU(3C( zy|keB%`Vk`zvO599AaL#&OSh|FJ9wv?e-GS@fLo5cRu)eF;Bza(I|0aE_;_8c)+ty 
[... base85-encoded binary patch data ...]

literal 0
HcmV?d00001

diff --git a/bench/python/assets/brooklyn_mask.jpg b/bench/python/assets/brooklyn_mask.jpg
new file mode 100644
index 000000000..6f839400b
--- /dev/null
+++ b/bench/python/assets/brooklyn_mask.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efe74dbed044a192d00cf4a8b44cf89620b5fff29265bc847ced7d60dcab047e
+size 44761

diff --git a/bench/python/assets/brooklyn_nms_masks.pt b/bench/python/assets/brooklyn_nms_masks.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e13cef5e672532735d1d50c4b8b14704651af54
GIT binary patch
literal 196620
zcmeI!L2IU06#(FGCNq;5=}yqLATBHD%%Coc%Ry#g5MxCcW)m_=s3Do8Uy>;lx^PwS zPbl5`YpN@Mf;;ICsQP}~@o=YehXXFk#q-2B@0`5%+~c{=fpYivw(hJ}J3Fh_e%@Ve zuby39UY?zQa&d6+{QBVe;q}?|gX6=S!}}kdoj-oGwR-yW=^t(%-CSNB{r>*Ji<{H) z>)nH^lV>kZ&yNpoPA;x5uMR%`$?kgf-g{5Bm-^RB=HZ7Y>sxKh?xV}g^WU~_yX)Ie zZY?Pf*IQ5SE&m?9I6nOO!}XnCuD-gxx>?`-&E9J72fzBqcRyR-Ti$!OK3i}9@#yIC z{QTtT=JfL7diU2?$0t`O$G$-AwYlt0RjXF5ct0kaJ8Fm zj4_f3rfYH3P~UBfmv?(c>B+agT9TSAN2v(19o7jDAV7cs0RjXFR1k1Mt}tb1BtU=w z0RjYy2)No!H|Q8i1k<%RYN+qF#ml=rqx9rkUoA<^mZMYz*$(Rj2oNAZfB*pk1S$x) zAXk{OGZG*`fB*pkMFd>!rW6-8t*@4(X3J43f^3I%0t5&U zAV7cs0Rj~ST#zeF*%=8CAV7csfg%E~cGC?yMiRkvEsh%MyKV9EZqFz^`PNrUQnTeK z6+yPcIspO%2oNAZfB=CC0xrlErtFLa2oNAZfItxeSG(y39V3Zgx)w(b_1(63dADbj zo_y=8C8^nRl!_qRVVwX00t5&UAV7dX1pyc23R8AQ0t5&UAV8pqfUDhfgN~6zFkOqI zhWc(>yu8~pN>9G^)soa~IZ8#4?XXUO009C72oNAZpn`x4a)l{7BLM;g2oNApM8MT< zxXK!5-N0tAW(xY|uO=om=^)3rEisPDGL%ey_J^yFJ#ElJIm zqf`Xh4(kL65FkK+009C7DhRkBSD3Oh5+Fc;009C;1YGT=8+42$g6UcuHPm<8;^p0* zQF`*Nua=}{%TX$VY=?CM1PBlyK!5-N0u=;YkSk2t83_;|K!5;&A_A^<(+xUC62WvW zjvDH_ZSnGM&nP|l)>lhXv*joiLAJv>0RjXF5FkK+0D%euF31(8?2H5m5FkK+KoJ2~ zyXgiUBZ*+T7Do;B-L`mnw`Y`|eCw+vso8RriXhuzod5v>1PBlyK!8960T<*7Q+7rI z1PBlyK%j_#tKD>ij*&z#U5lfJ`fgjiyxTKMPrmillGJQDN=12xx$p4kpKY#1PBl)BH(H_-JoM65lq+OsG+{w7BBDijM9^DeYGSt zTaHo@WIL=AAV7cs0RjXF5U3#Ff?Q$B&Pad&0RjXF6cKQ>n{LoCk_e`2anw-XZHt$8 zdq(NWx4v4Enk`4E2(lg42@oJafB*pk1PD|Ra6zsxWoINnfB*pk1d0f_+D$j;7)b=v zwK!_1@3zIuyFH`yjVf8AV7cs0RjXn2)H0un6fhxAV7cs0Rlw? zT{DmiD0@GM-BDews?8BXOy0N>#HTH*>aSMAlqS` z009C72oNAZfItNS7vu_4c18jO2oNAZpooC0-E@PFkwh?Ei=&46Zd<&(+cQc}zV+3T z)NDCQMUd^VPJjRb0t5&UAV8pkfD3YkDLW$p0t5&UAW%fW)o!{$$4DZWuEkM9eYY)M z-t8HsC*S&NNouwnr6S07SSLV$009C72oNApLBIvM!jzqn009C72oNYD;A%JBpkpKv zOxNP5p}yM|FYoq@(vxp}wInrLj#3e1JFF8RK!5-N0t5&Us372iTw%)2NPqwV0t5&Y z5pcDeZqPB32&QXs)KK4Ti zbpiwk5FkK+009CO1YD3SOxYO;5FkK+0D&R`u6EN6Iz|$~bS;h=>bq_6@@~&4J^9vG zOH#AtC>24r!#V*11PBlyK!5;&3IZ<36{hTr1PBlyK!89I0av@}1|1`bV7eAZ4fWl& zczL&Hl%9O+t0k$~a+Hc7+hLsm0RjXF5FkK+Km`F8sBsE)(QW0c3tP}XA1@`y0 zcb@<&LNk t>t*!cfB*jS@4qe8Hb2Y0?ajY``FHz!^PiLby}S1|-?oYpkfJqM{=Gz1?}U^MXVY6TtpD(~sTnP2RkD z^PM-dv$M00+&=Z{GnSOZew0pQE!f1NV+A(IC1%;fQP^Xh<1`uKWz4dFWUreQ-u>ErA7iwvGLzCr&!tWW)Z<-eLfz9Ia7 zpE}dWpE7gk(6OUO4<9;dqy(%HT{wvXCK+xifwm(F)ONrT1?F~ff(BykT^ncHzKoy z;3)*(P4HTRzm?cB4Nt%IptQkkDf8pc37o;=VS%C8UMbJ~QYQ-&JAHmnKeI97UjdVp|E3 zu}Rsgn5kq=35TcS4}w|{?aH>_WYMI zKz*2z`8l27nl#BKep`titFE}GgRGjXD`81xSIz+Ou_7`L(`T8qTJq;8>UY)`mHGA> zGC#kM*pUqr+bVC2`pf@31HwfNvKV2&yn|*MKmesLRnoG12+Bs2Bd;O5h{? zAAYN(7&s~e#5WSc$KYy^Ik~dA^r-rNDpTkO8LFQV8|Xe;FX7K1Ka=3P1k-zpnExFU z1P0sf$M_n?ngG||avX0<_S+nJm1>y*(xFql&+G0P`ojS1fd9eWVNdOzAL$g96sXWLeAoZd9ViraBqunexk=QmP) z)otHNl&5`k63viRlYTS_r-ElGc;XTDF^X%;66)JIr0rz?*$PP?4!02VDR5RK^Vuhe zZ9lUHqRdW>C2d20zRF-_8#&G$5@e%ia%{Z?vJv$E9YkMkwePs;AwJVmjAf`Sg>ZP> z>aK?3F_KejyToiVvAG)RsxC_Sp@paqQ%q%Qf>$H|NpSUHd*K<>3ZhR!ou=o*FhB1{ z89Renb%bv-mWUxP53qp`pb!6DV;kZj*E(ykZvFAhOc(EJ5Mqzl;rYH~kj3|dRu192 zCT%)yWl0~=XY-}RhihvOs%W8zAXHEGwRwhUBV*;gMcXr25w4e<{UMvauZ$4i{;B#- z)Zbu!@lt)+MyrKv<4NP(^F7DHUU?>56I`!!0~*-`SEBMulFeb zPx?1a_OG=<`f&b*l85jMnD6w1iTMdXiwqa?kROsdk5b`@&!X$e2zeGOpq!t0v)md- z5F5bfYCU;=)Da4Ogf}t|NAiSEGPbF`Z11M&h{1;oS0T=W z=Uff(RE=5hi?IOyoO=S>LYwA{F)xue9whecAUH~7ymBW1q>%BRPVjVrp&b?4QUjY- z%YMmEl6~5%m6(&^$l^+wpEn=zt6<9k*B*ON8O+Bqm$#lv-T~hpj0GhKx0w9UdnBGD%_7O zE_3c3&~~RiCnNcYuihux2y3Bf!hW#p3Xu;6o=`gpC)HWi|0<(ysK&ML+sk z$~4*6eIFJ1@c4e8%#S`Vu%G4hK^eHe#dCRnY>&ap>+<~ar$fwo4Y^GOm<{`PGLiPm z%-TbgADB-?%AG0f2Vc#SII&4$Uw6bXugA(M{uU`D{o7pPl<5+WGo;TKMKK@vHpMd|-0Q|2l=9&*5}(kyB1ZZy zPx5JzJ$Ck%%r{ffPS^+XUBTOEBxkW<54zayTb#)XKXkFLLu!`1S8l7N{`twiC2bM! 
zzhLY}LTGbYfQ9QLcINLL*e<8fQ_wzVtn>ROx1ElWIMM%;c2wt&)VHfsuBx&`e?s&l zoRb&#R#0QklfKRVEU~>-Vs^d6kxtTA;{_6%WZp|zCiA`bDOEMOMslQ&U|-Tp)c^8H zK7ST~g;s!-%)>GJg3fXWxY+!5Zzdav_pc#ez?|(9`U$UICAADpVJFnJ4*PCgfE{jw zYbR5;{?oyI9%m`zL(IngeaqR48ieu(K!=#O9pAo4_yE|N5N)Zc-INQ8YpVCB%lo<# zAL}FileHslWP(G`6(zWk;36q+iv4O3x4)MS5md zbjNhkM#?kAR40|UX<1Tll;AwVxz`s^&Ukb5-*M z=RR07>&WeOx;^GocTO%^w>;bTpH{AKTRB6re0S7yag#OsVgFy;bD=D|huBQpUQBqP z?~H81RRzw8;TDl8BYNomhIfB~avsls4-)xOf@wdJswzNJ)>}FQeHa8%V~D|aGe!D& zg)y5>>KZ}tUnD=)V&cPB3k-FNxxFsP(zf3$%d*c2o9x9wU43m7qZTMZu$-es}Z|oY;MyNMN%I+X_QJp4{u?VivNtOH%zT{lLKz?9c zUcb#y3TDo*uw26P@0D0rKfQW(yhUy19hUO6PVcFB0K1B~A8A@A+a47GUiPWIjulb+JOy zN7PR&nIHL;>~~ApRt`E|K|RgEUh$2sa~01lj(u%NTj&`-`d#f#DPvzL>#{GBHk%Jf z8S8h59q#EnvgV{+bl&jZk7TfLW2wLVu+(1|5g25clwmIsonI6DqQq81X`4Ay;tKa5 z(N{lpjUneY*w7dIEocKR(>_7@{_0^23*1g`Q01|vj& zIrh-G#U^_0B=UD6C(bw!dkapzt4LqGPUOY2fTN$DBl6}$=MSVVGN!DplEWK2V%tGF zPsct28wzpH&C$YUlX~fWs=d_1NFTAQMO)w*b34aYJA+yGpuE$!qq$D2sm`ohr5=m4 z1>e`WCeU*uMS>q_Q*1HuC4*ox-r2jte(0l{L|uSWB{n}o3_hR7kO%OSydsIiOOY$c z&wo|=b8bY|>&uqdf47ur=k0%Hu(hj2KG1NX9rTeMFUY+21^(FBp_c{U(}nyQuaz2A30BDcxN#DEa@Dy4f-grktiErDZ2#r zvqv9A85`bfZ$aED5MF$`yB2lcd0KVOzpb5%D zKmHJrHeB-{VvriRR@eZ2u|Qy$CVwlj?;U|NSm%~fPyJUA7x`J8=MY0(tp=g|pTVES zy8{#UsNw9Ro1A;mX3@W}&J^zmfFG;A$Oqh2VDN`O2n_ww8uftAr5|8ju=|#=^}}9vZQ$ zzsQFfd=>HxoR6E2^#Uj9sZ!oP4fCO_>9rl&^^we9a~A4^wj`Ef*%`nU=yFo)r(~V; zdxuzoSkG7)*3&Pe4TS-AZ7Rw^yR1nGyg1?L=$D zRz{-aA;dm^w8t}uq25jHP$vUFzfW+T!BT;1H?<0|UO9-DYb@(6&(cv37&t>8zC#dE^n{1hPtr1*LI&-A2|UY zV&Z+bmBA9Dq|Md>QE!07y?NWu*1n4}A8Txn&F!DP0rKj&<2QQle$aiuy+77TKHWQ; zbo%r!y0O#ubNODYf`z50a6j3;C1f?t;I>7sG1!V0$VqR_-Ai@*1^5~9C(MU+-_TRA zEx`G^;+_lR{jE7Dvn>#w(}3%@n(5I(ep6BoTCYaKj%Y@sV zmu2NP=lQ9PL%E0MaBQ{G^IC|y<_GqV{2S^_3Yfe7ST762FGxi^E|7lbS6G)3C<;$S zd@x`y{u0}Gw`QKd5b;`#t(k-v&Ml0ciSl8cy*QC$D^q9ri;zQDGn{TBE)K9p8xZFO z*xG@J%|LX?d#I<2<~#pd#1KQJ^G@z3a5mT~6}~r9vk%YXHbhP_QZ5?9+j!|cp(#VI z;O!c}Eo5|JJpYW3Lb)ZIxXvcmg~CNcIgWJ?`qqj2=IVfXAc5@ye@Ns(E9>1}fYtO;;=Vj-$9>i@v_pGd?g9}`2zxQ>Pwf}ss)AxY}RMUt>sVrW+2zeHU`RKZzC=a9NOEOa}_BvjAz^M{FLvEzE{mf z-)Nv8=)RU?bF9&9!{0D}v3A}~4)LnQQa;wj<=@Q9zIgb3SayLPFM5(=*zf$) zzvGC<1=g%7MZ6*q>YK@N|=(tgYS(n2IoOax+2ddTI7DdALx9OWbXP3^V?{w??$c@o{8+Xw{Si1 zTw)HN%4OgTSM*Zkyz{Eyp{{kg&1X*wMGjoa@v~2a)^uC$VspTmSxsKJnBxr#GLxpm zxfv&=ObN!Wa`w|99fa>hx_-dxW%p|A`zDA#(^$?d^kF!I)_I-l8##}@mYtrO?3S&2 zLu&W%!%JQHM9U`Kt@qF2cBa97es!4p((HLb^X;d3{-Iu(1rrXtWzSrEf46v~ty^~V zmQmfT_NTfsBQ_b?o8H8+xu`)_BI|kA{@k?b*$0C6aGQVF88RO_hudjAYdkl-gty&X zs26?Knai*{bZgh?yk6EuJFms3T*f}6PuuuUlz%|~a_@s2r!5Z^EFH%2r+J~CkG#n7 z`a~$NWhTFuL_ahdru4!4N-M2gho=rx~Vy%z;n zw;bl?r>&ZqJ?^Yr7n^OnWNX*l@5)%OY|id{4e!Uo(pK4Rr_JYO<9&_E>%Qd}&gTyL zU1QX5YV+F)-`=DBhP}^ad`ZTL^UmNn`ni#`emUo-?B3w&FOkpmEkXMhydN1OgJ%EB zxlH;+!L%I21GfajZGxQh$lf6P_5GaN)cb?2?i$bKt)Omn?$7J8lJ&AxY1kM4(CWW* z0nhh;6O4UPkLTx}7s@-pcz&DSAzN?HaoJyj{^>YZG`ctRLBH>LK3k;a)q4zaj<&ha zGOnj^NH89Mk=s^OYV7#@9IW@CX8-XC^nvmKD_e;E++0sAeB71KnfTl6q7_SA`8OXw zw|o9MPq_Hc@uuC)rbWDL_$tGCsx`;C9fLV#mvS3yOTV?{dbG#X2EMS6W9RJj0RGGp zdFT49Nx#0)%}?)nZuZ^&XI*S%mS(Ohx|PefZJyaAI-29eGUJH>zvF#sonn~T^LReX z1b@B_+v3+!ibE*VUq8H~3&-XVqp&&p^HWcRuAln?FI&(kl=Rdj<#e6LzzYT!%MmU+pGLpW#@Bz z&)&M-!k?Yt$`rL;m-WHTyIgx(`Tv+5t@9MNYq^nnNrGdmk)Cly3$&r5p7!xunBPqA z@xdOBeS?jn8P{=~^0jd&C4*!86JyHbS93i7rC|I}Hur%Yj|L~LJCo;^vSN|33-KMo%6hk~u4`uH}+(WnecjINv1A3=h&?kLbaK={b zm%OE+WwYPl^1f${0Zl*V*t)>z*JA>2Z|p8(Xu;>GbEy{jwGVCB7tj}9kDP4N$~IoX zv30dE>hTq>yfvX=X3pCtlmGMZBJ_XvrwM((wE$hVvaEkUcq7Mu zEy~<8;~8x427Td$Um;G?HwTkMUHa%bt9ZWcH@0-MIWD?57`qJD)!{`(Y3wqdUkYo& 
z1?ea=EqHqQR^Gpb9fQSN-{shR$7r?iI*ylJ6)JraeQ#~Y(7|)RvR}#8~ugxpICXotoWVSa(e>cw$BoU56yK8tv^_S&XJXwRX*;^f_kztB!uBi4Zv zG^=?>l#d14bsT~+MS+oB#69$R?Qne@*DR(z*?9}fEYQBHe-m=pPanPp*Anb*&3ey9 zoeBR7=ZwUDZ?A0}A4d6l`Zn_s)YDV1*Iul3$LPL?|AG02_U5ZsW7$1gGz{y0Hx=nU zzE0y9?mP2GVL!6-w3Y3*V%e9quO1d<=W0D?;TT~7{p`=ipq@JVbFYp-yjY8_$9#K> zzO?rrc)poz?7UzB*Jc+|}^iIuiTzPkR2|B#zCy^aFeFS;M|7nAdV8&o65k zdVRx(T#xylp1L!LZJDF3nUv4-;k&NR?Z@+c>r^AU#W@EL3D)P4b?AF=F5CBdJuc5o zZBbSs;hBJM=~lZLRkkw_yGh zjs3fa_dLiJ`-M!P&rH1cu{GM-m+?7^-KphFc?HYv(axSYi{o%lV_*)h6YRtK_Bz7% zuGjlq^)D`C_cz|WKg6+hseZ!)c;B?L^hq6s{t_+qhuxejtAk$lIL?jiZ`wZN2`t-F z|6m5Li{QQJSzU3SH(%7x8=S*s*jDZE>EltpgFdRq6wI%q7u#aKdRo8X79Yx7tQT$R zgncw$kEFFm{Jj=iI+o*jmSK)K8~fsE&G#`r$FX#+s7UN}bkR2bfNK)?o-DHc3)J(H z)^6iav|+BcXh0q0xl}7Xa}mmfw7#P<(1+*f`l|hCTZ-Ot@cHGMwbDd?OA5r>E#`WxPI@d`_*<%OHMksQU|-z6NBA?dd}A@+ z&ah_OhBh1u#2?iV!=7QwYterW!k%OR%ffzh0>*}y-jVNLqBBH3oKG8r`e8q(U^ceP z)Qra)Vp|dcpSch7qZ-TZkMacpvrRY5FA12#bJ3pLbXEv4KHL-uf{EhcC+J7|y_^@B04>-K5r%t=EKiG^ zaUI%FstPMPA8?ZS zxSn>+71-WfJ@=-eh!1H?9}&DY(xdD7^A3Z1y;=4-a@9$*>a9i$-^Q)}5cz>K5qlRS zP71JBUPcUi)DL`#820BkUXB=iAkY@|nEvn|1?Pjb^pX~6PqJp+el@ny(0wh&VgB11 z+qeSTQli_L&vQLil5S7g&GXq1J>~i5xV&|fe$YPy%f6|x8zy7DupfTC*gv=Z=CVR8 z+gr2Gy%e#PVb&pm%;2RdO}fYXK7qYS)rIeV(ehxa7ny$QU(Nbs}4ZXd8l z;{6f)>?Hk}J%#zOKl~(oyW*y}-|Qv!f1>{U9%78=1lSq4r|q0AGXEuY7J28Z;r&z} zF-G8>ta#@Fd+C$KS(LEfYIZxecbk?U67$R;-7f2d`EY;9clOPll%vJ(d6?&0z4TUA zcjPvt#m-oW@^J3;Ht}9BO=IOgYzv$RNfz(sz-Km&!h9RPozF!a^|Npk@&15m3O_$f zwxz5`^{TwMuED!Xz9h>sPKdwrPra2mmtD?0XH;JMmO&ibM<-(ExJ z=l2mivSDJ|v1?Ki|FaATKLMXHgf9VxbI)F?FmHRg)dxONms6BtqDsZ*CuAsCeFngH zMM?-CgR4R2zT!n@@ck z-otr`e2c2$^LwliKzybpnG@jM@GmNj@!q2`6`vn^2F8$z&o4Zb_%MAor?FOQYY$3P zS<~x#WUP@m7%DzLCsxJhcX&|o`D;4N-0~IH8)UAi73H8r+&4`XpI=a_;`0k8j>8qs zCwU2cuGSNr1LH}>=NH_n`22#2TDqo?^-ns%(*=fky289&gL+qsX9tM+BjOc+Iu)Ov zbE@L=|6Jc9Yl-c`Z$Z~VF@FN4@$PEk&$g^f#pmb!rQ-7ozm>ira(>>G>LEU?;`9GJ zI%%mGS*PX6xkAO~2a=q5(I))F38oy@2vgs#k)lcp6a5L%zwoVxiqFsa+JKxSsrdYr zI8YU`iH|Xgt5CECA}T&Vl2=IXiR3((7t_+2$5ed&s(V_3%=6x!!;!@+^YhB`k1YS= z-zq*oa*{{<+3O47)q7?Etm5-?vYfGfv}6${SEpq-s2gR(dWccQno5{JK6VB z@%cr^kiG~LOt@9?`H?Fh;fMORH}*k}yq8q*`H?qz#!toP7XwdTV>pl5Dn7p;MaAcL zn1J&tDn7rDpY)a*B?}TevG{X&$}u< zKe&X7&(G&c6`$YbLB;3CI9e(`zv~YwK0lAOXsY=9u0N>w{M;W@e16V@iqDU+SXF#} zcYaXu`MEz7srdY^Kfu4UqvG=e6DmGG9~UYyoK5Ov*~( zsv%42RPp(%MwybT0t0m3@R9S(75uphda5fz>!sh>RacLaR}56f0QF%qe^w@5vwSkQ ztN8piqf(WY3{W3tWPYx#*l0HKTUl!bq~h~ephNwrHUq@RisU+$(Px=7;@j2MPYOg) zzq7um%(qp1enOan)n`EXF!-2?&tHA|REb|017v>ozI6p3H^`h^*<4!fmWZ(f1*v{U zY@qvW^xHiZpTF8{pWFhZPfwa5!Unj~XFwnE!)n_mbV5P8o>B4ntIgz(3eZ0K(H~Dn z2I*^)%!gu~c`{5AbvkQYkth~$-J4~wh{FVAsb^hS_9_O?=MXMw$?whdxz4R05!$nQ-+fq<6 zEJff9W{u!;xaG|2yWDwx#NC^+nbpM2@6cpackAM0_q((Gp*y+@6Gl8$zdN&jzGsR@D5Xt32ajrOC4&)(UwhM8)U-5oxuTF@mgb!RKl{d48qh^9ycOe15@1 zEnR8+Iu)P4=A^Eab8`48IS&^!5ceI%(w~&~CVPRLw^e-ppG!Zv=c@SpKbJ&Vj;_BaMXR%qK5IpO8NyX=f_Z#4AH{uv~uub@h z6YP;zQ$T&Yrn-(+p6FNc`8h%PWDZgB`H$u_)yyM4#wf0)nre!x`20v%A@OH&9!$mO zM}F*T`ecI4^WL7rkkHu3i_r~O#pmZ_Ict=oC5!lQ z8R3)eZ_sz~9?zicAdxR6xGHly+4ocN`9;T&z6cXcxK;7_kt-kJhx)cR_CZd?=jUX# zk$Yu~p7EpK)$Wus_LXvv!oEoExtR}08S8h59qwDmd`8b@(Rm{z_Cp}9V#;4pn@9hY}T^pN}ILPjke$ z16+pKnFnqln64i>3whAc1N{o*W8!^lT7a1k;&_K|t*mnpJM+gZ>4)B&;~3IR&J|h3 z0>d^;v6N?rWxa_P<$N;gS@BLGgZaJ_`F<9S3JlvjmlHkHq#kRYlu_~dp&#Hrtm5+n z4=O&tGb$N_Jq8t@A9zsl`CWHV@%iyy1JAoEK0mmGiqG$k3#Q`pyF94){3<^G?0G@+ z?WZ{pDn38Pl~wWi-T6Vq=jZ;Q;`4JJRD6EU15@$&-T6Vq=jZ;Q;`3u1auuK79TzG- zKOYw=KEI03&;F+EGoHZpgNo1Z&JXa;c2ob6xR&^fR=#%t;#ak`fqICSYt~8=pHGtl z@pg;3FIk=RSoVDMt5n@;a5>7fHkf^n_b$frjm3OB!iQ2$>wRxmr8x69Ow#~Y&kiGa`Chxt*BW%oz< 
zf`Hkk8|JI{{G2xxpP%!f;`4JJRD6EU1H1}W@%fPo6`x=DL^ZF~JcCqC?I%y1v@1f+ zWYuZ%q1jE!Ln^{jW1X zeVELj$A=a&j#Pa9<3sWPqE_m|jLgp}KL7usH~g2jnaqu_&*h~s@n<*>T`MVy`knQK zP3on}ReXNRhQifmK=?5Dmx|9{ZT3`wUm62so=gbehO0s5actRDAxLxSUtk^F1f$IQX`>Z^Hh!iqFqkSMm8h@2h<}pLS<^w4O9QwobGSXckv$2 zpyVLozm#BB1>ZJhy(&Jx=n%;X3lmJZRq^?eE4V*-iTbuT_TjH8K0l{O#pm~2<(Po) zk-F#E!+mjoH9x@GT!46nW=$S|aUP1DxZ=28GGPA8C3B!S|KL=(i>yQP1%~|1L_UdN zo8UW%{GIr$`P?F-GblV@k#6cRCLEOch+I`1}qd zE+;BJzsrP*&kr=foT1|L0}m=bzdI&We142QrQ-9u{-EOX;gXsqu>o^LhP?Y6h@7=z|Vx;cC*m$Ba0 zqnEbf`JGn<4|T1}V-=n~EfhI$CCAS`5n9u2xr@yKXJ$2d;bM+AEXYinekw1UG9?(h z%GpncbZ|(Jb^QSCxmRP~H$nWF#&TxiI|4Wh+qG)?hh(HKj%Tk=jS}A z`22j%RPp(_KdAWpxF%8Y`Q7`biqFr-g^JIQ<03u4%D%wT{J7nia78*knp;@G-Wzu|#%5ohU> zItu+ITIvtG`7?{vK`(neoy(~B{OAMAwZgx5;ryuh{K$if&+pzZn2OKOc~J5BIS(p6 zKj#5X`u{(Q&!5}pl%#d3XDzaBufv@Ga{JV)4<8REiMGn?sjNPmICSiU;S;;wJbc2$ zkz>bP(gR-pCp#M6Sn~TT>w>8BUv8f|&c)Ml-`xERBkhD;jLW!xV| zA6b~1*0e&&1pFd}yQUvml-hJ^g`%-L>WHG)lH5KG8+JyS_Vq6Cr0XFP*VXvDxmCwS aQ0^25yo|q{+}u758ajFP;D3%hm;FCNa$>Lm literal 0 HcmV?d00001 diff --git a/bench/python/assets/countour_lines.jpg b/bench/python/assets/countour_lines.jpg new file mode 100644 index 000000000..8d5c50219 --- /dev/null +++ b/bench/python/assets/countour_lines.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d59100dbefed479522bd91fbeb18b57f8a2fe58797f5f1192385bb774645753 +size 26397 diff --git a/bench/python/bench_utils.py b/bench/python/bench_utils.py new file mode 100644 index 000000000..91c9511a7 --- /dev/null +++ b/bench/python/bench_utils.py @@ -0,0 +1,411 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 +import torch +from PIL import Image +import os +import logging +import inspect +import argparse +import importlib +from glob import glob +from pathlib import Path +from datetime import datetime +from abc import ABC, abstractmethod +import numpy as np +import json +import pandas + +logger = logging.getLogger(__name__) +logging.basicConfig( + format="[%(name)s:%(lineno)d] %(asctime)s %(levelname)-6s %(message)s", + level=logging.INFO, + datefmt="%Y-%m-%d %H:%M:%S", +) + + +class AbstractOpBase(ABC): + """ + This is an abstract base class of all the operators that can be benchmarked. + It provides basic functionality and guarantees uniformity across operator test cases. + Concrete implementation of all the abstract methods of this class must be provided by + the class inheriting from this. + """ + + def __init__(self, device_id, input, output_dir=None, should_visualize=False): + """ + Initializes a new instances of this class. + :param device_id: The GPU device id that to use by this operator. + :param input: The input tensor to run the operator on. + :param output_dir: The directory where artifacts should be stored. 
+ :param should_visualize: A flag specifying whether the output from the operator + should be visualized and written to the disk or not. + """ + self.device_id = device_id + self.input = input + self.output_dir = output_dir + self.should_visualize = should_visualize + if self.output_dir: + if not os.path.isdir(self.output_dir): + raise ValueError("A valid output_dir must be given.") + self.op_output = None + + self.assets_dir = os.path.join( + Path(os.path.abspath(__file__)).parents[0], "assets" + ) + self.setup(self.input) + + def __call__(self, input): + """ + Runs the operator on a given input. Also visualizes the output if visualization was set to True. + :param input: The input tensor to run the operator on. + :returns: True if the operator executed successfully, False otherwise. + """ + try: + self.op_output = self.run(input) + + if self.should_visualize and self.output_dir: + self.visualize() + + return True + except Exception as e: + logger.error( + "Unable to run the op %s due to error: %s" + % (self.__class__.__name__, str(e)) + ) + return False + + @abstractmethod + def setup(self, input): + """ + Performs various setup activities to set this operator before it can be run. + :param input: The input tensor to run the operator on. + """ + pass + + @abstractmethod + def run(self, input): + """ + Runs the operator and returns the result. + :param input: The input tensor to run the operator on. + :returns: The result from the operator's run. + """ + pass + + def get_params_info(self, primitive_types_only=True): + """ + Returns a dictionary with keys being the variable names initialized exclusively during the setup call + # and values being their values. Useful to log if someone wants to know what parameters were used to + initialize the operator in the setup function call. + :param primitive_types_only: Only includes attributes with primitive data-types if True. Primitive + data types are bool, str, int, float, tuple and None. + """ + primitives = (bool, str, int, float, tuple, type(None)) + + # Get all global names (e.g variables + function names) used by the setup function. + all_global_names_setup_func = set(self.setup.__code__.co_names) + + # Get all the global names (e.g variables + function names) used by the __init__ function. + all_global_names_init_func = set(self.__init__.__code__.co_names) + + # Remove the names already used by __init__ from the ones used by setup to get a list of names + # which are exclusively used by setup + all_global_names_setup_func -= all_global_names_init_func + + # Get all the variables of this class. + all_vars_info = vars(self) + all_vars_names = set(all_vars_info.keys()) + + # Figure out all global variables only by intersecting the all_vars_names with + # all_global_names_setup_func. + # That will eliminate the global function names from all_global_names_setup_func. 
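+        # Illustrative example (hypothetical operator, for clarity only): if a
+        # concrete setup() assigned self.kernel_size and also called some
+        # cvcuda module function, then "kernel_size", "cvcuda" and that
+        # function's name would all appear in setup's co_names; intersecting
+        # with the instance attribute names below keeps only "kernel_size",
+        # provided __init__ did not already use that name.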
+ vars_names_of_setup_function = all_vars_names.intersection( + all_global_names_setup_func + ) + + if primitive_types_only: + vars_info_of_setup_function = { + v: all_vars_info[v] + for v in vars_names_of_setup_function + if isinstance(all_vars_info[v], primitives) + } + else: + vars_info_of_setup_function = { + v: all_vars_info[v] for v in vars_names_of_setup_function + } + + return vars_info_of_setup_function + + def _setup_clear_output_dir(self, filename_ends_with): + output_dir = os.path.join(self.output_dir, self.__class__.__name__) + + # Clear out the output directory or create it + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + else: + for file in os.listdir(output_dir): + if os.path.isfile(file) and file.endswith(filename_ends_with): + os.remove(file) + + return output_dir + + def visualize(self): + """ + Attempts to visualize the output produced by the operator as an image by writing it + down to the disk. May raise exceptions if visualization is not successful. + """ + output_dir = self._setup_clear_output_dir(filename_ends_with="_op_out.jpg") + if self.op_output is None: + raise TypeError( + "Visualization Error: Operator did not return any value as output to visualize." + ) + + op_output_npy = ( + torch.as_tensor(self.op_output.cuda(), device="cuda:%d" % self.device_id) + .cpu() + .numpy() + ) + if op_output_npy.dtype == np.uint8: + for i, npy_img in enumerate(op_output_npy): + if npy_img.shape[-1] == 1: + # Need to drop the 1 from the channels dimension if dealing with + # grayscale in PIL + npy_img = npy_img[..., 0] + out_file_name = "img_%d_op_out.jpg" % i + # Visualize as image + pil_img = Image.fromarray(npy_img) + pil_img.save(os.path.join(output_dir, out_file_name)) + + else: + raise TypeError( + "Visualization Error: Unsupported dtype for visualization: %s" + % str(op_output_npy.dtype) + ) + + +def get_benchmark_eligible_ops_info(): + """ + Prepares list of tuples : op-class-name (str) and class for all the operators that can be benchmarked. + """ + class_members = [] + + for file in glob( + os.path.join(os.path.dirname(os.path.abspath(__file__)), "all_ops", "*.py") + ): + name = os.path.splitext(os.path.basename(file))[0] + module = importlib.import_module("all_ops." + name) + all_members = inspect.getmembers(module, inspect.isclass) + op_members = [x for x in all_members if x[0].startswith("Op")] + + class_members.extend(op_members) + + return class_members + + +def summarize_runs( + baseline_run_json_path, + baseline_run_name="baseline", + compare_run_json_paths=[], + compare_run_names=[], +): + """ + Summarizes one or more benchmark runs and prepares a pandas table showing the per operator run-time + and speed-up numbers. + :param baseline_run_json_path: Path to where the benchmark.py styled JSON of the first run is stored. + :param baseline_run_name: The display name of the column representing the first run in the table. + :param compare_run_json_paths: Optional. A list of path to where the benchmark.py styled JSON of + the other runs are stored. These runs are compared with the baseline run. + :param compare_run_names: A list of display names of the column representing the comparison runs + in the table. This must be of the same length as the `compare_run_json_paths`. + :returns: A pandas table with the operator name, its run time from the baseline run and the params. + used to launch those runs. If compare runs are given, it also returns their run times and the speed-up + compared to the baseline run. 
The speedup is simply the run time of an operator from the compare run + divided by its run time from the baseline run. If an operator's run time or speedup factor is not + available, it simply puts "N/A". + """ + if os.path.isfile(baseline_run_json_path): + with open(baseline_run_json_path, "r") as f: + baseline_perf = json.loads(f.read()) + else: + raise ValueError( + "baseline_run_json_path does not exist: %s" % baseline_run_json_path + ) + + if len(compare_run_json_paths) != len(compare_run_names): + raise ValueError( + "Length mismatch between the number of given JSON paths for comparison and" + "their run names. %d v/s %d. Each JSON must have its corresponding run name." + % (len(compare_run_json_paths), len(compare_run_names)) + ) + + # Read all the comparison related JSON files, one by one, if any. + compare_perfs = {} + for compare_json_path, compare_run_name in zip( + compare_run_json_paths, compare_run_names + ): + if os.path.isfile(compare_json_path): + with open(compare_json_path, "r") as f: + compare_perfs[compare_run_name] = json.loads(f.read()) + else: + raise ValueError("compare_json_path does not exist: %s" % compare_json_path) + + results = [] + + for op in baseline_perf["mean_all_batches"]["run_bench"]: + if op.startswith("Op"): + op_name = op[2:] + + row_dict = {} + + # Fetch the time and parameters from the JSON for baseline run. + baseline_run_time = baseline_perf["mean_all_batches"]["run_bench"][op][ + "run_op" + ]["cpu_time_minus_warmup_per_item"] + + op_params = list( + baseline_perf["mean_all_batches"]["run_bench"][op]["op_params"].keys() + )[0] + + row_dict["operator name"] = op_name + row_dict["%s time (ms)" % baseline_run_name] = baseline_run_time + + if compare_perfs: + # Fetch the time from the JSON for all comparison runs. + for compare_run_name in compare_perfs: + # Check if the OP was present. + if ( + op + in compare_perfs[compare_run_name]["mean_all_batches"][ + "run_bench" + ] + ): + compare_run_time = compare_perfs[compare_run_name][ + "mean_all_batches" + ]["run_bench"][op]["run_op"]["cpu_time_minus_warmup_per_item"] + else: + compare_run_time = None + + row_dict["%s time (ms)" % compare_run_name] = ( + compare_run_time if compare_run_time else "N/A" + ) + + if baseline_run_time and compare_run_time: + speedup = round(compare_run_time / baseline_run_time, 3) + else: + speedup = "N/A" + row_dict[ + "%s v/s %s speed-up" % (compare_run_name, baseline_run_name) + ] = speedup + + row_dict["run time params"] = op_params + + results.append(row_dict) + + pandas.set_option("display.max_colwidth", 100) + + df = pandas.DataFrame.from_dict(results) + + return df + + +def main(): + """ + The main function. This will run the comparison function to compare two benchmarking runs. + """ + parser = argparse.ArgumentParser("Summarize and compare benchmarking runs.") + + parser.add_argument( + "-o", + "--output-dir", + type=str, + required=True, + help="The output directory where you want to store the result summary as a CSV file.", + ) + + parser.add_argument( + "-b", + "--baseline-json", + type=str, + required=True, + help="Path where the benchmark.py styled JSON of the baseline run is stored.", + ) + parser.add_argument( + "-bn", + "--baseline-name", + type=str, + required=True, + help="The name of the column representing the baseline run in the output table.", + ) + parser.add_argument( + "-c", + "--compare-jsons", + action="append", + required=False, + help="Optional. 
List of paths where the benchmark.py styled JSON of the comparison run are stored.", + ) + parser.add_argument( + "-cn", + "--compare-names", + action="append", + required=False, + help="Optional. List of names of the column representing the comparison runs in the output table.", + ) + + args = parser.parse_args() + + if not os.path.isdir(args.output_dir): + raise ValueError("output-dir does not exist: %s" % args.output_dir) + + if not os.path.isfile(args.baseline_json): + raise ValueError("baseline-json does not exist: %s" % args.baseline_json) + + args.compare_jsons = args.compare_jsons if args.compare_jsons else [] + args.compare_names = args.compare_names if args.compare_names else [] + + if len(args.compare_jsons) != len(args.compare_names): + raise ValueError( + "Length mismatch between the number of given JSON paths for comparison and" + "their run names. %d v/s %d. Each JSON must have its corresponding run name." + % (len(args.compare_jsons), len(args.compare_names)) + ) + + logger.info( + "Summarizing a total of %d runs. All times are in milliseconds" + % (len(args.compare_jsons) + 1) + ) + + df = summarize_runs( + baseline_run_json_path=args.baseline_json, + baseline_run_name=args.baseline_name, + compare_run_json_paths=args.compare_jsons, + compare_run_names=args.compare_names, + ) + + csv_path = os.path.join( + args.output_dir, + "summarize_runs.%s.csv" % datetime.now(), + ) + df.to_csv(csv_path) + + logger.info("Wrote comparison CSV to: %s" % csv_path) + + +if __name__ == "__main__": + # If this was called on its own, we will run the summarize_runs function to summarize + # and compare two runs. + main() diff --git a/bench/python/run_bench.py b/bench/python/run_bench.py new file mode 100644 index 000000000..ae2c69b08 --- /dev/null +++ b/bench/python/run_bench.py @@ -0,0 +1,236 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# NOTE: One must import PyCuda driver first, before CVCUDA or VPF otherwise +# things may throw unexpected errors. +import pycuda.driver as cuda # noqa: F401 +import os +import sys +import logging +import cvcuda +import torch + +from pathlib import Path + +# Bring module folders from the samples directory into our path so that +# we can import modules from it. 
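+# (Assumption: this script stays at bench/python/ inside the CV-CUDA checkout,
+# so that samples/common/python, which provides perf_utils and nvcodec_utils,
+# resolves correctly below.)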
+current_dir = Path(os.path.abspath(__file__)).parents[0] +samples_dir = os.path.join(Path(os.path.abspath(__file__)).parents[2], "samples") +common_dir = os.path.join( + samples_dir, + "common", + "python", +) +sys.path.insert(0, common_dir) + +from perf_utils import ( # noqa: E402 + CvCudaPerf, + get_default_arg_parser, + parse_validate_default_args, +) + +from nvcodec_utils import ( # noqa: E402 + ImageBatchDecoder, +) + +from bench_utils import get_benchmark_eligible_ops_info # noqa: E402 + + +def run_bench( + input_path, + output_dir, + batch_size, + target_img_height, + target_img_width, + device_id, + num_iters, + should_visualize, + ops_filter_list, + cvcuda_perf, +): + """ + Runs the per operator benchmarks. It automatically discovers eligible operators for benchmarking, + sets them up, runs them and saves the runtime numbers. benchmark.py is needed to actually perform any + timing measurements. + """ + logger = logging.getLogger("run_bench") + logger.info("Benchmarking started.") + + # Create an image batch decoder to supply us the input test data. + decoder = ImageBatchDecoder( + input_path, + batch_size, + device_id, + cuda_ctx=None, + cvcuda_perf=cvcuda_perf, + ) + + # Set up various CUDA stuff. + cuda_device = cuda.Device(device_id) + cuda_ctx = cuda_device.retain_primary_context() + cuda_ctx.push() + cvcuda_stream = cvcuda.Stream() + torch_stream = torch.cuda.ExternalStream(cvcuda_stream.handle) + + # Get a list of (class names, class types) of all the ops that can be profiled. + ops_info_list = get_benchmark_eligible_ops_info() + logger.info("Found a total of %d operators for benchmarking." % len(ops_info_list)) + + if ops_filter_list: + # Filter based on user's criteria. + ops_info_list_filtered = [] + for op_class_name, op_class in ops_info_list: + for op_filter_name in ops_filter_list: + if op_class_name.startswith(op_filter_name): + ops_info_list_filtered.append((op_class_name, op_class)) + break + + ops_info_list = ops_info_list_filtered + logger.info( + "Filtered to a total of %d operators for benchmarking." % len(ops_info_list) + ) + + if should_visualize: + logger.warning( + "Visualization is turned ON. Run-times may increase drastically due to disk I/O." + ) + + # Do everything in streams. + with cvcuda_stream, torch.cuda.stream(torch_stream): + + # Start the decoder and get a batch. + # NOTE: Currently, we will grab the first and only batch out of the decoder for + # performance benchmarking. All ops will receive this and only this batch. + decoder.start() + batch = decoder() + batch.data = cvcuda.as_tensor(batch.data.cuda(), "NHWC") + # Read input and create a batch + + for op_class_name, op_class in ops_info_list: + logger.info("Running %s..." % op_class_name) + cvcuda_perf.push_range(op_class_name) + + # Step 1: Initialize the operator... + cvcuda_perf.push_range("init_op") + try: + op_instance = op_class( + device_id=device_id, + input=batch.data, + output_dir=output_dir, + should_visualize=should_visualize, + ) + cvcuda_perf.pop_range() # For init_op + except Exception as e: + logger.error( + "Unable to init the op %s due to error: %s" + % (op_class_name, str(e)) + ) + cvcuda_perf.pop_range(delete_range=True) # Deletes the init_op range + cvcuda_perf.pop_range( + delete_range=True + ) # Deletes the op_name range, too. + continue # Continue to the next operator. + + # Step 2: Run the operator. + # Repeat for as many iterations as we wanted. + cvcuda_perf.push_range("run_op") + for i in range(num_iters): + # Start the iteration. 
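+                # (The Op<Name> > run_op > iter range nesting pushed here should
+                # correspond to the JSON path mean_all_batches > run_bench >
+                # Op<Name> > run_op > cpu_time_minus_warmup_per_item consumed by
+                # bench_utils.summarize_runs; the timing itself is aggregated by
+                # benchmark.py, not by this loop.)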
+ cvcuda_perf.push_range("iter", batch_idx=i) + + # Run the op + success = op_instance(batch.data) + torch.cuda.current_stream().synchronize() + # Finish + cvcuda_perf.pop_range(total_items=batch_size, delete_range=not success) + + # Get out of the loop if our operator invocation fails. + if not success: + break + + cvcuda_perf.pop_range(delete_range=not success) # For the run_op + + # Step 3: log the parameters used by the operator, initialized during the setup call. + if success: + cvcuda_perf.push_range("op_params") + cvcuda_perf.push_range(str(op_instance.get_params_info())) + cvcuda_perf.pop_range() + cvcuda_perf.pop_range() + + cvcuda_perf.pop_range() # For the op_name + else: + cvcuda_perf.pop_range( + delete_range=True + ) # Deletes the op_name range, too, if run_op failed + + cuda_ctx.pop() + cvcuda_perf.finalize() + logger.info("Finished run_bench.") + + +def main(): + # docs_tag: begin_parse_args + parser = get_default_arg_parser( + "Profiler for all ops of CV-CUDA.", + input_path=os.path.join(current_dir, "assets", "brooklyn.jpg"), + supports_video=False, + batch_size=32, + ) + parser.add_argument( + "-n", + "--num_iters", + default=10, + type=int, + help="The number of iterations to run the benchmarks for.", + ) + parser.add_argument( + "--visualize", + action="store_true", + default=False, + help="Flag specifying whether outputs from the operators should be visualized" + " on written on disk or not.", + ) + parser.add_argument( + "ops", + nargs="*", + help="Optional list of one or more operator names which you want to benchmark. " + "When supplied, the benchmarking will be restricted to only the operators that starts " + "with these names.", + ) + args = parse_validate_default_args(parser) + + logging.basicConfig( + format="[%(name)s:%(lineno)d] %(asctime)s %(levelname)-6s %(message)s", + level=getattr(logging, args.log_level.upper()), + datefmt="%Y-%m-%d %H:%M:%S", + ) + + cvcuda_perf = CvCudaPerf("run_bench", default_args=args) + run_bench( + args.input_path, + args.output_dir, + args.batch_size, + args.target_img_height, + args.target_img_width, + args.device_id, + args.num_iters, + args.visualize, + args.ops, + cvcuda_perf, + ) + + +if __name__ == "__main__": + main() diff --git a/ci/build.sh b/ci/build.sh index bcbbec23b..b5114d3cd 100755 --- a/ci/build.sh +++ b/ci/build.sh @@ -28,16 +28,16 @@ # SDIR is the directory where this script is located SDIR=$(dirname "$(readlink -f "$0")") -# Command line parsing =============================================== - # Defaults build_type="release" build_dir="" source_dir="$SDIR/.." 
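+# Example invocations (illustrative):
+#   ./ci/build.sh                                        # release build into build-rel/
+#   ./ci/build.sh debug                                  # debug build into build-deb/
+#   ./ci/build.sh release my_build -DBUILD_SAMPLES=ON    # extra args are passed to cmake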
+num_jobs=$(nproc) # Automatically determines the number of CPU cores +# Command line parsing if [[ $# -ge 1 ]]; then case $1 in - debug|release|profile) + debug|release) build_type=$1 if [[ $# -ge 2 ]]; then build_dir=$2 @@ -55,35 +55,28 @@ fi # Store additional cmake args user might have passed user_args="$*" -# Create build directory ============================================= - -# If build dir not explicitely defined, -if [[ -z "$build_dir" ]]; then - # Uses one derived from build type - build_dir="build-${build_type:0:3}" -fi +# Create build directory +build_dir=${build_dir:-"build-${build_type:0:3}"} mkdir -p "$build_dir" -# Set build configuration depending on build type ==================== - -# Common config +# Set build configuration cmake_args="-DBUILD_TESTS=1" -if [[ "$ENABLE_PYTHON" = '0' || "$ENABLE_PYTHON" = 'no' ]]; then +# Python build configuration +if [[ "$ENABLE_PYTHON" == '0' || "$ENABLE_PYTHON" == 'no' ]]; then cmake_args="$cmake_args -DBUILD_PYTHON=0" else - # enables python by default or when asked cmake_args="$cmake_args -DBUILD_PYTHON=1" -fi -if [ "$PYTHON_VERSIONS" ]; then - cmake_args="$cmake_args -DPYTHON_VERSIONS=$PYTHON_VERSIONS" + # Additional python versions + if [ "$PYTHON_VERSIONS" ]; then + cmake_args="$cmake_args -DPYTHON_VERSIONS=$PYTHON_VERSIONS" + fi fi + +# Specific configurations for build type case $build_type in - profile) - cmake_args="$cmake_args -DCMAKE_BUILD_TYPE=Release -DBUILD_BENCH=1" - ;; release) cmake_args="$cmake_args -DCMAKE_BUILD_TYPE=Release" ;; @@ -92,33 +85,26 @@ case $build_type in ;; esac -# Configure build toolchain =========================================== - -# Make sure we use most recent gcc-11.x -CC=${CC:=$(find /usr/bin/gcc-11* | sort -rV | head -n 1)} -CXX=${CXX:=$(find /usr/bin/g++-11* | sort -rV | head -n 1)} +# Configure build toolchain +CC=${CC:-$(find /usr/bin/gcc-11* | sort -rV | head -n 1)} +CXX=${CXX:-$(find /usr/bin/g++-11* | sort -rV | head -n 1)} +cmake_args="$cmake_args -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX" -cmake_args="${cmake_args} -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX" - -# Prefer to use ninja if found +# Use ninja if available if which ninja > /dev/null; then cmake_args="$cmake_args -G Ninja" export NINJA_STATUS="[%f/%t %r %es] " fi -# Config ccache -unset has_ccache +# Configure ccache if which ccache > /dev/null; then - has_ccache=1 -fi -if [[ $has_ccache ]]; then ccache_stats=$(pwd)/$build_dir/ccache_stats.log rm -rf "$ccache_stats" - cmake_args="${cmake_args} -DCCACHE_STATSLOG=${ccache_stats}" + cmake_args="$cmake_args -DCCACHE_STATSLOG=${ccache_stats}" fi -# config CUDA -CUDA_MAJOR=11 +# Configure CUDA +CUDA_MAJOR=${CUDA_MAJOR:-11} for nvcc_path in /usr/local/cuda-$CUDA_MAJOR/bin/nvcc /usr/local/cuda/bin/nvcc; do if [ -x "$nvcc_path" ]; then cmake_args="$cmake_args -DCMAKE_CUDA_COMPILER=$nvcc_path" @@ -126,19 +112,11 @@ for nvcc_path in /usr/local/cuda-$CUDA_MAJOR/bin/nvcc /usr/local/cuda/bin/nvcc; fi done -# Create build tree and build! =========================================== +# Create build tree and build +cmake -B "$build_dir" "$source_dir" $cmake_args $user_args +cmake --build "$build_dir" -- -j$num_jobs -# Create build tree -cmake -B "$build_dir" "$source_dir" \ - -DBUILD_TESTS=1 \ - $cmake_args \ - $user_args - -# Build CV-CUDA -cmake --build "$build_dir" --parallel 8 -- $MAKE_OPTS - -# Show ccache status, if available! 
-if [[ $has_ccache ]]; then - # Show build stats - CCACHE_STATSLOG=${ccache_stats} ccache --show-stats -V +# Show ccache status +if which ccache > /dev/null; then + ccache --show-stats -V fi diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 1d4fbb378..41baeada7 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -25,4 +25,6 @@ if [[ $# -ge 1 ]]; then build_dir=$1 fi -./ci/build.sh $build_type $build_dir "-DBUILD_DOCS=ON -DBUILD_TESTS=OFF -DBUILD_PYTHON=ON -DPYTHON_VERSIONS=';3.9;3.10'" +# (warning): Use "$@" (with quotes) to prevent whitespace problems. +# shellcheck disable=SC2048 +./ci/build.sh $build_type $build_dir "-DBUILD_DOCS=ON -DBUILD_TESTS=OFF -DBUILD_PYTHON=ON" $* diff --git a/ci/build_samples.sh b/ci/build_samples.sh index 9b4fc80e8..27b5c383c 100755 --- a/ci/build_samples.sh +++ b/ci/build_samples.sh @@ -28,4 +28,4 @@ fi # (warning): Use "$@" (with quotes) to prevent whitespace problems. # shellcheck disable=SC2048 - ./ci/build.sh $build_type $build_dir "-DBUILD_SAMPLES=ON -DBUILD_TESTS=OFF -DBUILD_PYTHON=1" $* + ./ci/build.sh $build_type $build_dir "-DBUILD_SAMPLES=ON -DBUILD_TESTS=OFF -DBUILD_PYTHON=ON" $* diff --git a/cmake/BuildPython.cmake b/cmake/BuildPython.cmake index 38cbfb86e..cab2e7371 100644 --- a/cmake/BuildPython.cmake +++ b/cmake/BuildPython.cmake @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -58,10 +58,10 @@ endif() foreach(VER ${PYTHON_VERSIONS}) set(BASEDIR ${CMAKE_CURRENT_BINARY_DIR}/python${VER}) - ExternalProject_Add(nvcv_python${VER} + ExternalProject_Add(cvcuda_python${VER} PREFIX ${BASEDIR} SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/python - CMAKE_ARGS ${PYPROJ_COMMON_ARGS} -DPYTHON_VERSION=${VER} + CMAKE_ARGS ${PYPROJ_COMMON_ARGS} -DPYTHON_VERSION=${VER} -DBUILD_ROOT=${CMAKE_BINARY_DIR} -DPYTHON_VERSION_SHORT=${VER} BINARY_DIR ${BASEDIR}/build TMP_DIR ${BASEDIR}/tmp STAMP_DIR ${BASEDIR}/stamp @@ -70,3 +70,9 @@ foreach(VER ${PYTHON_VERSIONS}) INSTALL_COMMAND "" ) endforeach() + +if(CMAKE_BUILD_TYPE STREQUAL "Release") + foreach(PYTHON_VERSION ${PYTHON_VERSIONS}) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py.in" "${CMAKE_BINARY_DIR}/python${PYTHON_VERSION}/setup.py") + endforeach() +endif() diff --git a/cmake/ConfigCPack.cmake b/cmake/ConfigCPack.cmake index e5ba75c9b..e0bec6ada 100644 --- a/cmake/ConfigCPack.cmake +++ b/cmake/ConfigCPack.cmake @@ -22,8 +22,8 @@ else() endif() set(CPACK_PACKAGE_VENDOR "NVIDIA") -set(CPACK_PACKAGE_CONTACT "CV-CUDA Support ") -set(CPACK_PACKAGE_HOMEPAGE_URL "https://confluence.nvidia.com/display/CVCUDA") +set(CPACK_PACKAGE_CONTACT "https://github.com/CVCUDA/CV-CUDA/issues") +set(CPACK_PACKAGE_HOMEPAGE_URL "https://cvcuda.github.io") # ARCHIVE installer doesn't work with absolute install destination # we have to error out in this case diff --git a/cmake/ConfigCUDA.cmake b/cmake/ConfigCUDA.cmake index 8c64f160f..24bc2453c 100644 --- a/cmake/ConfigCUDA.cmake +++ b/cmake/ConfigCUDA.cmake @@ -21,10 +21,10 @@ list(GET CUDA_VERSION_LIST 2 CUDA_VERSION_PATCH) find_package(CUDAToolkit ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} REQUIRED) # CUDA version requirement: -# - to use gcc-11 (11.7) +# - to use gcc-9 (11.4) -if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.7") - message(FATAL_ERROR "Minimum CUDA version supported is 
11.7") +if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.4") + message(FATAL_ERROR "Minimum CUDA version supported is 11.4") endif() set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) @@ -38,6 +38,7 @@ if(NOT USE_CMAKE_CUDA_ARCHITECTURES) if(ENABLE_TEGRA) list(APPEND CMAKE_CUDA_ARCHITECTURES 72-real # Volta - gv11b/Tegra (Jetson AGX Xavier) + 86-real # Ampere - Jetson IGX Orin 87-real # Ampere - ga10b,ga10c/Tegra (Jetson AGX Orin) ) else() diff --git a/cmake/ConfigCompiler.cmake b/cmake/ConfigCompiler.cmake index 898a7ee83..b011ace1b 100644 --- a/cmake/ConfigCompiler.cmake +++ b/cmake/ConfigCompiler.cmake @@ -81,7 +81,7 @@ if(BUILD_TESTS) set(candidate_compilers ${PUBLIC_API_COMPILERS}) else() # If not, by default, we'll try these. - set(candidate_compilers gcc-11 gcc-9 gcc-8 clang-11 clang-14) + set(candidate_compilers gcc-11 gcc-9 clang-11 clang-14) endif() unset(valid_compilers) diff --git a/cmake/ConfigPython.cmake b/cmake/ConfigPython.cmake index ab627ed5a..83f65f8b0 100644 --- a/cmake/ConfigPython.cmake +++ b/cmake/ConfigPython.cmake @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +14,7 @@ # limitations under the License. if(ENABLE_SANITIZERS) - message(FATAL_ERROR "NVCV python modules don't work on sanitized builds") + message(FATAL_ERROR "CV-CUDA python modules don't work on sanitized builds") endif() # Because we python as subproject, we need to create a fake Findnvcv.cmake so diff --git a/cmake/GetGitRevisionDescription.cmake b/cmake/GetGitRevisionDescription.cmake index 74839ab06..b18506492 100644 --- a/cmake/GetGitRevisionDescription.cmake +++ b/cmake/GetGitRevisionDescription.cmake @@ -177,7 +177,8 @@ endfunction() # without an express license agreement from NVIDIA CORPORATION or # its affiliates is strictly prohibited. -# Addition by rlima@nvidia.com +# Note: The function below is an addition to the original set of functions. + function(git_branch _var) if(NOT GIT_FOUND) find_package(Git QUIET) diff --git a/cmake/InstallNVCVDev.cmake b/cmake/InstallNVCVDev.cmake index 40dd24a32..5368d63e0 100644 --- a/cmake/InstallNVCVDev.cmake +++ b/cmake/InstallNVCVDev.cmake @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,7 +19,7 @@ set(CPACK_COMPONENT_DEV_DISPLAY_NAME "Development") set(CPACK_COMPONENT_DEV_DESCRIPTION "NVIDIA CV-CUDA C/C++ development library and headers") if(UNIX) - set(NVCV_DEV_FILE_NAME "nvcv-dev-${NVCV_VERSION_BUILD}") + set(NVCV_DEV_FILE_NAME "cvcuda-dev-${NVCV_VERSION_BUILD}") set(CPACK_DEBIAN_DEV_FILE_NAME "${NVCV_DEV_FILE_NAME}.deb") set(CPACK_ARCHIVE_DEV_FILE_NAME "${NVCV_DEV_FILE_NAME}") @@ -28,7 +28,7 @@ if(UNIX) # is the same set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "${CPACK_DEBIAN_LIB_PACKAGE_NAME} (>= ${NVCV_VERSION_API})") - set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${NVCV_PACKAGE_NAME}-dev") + set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${CVCUDA_PACKAGE_NAME}-dev") # We're not adding compiler and cmake as dependencies, users can choose # whatever toolchain they want. 
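With this rename, the CPack development artifact now carries the cvcuda prefix: the package file becomes cvcuda-dev-${NVCV_VERSION_BUILD} and the Debian package name ${CVCUDA_PACKAGE_NAME}-dev, depending on the matching runtime library package (which receives the cvcuda-lib prefix in the InstallNVCVLib.cmake change below). A minimal local-install sketch, with illustrative file names since the real suffix comes from NVCV_VERSION_BUILD:

    sudo apt install ./cvcuda-lib-*.deb ./cvcuda-dev-*.deb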
diff --git a/cmake/InstallNVCVLib.cmake b/cmake/InstallNVCVLib.cmake index 4ddaa377f..7ef53adf3 100644 --- a/cmake/InstallNVCVLib.cmake +++ b/cmake/InstallNVCVLib.cmake @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,15 +15,14 @@ list(APPEND CPACK_COMPONENTS_ALL lib) set(CPACK_COMPONENT_LIB_DISPLAY_NAME "Runtime libraries") -set(CPACK_COMPONENT_LIB_DESCRIPTION "NVIDIA NVCV library") +set(CPACK_COMPONENT_LIB_DESCRIPTION "NVIDIA CV-CUDA library") set(CPACK_COMPONENT_LIB_REQUIRED true) -set(NVCV_PACKAGE_NAME "nvcv${NVCV_VERSION_MAJOR}") -set(NVCV_TYPES_PACKAGE_NAME "nvcv_types${NVCV_VERSION_MAJOR}") set(CVCUDA_PACKAGE_NAME "cvcuda${NVCV_VERSION_MAJOR}") +set(NVCV_TYPES_PACKAGE_NAME "nvcv_types${NVCV_VERSION_MAJOR}") if(UNIX) - set(NVCV_LIB_FILE_NAME "nvcv-lib-${NVCV_VERSION_BUILD}") + set(NVCV_LIB_FILE_NAME "cvcuda-lib-${NVCV_VERSION_BUILD}") set(CPACK_DEBIAN_LIB_FILE_NAME "${NVCV_LIB_FILE_NAME}.deb") set(CPACK_ARCHIVE_LIB_FILE_NAME "${NVCV_LIB_FILE_NAME}") @@ -36,7 +35,7 @@ if(UNIX) "${CMAKE_CURRENT_BINARY_DIR}/cpack/lib/prerm") # as per debian convention, use the library file name - set(CPACK_DEBIAN_LIB_PACKAGE_NAME "lib${NVCV_PACKAGE_NAME}") + set(CPACK_DEBIAN_LIB_PACKAGE_NAME "lib${CVCUDA_PACKAGE_NAME}") set(CPACK_DEBIAN_LIB_PACKAGE_DEPENDS "libstdc++6, libc6") diff --git a/cmake/InstallPython.cmake b/cmake/InstallPython.cmake index 2a8638f3f..977779fbc 100644 --- a/cmake/InstallPython.cmake +++ b/cmake/InstallPython.cmake @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,13 +25,13 @@ foreach(VER ${PYTHON_VERSIONS}) set(CPACK_COMPONENT_${PYTHON_MODULE_NAME}_DISABLED true) set(CPACK_COMPONENT_${PYTHON_MODULE_NAME}_DISPLAY_NAME "Python ${VER}") - set(CPACK_COMPONENT_${PYTHON_MODULE_NAME}_DESCRIPTION "NVIDIA NVCV python ${VER} bindings") + set(CPACK_COMPONENT_${PYTHON_MODULE_NAME}_DESCRIPTION "NVIDIA CV-CUDA python ${VER} bindings") set(CPACK_COMPONENT_${PYTHON_MODULE_NAME}_GROUP python) if(UNIX) set(CPACK_DEBIAN_${PYTHON_MODULE_NAME}_PACKAGE_NAME python${VER}-${CPACK_PACKAGE_NAME}) - set(NVCV_${PYTHON_MODULE_NAME}_FILE_NAME "nvcv-python${VER}-${NVCV_VERSION_BUILD}") + set(NVCV_${PYTHON_MODULE_NAME}_FILE_NAME "cvcuda-python${VER}-${NVCV_VERSION_BUILD}") set(CPACK_DEBIAN_${PYTHON_MODULE_NAME}_FILE_NAME "${NVCV_${PYTHON_MODULE_NAME}_FILE_NAME}.deb") set(CPACK_ARCHIVE_${PYTHON_MODULE_NAME}_FILE_NAME "${NVCV_${PYTHON_MODULE_NAME}_FILE_NAME}") @@ -50,19 +50,6 @@ foreach(VER ${PYTHON_VERSIONS}) install(CODE "include(\"${CMAKE_BINARY_DIR}/python${VER}/build/cmake_install.cmake\")" COMPONENT ${python_module_name}) - if(BUILD_TESTS) - set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS - "${CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS}, - ${CPACK_DEBIAN_${PYTHON_MODULE_NAME}_PACKAGE_NAME} (>= ${NVCV_VERSION_API})") - - # For some reason these are needed with python-3.7 - if(VER VERSION_EQUAL "3.7") - set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS - "${CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS} - , python3-typing-extensions") - endif() - endif() - list(APPEND CPACK_COMPONENTS_ALL ${python_module_name}) endforeach() diff --git a/cmake/InstallTests.cmake b/cmake/InstallTests.cmake index e896c7853..ff34de54a 100644 --- a/cmake/InstallTests.cmake +++ b/cmake/InstallTests.cmake @@ -24,7 +24,7 @@ if(UNIX) # Depend on current or any future ABI with same major version set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${CPACK_DEBIAN_LIB_PACKAGE_NAME} (>= ${NVCV_VERSION_API})") # External dependencies - set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS},libssl3") + set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS},libssl3 | libssl1.1") set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "cvcuda${PROJECT_VERSION_MAJOR}-tests") diff --git a/docker/build20.04/Dockerfile b/docker/build20.04/Dockerfile new file mode 100644 index 000000000..d689ce3ea --- /dev/null +++ b/docker/build20.04/Dockerfile @@ -0,0 +1,90 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Docker image used to build CV-CUDA on linux-x64 + +ARG VER_CUDA=? +ARG VER_UBUNTU=? + +FROM nvidia/cuda:$VER_CUDA-devel-ubuntu$VER_UBUNTU + +ARG DEBIAN_FRONTEND=noninteractive + +# Add so that tzdata don't ask for timezone info in a noninteractive installation. 
+RUN ln -sf /usr/share/zoneinfo/US/Pacific /etc/localtime + +# need to update and install in one go, or else installation might use +# stale data from server stored in docker cache, with packages that don't exist anymore. +RUN apt-get update \ + && apt-get install -y --no-install-recommends git git-lfs software-properties-common wget\ + && add-apt-repository ppa:ubuntu-toolchain-r/test \ + && apt-get update \ + && apt-get install -y --no-install-recommends gcc-11 g++-11 \ + && wget https://apt.llvm.org/llvm.sh \ + && chmod +x llvm.sh \ + && ./llvm.sh 11 && ./llvm.sh 14 && rm -f llvm.sh \ + && apt-get install -y --no-install-recommends ninja-build ccache libgtest-dev libgmock-dev shellcheck curl \ + && rm -rf /var/lib/apt/lists/* \ + && curl -L https://cmake.org/files/v3.20/cmake-3.20.1-linux-x86_64.tar.gz --output /tmp/cmake-3.20.1.tar.gz \ + && tar -xzf /tmp/cmake-3.20.1.tar.gz -C /tmp/ && cd /tmp/cmake-3.20.1-linux-x86_64/ \ + && cp bin/ share/ doc/ /usr/local/ -r && rm -rf /tmp/cmake-3.20.1* + +# Configure ccache +RUN mkdir -p /cache +COPY ccache.conf /etc/ccache.conf +ENV CCACHE_CONFIGPATH=/etc/ccache.conf +ENV PRE_COMMIT_HOME=/cache/pre-commit + +# Documentation ====================================== + +# Allow using this image in systems without proper CUDA runtime/driver support. +# We'll be using this image only for building, don't need strict CUDA checks. +ENV NVIDIA_DISABLE_REQUIRE=true + +RUN apt-get update \ + && apt-get install -y --no-install-recommends python3 python3-pip python3-pytest python3-dev doxygen \ + && rm -rf /var/lib/apt/lists/* + +# python3 is python3.8 in ubuntu20.04 +RUN python3 -m pip install pre-commit +# Needed for python documentation +RUN python3 -m pip install sphinx-rtd-theme sphinx==4.5.0 +RUN python3 -m pip install breathe exhale recommonmark graphviz +# Needed for python sphinx docs and Python wheels +RUN python3 -m pip install numpy==1.24.1 patchelf==0.17.2.1 + +# Python bindings ====================================== + +# Add deadsnakes apt repo to fetch older pythonv versions +ADD deadsnakes-ubuntu-ppa-focal.list /etc/apt/sources.list.d +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys BA6932366A755776 + +RUN for PYTHON_VERSION in 3.7 3.8 3.9 3.10 3.11; do \ + apt-get update \ + && apt-get install -y --no-install-recommends \ + python$PYTHON_VERSION-dev python$PYTHON_VERSION-distutils; \ + done && \ + rm -rf /var/lib/apt/lists/* + +# gcc-8 ====================================== +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + gcc-8 g++-8 \ + && rm -rf /var/lib/apt/lists/* + +# Needed for OpenSSL +RUN apt-get update \ + && apt-get install -y --no-install-recommends libssl-dev \ + && rm -rf /var/lib/apt/lists/* diff --git a/docker/build/ccache.conf b/docker/build20.04/ccache.conf similarity index 100% rename from docker/build/ccache.conf rename to docker/build20.04/ccache.conf diff --git a/docker/build20.04/deadsnakes-ubuntu-ppa-focal.list b/docker/build20.04/deadsnakes-ubuntu-ppa-focal.list new file mode 100644 index 000000000..b9cba6f58 --- /dev/null +++ b/docker/build20.04/deadsnakes-ubuntu-ppa-focal.list @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +deb https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu/ focal main +# deb-src https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu/ focal main diff --git a/docker/build/Dockerfile b/docker/build22.04/Dockerfile similarity index 74% rename from docker/build/Dockerfile rename to docker/build22.04/Dockerfile index 8fd5fb8ed..e974b5cd0 100644 --- a/docker/build/Dockerfile +++ b/docker/build22.04/Dockerfile @@ -20,21 +20,23 @@ ARG VER_UBUNTU=? FROM nvidia/cuda:$VER_CUDA-devel-ubuntu$VER_UBUNTU +ARG DEBIAN_FRONTEND=noninteractive + # need to update and install in one go, or else installation might use # stale data from server stored in docker cache, with packages that don't exist anymore. -RUN DEBIAN_FRONTEND="noninteractive" apt-get update \ +RUN apt-get update \ && apt-get install -y --no-install-recommends \ - git git-lfs \ - g++-11 \ - # need to also build with gcc-9.4.0, our minimum supported compiler for the library - gcc-9=9.4.0-5ubuntu1 cpp-9=9.4.0-5ubuntu1 gcc-9-base=9.4.0-5ubuntu1 libgcc-9-dev=9.4.0-5ubuntu1 libasan5=9.4.0-5ubuntu1 g++-9=9.4.0-5ubuntu1 libstdc++-9-dev=9.4.0-5ubuntu1 \ - # Compilers to which public headers must be compatible - clang-14 clang-11 \ - ninja-build \ - ccache \ - libgtest-dev libgmock-dev \ - pre-commit shellcheck \ - curl \ + git git-lfs \ + g++-11 \ + # need to also build with gcc-9.4.0, our minimum supported compiler for the library + gcc-9=9.4.0-5ubuntu1 cpp-9=9.4.0-5ubuntu1 gcc-9-base=9.4.0-5ubuntu1 libgcc-9-dev=9.4.0-5ubuntu1 libasan5=9.4.0-5ubuntu1 g++-9=9.4.0-5ubuntu1 libstdc++-9-dev=9.4.0-5ubuntu1 \ + # Compilers to which public headers must be compatible + clang-14 clang-11 \ + ninja-build \ + ccache \ + libgtest-dev libgmock-dev \ + pre-commit shellcheck \ + curl \ && rm -rf /var/lib/apt/lists/* \ && curl -L https://cmake.org/files/v3.20/cmake-3.20.1-linux-x86_64.tar.gz --output /tmp/cmake-3.20.1.tar.gz \ && tar -xzf /tmp/cmake-3.20.1.tar.gz -C /tmp/ && cd /tmp/cmake-3.20.1-linux-x86_64/ \ @@ -56,6 +58,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3 python3-pip python3-dev doxygen && rm -rf /var/lib/apt/lists/* RUN python3 -m pip install sphinx-rtd-theme sphinx==4.5.0 RUN python3 -m pip install breathe exhale recommonmark graphviz +# Needed for python sphinx docs and Python wheels +RUN python3 -m pip install numpy==1.24.1 patchelf==0.17.2.1 # Python bindings ====================================== @@ -66,12 +70,13 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys BA6932366A755776 # Add so that tzdata don't ask for timezone info in a noninteractive installation. 
RUN ln -sf /usr/share/zoneinfo/US/Pacific /etc/localtime -RUN DEBIAN_FRONTEND="noninteractive" apt-get update \ +RUN apt-get update \ && apt-get install -y --no-install-recommends \ - python3.7-dev python3.7-distutils \ - python3.8-dev python3.8-distutils \ - python3.9-dev python3.9-distutils \ - python3.10-dev python3.10-distutils \ + python3.7-dev python3.7-distutils \ + python3.8-dev python3.8-distutils \ + python3.9-dev python3.9-distutils \ + python3.10-dev python3.10-distutils \ + python3.11-dev python3.11-distutils \ && rm -rf /var/lib/apt/lists/* # gcc-8 ====================================== @@ -88,15 +93,12 @@ RUN curl --fail-early -L \ -O http://mirrors.kernel.org/ubuntu/pool/main/i/isl/libisl22_0.22.1-1_amd64.deb RUN apt-get update && apt-get install -y --no-install-recommends \ - ./libmpx2_8.4.0-3ubuntu2_amd64.deb \ - ./cpp-8_8.4.0-3ubuntu2_amd64.deb \ - ./gcc-8-base_8.4.0-3ubuntu2_amd64.deb \ - ./libgcc-8-dev_8.4.0-3ubuntu2_amd64.deb \ - ./gcc-8_8.4.0-3ubuntu2_amd64.deb \ - ./g++-8_8.4.0-3ubuntu2_amd64.deb \ - ./libstdc++-8-dev_8.4.0-3ubuntu2_amd64.deb \ - ./libisl22_0.22.1-1_amd64.deb \ + ./libmpx2_8.4.0-3ubuntu2_amd64.deb \ + ./cpp-8_8.4.0-3ubuntu2_amd64.deb \ + ./gcc-8-base_8.4.0-3ubuntu2_amd64.deb \ + ./libgcc-8-dev_8.4.0-3ubuntu2_amd64.deb \ + ./gcc-8_8.4.0-3ubuntu2_amd64.deb \ + ./g++-8_8.4.0-3ubuntu2_amd64.deb \ + ./libstdc++-8-dev_8.4.0-3ubuntu2_amd64.deb \ + ./libisl22_0.22.1-1_amd64.deb \ && rm -rf /var/lib/apt/lists/* - -# Needed for python sphinx docs -RUN python3 -m pip install numpy==1.24.1 diff --git a/docker/build22.04/ccache.conf b/docker/build22.04/ccache.conf new file mode 100644 index 000000000..3ea1d6a35 --- /dev/null +++ b/docker/build22.04/ccache.conf @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +max_size = 20G +cache_dir = /cache/ccache diff --git a/docker/build/deadsnakes-ubuntu-ppa-jammy.list b/docker/build22.04/deadsnakes-ubuntu-ppa-jammy.list similarity index 100% rename from docker/build/deadsnakes-ubuntu-ppa-jammy.list rename to docker/build22.04/deadsnakes-ubuntu-ppa-jammy.list diff --git a/docker/config b/docker/config index 56cc639d5..aa84ebf0d 100644 --- a/docker/config +++ b/docker/config @@ -23,9 +23,9 @@ IMAGE_URL_BASE='' # change is done, such as removing some package, or updating # packaged versions that introduces incompatibilities. TAG_IMAGE=6 -TAG_IMAGE_SAMPLES=5.1 +TAG_IMAGE_SAMPLES=6.1 TAG_IMAGE_TEST=5 VER_CUDA=11.7.1 VER_UBUNTU=22.04 -VER_TRT=22.09 +VER_TRT=24.01 diff --git a/docker/devel/Dockerfile b/docker/devel20.04/Dockerfile similarity index 74% rename from docker/devel/Dockerfile rename to docker/devel20.04/Dockerfile index 245aa9f24..5d7fd499e 100644 --- a/docker/devel/Dockerfile +++ b/docker/devel20.04/Dockerfile @@ -20,18 +20,20 @@ ARG TAG_IMAGE=? 
FROM $BASE_IMAGE:$TAG_IMAGE +ARG DEBIAN_FRONTEND=noninteractive + # need to update and install in one go, or else installation might use # stale data from server stored in docker cache, with packages that don't exist anymore. # HACK: need to pass 'sudo' as a variable to workaround Dockerfile linter, it says # we shouldn't install sudo in a container. But we know what we're doing! -RUN HACK_SUDO=sudo && DEBIAN_FRONTEND="noninteractive" apt-get update \ +RUN HACK_SUDO=sudo && apt-get update \ && apt-get install -y --no-install-recommends \ - $HACK_SUDO \ - vim \ - gdb cgdb \ - less \ - wget curl \ + $HACK_SUDO \ + vim \ + gdb cgdb \ + less \ + wget curl \ && rm -rf /var/lib/apt/lists/* # Enable CUDA driver checks as this image will be used for running CUDA programs @@ -42,16 +44,20 @@ COPY vimrc /root/.vimrc COPY gdbinit /root/.gdbinit # For running tests inside dev container -RUN DEBIAN_FRONTEND="noninteractive" apt-get update \ +RUN apt-get update \ && apt-get install -y --no-install-recommends \ - python3-pytest \ - python3-pip \ + apt-utils fonts-dejavu \ && rm -rf /var/lib/apt/lists/* # needed by tests -RUN python3 -m pip install torch==1.13.0 torchvision cupy-cuda11x \ - && rm -rf /root/.cache/pip -RUN python3.9 -m pip install pytest torch==1.13.0 torchvision cupy-cuda11x \ - && rm -rf /root/.cache/pip +RUN for PYTHON_VERSION in 3.7 3.8 3.9 3.10 3.11; do \ + curl -sS https://bootstrap.pypa.io/get-pip.py | python$PYTHON_VERSION && \ + python$PYTHON_VERSION -m pip install --upgrade pip && \ + python$PYTHON_VERSION -m pip install --upgrade \ + pytest torch==1.13.0 numpy typing-extensions && \ + rm -rf /root/.cache/pip; \ + done + + WORKDIR /cvcuda diff --git a/docker/devel/gdbinit b/docker/devel20.04/gdbinit similarity index 100% rename from docker/devel/gdbinit rename to docker/devel20.04/gdbinit diff --git a/docker/devel/vimrc b/docker/devel20.04/vimrc similarity index 100% rename from docker/devel/vimrc rename to docker/devel20.04/vimrc diff --git a/docker/devel22.04/Dockerfile b/docker/devel22.04/Dockerfile new file mode 100644 index 000000000..55b652779 --- /dev/null +++ b/docker/devel22.04/Dockerfile @@ -0,0 +1,72 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Docker image used for development of CV-CUDA on linux-x64 + +ARG BASE_IMAGE=? +ARG TAG_IMAGE=? + +FROM $BASE_IMAGE:$TAG_IMAGE + +ARG DEBIAN_FRONTEND=noninteractive + +# need to update and install in one go, or else installation might use +# stale data from server stored in docker cache, with packages that don't exist anymore. + +# HACK: need to pass 'sudo' as a variable to workaround Dockerfile linter, it says +# we shouldn't install sudo in a container. But we know what we're doing! 
+RUN HACK_SUDO=sudo && apt-get update \ + && apt-get install -y --no-install-recommends \ + $HACK_SUDO \ + vim \ + gdb cgdb \ + less \ + wget curl \ + && rm -rf /var/lib/apt/lists/* + +# Enable CUDA driver checks as this image will be used for running CUDA programs +ENV NVIDIA_DISABLE_REQUIRE=false + +# Config files we use +COPY vimrc /root/.vimrc +COPY gdbinit /root/.gdbinit + +# For running tests inside dev container +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + apt-utils \ + python3-typing-extensions \ + python3-pytest \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* + +# For running tests inside dev container +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + apt-utils fonts-dejavu \ + && rm -rf /var/lib/apt/lists/* + +# needed by tests (python3 is python3.10 in ubuntu22.04) +RUN python3 -m pip install torch==1.13.0 torchvision cupy-cuda11x \ + && rm -rf /root/.cache/pip +RUN python3.8 -m pip install torch==1.13.0 torchvision cupy-cuda11x \ + numpy sphinx-rtd-theme sphinx breathe exhale recommonmark graphviz \ + && rm -rf /root/.cache/pip +RUN python3.9 -m pip install pytest torch==1.13.0 torchvision cupy-cuda11x \ + && rm -rf /root/.cache/pip +RUN python3.11 -m pip install --upgrade pytest torch==1.13.0 cupy-cuda11x \ + && rm -rf /root/.cache/pip + +WORKDIR /cvcuda diff --git a/docker/devel22.04/gdbinit b/docker/devel22.04/gdbinit new file mode 100644 index 000000000..9ba78c2dc --- /dev/null +++ b/docker/devel22.04/gdbinit @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set disassembly-flavor intel +set print object on +set print vtbl on +set print pretty on diff --git a/docker/devel22.04/vimrc b/docker/devel22.04/vimrc new file mode 100644 index 000000000..59a3426ac --- /dev/null +++ b/docker/devel22.04/vimrc @@ -0,0 +1,23 @@ +" SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +" SPDX-License-Identifier: Apache-2.0 +" +" NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +" property and proprietary rights in and to this material, related +" documentation and any modifications thereto. Any use, reproduction, +" disclosure or distribution of this material and related documentation +" without an express license agreement from NVIDIA CORPORATION or +" its affiliates is strictly prohibited. 
+ +set nocompatible +set backspace=2 +set autoindent +set softtabstop=4 +set number +set shiftwidth=4 +set expandtab +set autowrite +set ruler +set makeprg=ninja +set fileencodings=ucs-bom,utf-8,latin1 +syntax on +color ron diff --git a/docker/env_devel_linux.sh b/docker/env_devel_linux.sh index 8bafb3110..0c16ee742 100755 --- a/docker/env_devel_linux.sh +++ b/docker/env_devel_linux.sh @@ -52,7 +52,7 @@ else echo "Git user.name and user.email not set up" echo "Please run:" echo " git config --global user.name 'Your Name'" - echo " git config --global user.email 'your_nvlogin@nvidia.com'" + echo " git config --global user.email 'Your Email'" exit 1 fi @@ -70,5 +70,5 @@ docker run --gpus=all --net=host --pull always -ti \ -v /var/tmp:/var/tmp \ -v $SDIR/..:$HOME/cvcuda \ $extra_args \ - $IMAGE_URL_BASE/devel-linux:$TAG_IMAGE \ + $IMAGE_URL_BASE/devel-linux:$VER_UBUNTU-$VER_CUDA \ /usr/bin/bash -c "mkdir -p $HOME && chown $USER:$USER $HOME && su - $USER -c \"$extra_cmds\" && su - $USER" diff --git a/docker/samples/Dockerfile b/docker/samples/Dockerfile index 0adeb99da..0a8e70f65 100644 --- a/docker/samples/Dockerfile +++ b/docker/samples/Dockerfile @@ -26,4 +26,5 @@ FROM nvcr.io/nvidia/tensorrt:$VER_TRT-py3 # by default. It is copied by the update_samples_image.sh script. Always # use update_samples_image.sh script to build any samples docker image. COPY install_dependencies.sh /workspace/ +COPY requirements.txt /workspace/ RUN sh /workspace/install_dependencies.sh diff --git a/docker/test20.04/Dockerfile b/docker/test20.04/Dockerfile new file mode 100644 index 000000000..edd979a1b --- /dev/null +++ b/docker/test20.04/Dockerfile @@ -0,0 +1,59 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Docker image used to test CV-CUDA on linux-x64 + +ARG VER_CUDA=? +ARG VER_UBUNTU=? + +FROM nvidia/cuda:$VER_CUDA-runtime-ubuntu$VER_UBUNTU + +ARG VER_CUDA=? + +# For testing python bindings ====================================== + +ARG DEBIAN_FRONTEND=noninteractive + +# Add deadsnakes apt repo to fetch older python versions +ADD deadsnakes-ubuntu-ppa-focal.list /etc/apt/sources.list.d +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys BA6932366A755776 + +# Add so that tzdata don't ask for timezone info in a noninteractive installation. 
+RUN ln -sf /usr/share/zoneinfo/US/Pacific /etc/localtime + +# For running python tests +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + python3.7 python3.7-distutils \ + python3.8 python3.8-distutils \ + python3.9 python3.9-distutils \ + python3.10 python3.10-distutils \ + python3.11 python3.11-distutils \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* + +# It needs torch +RUN set -e \ + && for ver in 3.7 3.8 3.9 3.10 3.11; do \ + python$ver -m pip install torch numpy torchvision; \ + done \ + && rm -rf /root/.cache/pip + +# Other dependencies of python tests +# binutils: for readelf +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + binutils \ + && rm -rf /var/lib/apt/lists/* diff --git a/docker/test20.04/deadsnakes-ubuntu-ppa-focal.list b/docker/test20.04/deadsnakes-ubuntu-ppa-focal.list new file mode 100644 index 000000000..b9cba6f58 --- /dev/null +++ b/docker/test20.04/deadsnakes-ubuntu-ppa-focal.list @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +deb https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu/ focal main +# deb-src https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu/ focal main diff --git a/docker/test/Dockerfile b/docker/test22.04/Dockerfile similarity index 79% rename from docker/test/Dockerfile rename to docker/test22.04/Dockerfile index 0c55129e4..63b0d4a08 100644 --- a/docker/test/Dockerfile +++ b/docker/test22.04/Dockerfile @@ -24,6 +24,8 @@ ARG VER_CUDA=? 
# For testing python bindings ====================================== +ARG DEBIAN_FRONTEND=noninteractive + # Add deadsnakes apt repo to fetch older python versions ADD deadsnakes-ubuntu-ppa-jammy.list /etc/apt/sources.list.d RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys BA6932366A755776 @@ -32,25 +34,26 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys BA6932366A755776 RUN ln -sf /usr/share/zoneinfo/US/Pacific /etc/localtime # For running python tests -RUN DEBIAN_FRONTEND="noninteractive" apt-get update \ +RUN apt-get update \ && apt-get install -y --no-install-recommends \ - python3.7 python3.7-distutils \ - python3.8 python3.8-distutils \ - python3.9 python3.9-distutils \ - python3.10 \ - python3-pip \ + python3.7 python3.7-distutils \ + python3.8 python3.8-distutils \ + python3.9 python3.9-distutils \ + python3.10 python3.10-distutils \ + python3.11 python3.11-distutils \ + python3-pip \ && rm -rf /var/lib/apt/lists/* # It needs torch RUN set -e \ - && for ver in 3.7 3.8 3.9 3.10; do \ - python$ver -m pip install torch numpy torchvision; \ - done \ + && for ver in 3.7 3.8 3.9 3.10 3.11; do \ + python$ver -m pip install torch numpy torchvision; \ + done \ && rm -rf /root/.cache/pip # Other dependencies of python tests # binutils: for readelf -RUN DEBIAN_FRONTEND="noninteractive" apt-get update \ +RUN apt-get update \ && apt-get install -y --no-install-recommends \ - binutils \ + binutils \ && rm -rf /var/lib/apt/lists/* diff --git a/docker/test/deadsnakes-ubuntu-ppa-jammy.list b/docker/test22.04/deadsnakes-ubuntu-ppa-jammy.list similarity index 100% rename from docker/test/deadsnakes-ubuntu-ppa-jammy.list rename to docker/test22.04/deadsnakes-ubuntu-ppa-jammy.list diff --git a/docker/update_build_image.sh b/docker/update_build_image.sh index cd41dd057..32e4eab3f 100755 --- a/docker/update_build_image.sh +++ b/docker/update_build_image.sh @@ -35,9 +35,9 @@ cd "$SDIR" # load up configuration variables . ./config -cd build +cd build$VER_UBUNTU -image=$IMAGE_URL_BASE/build-linux:$TAG_IMAGE +image=$IMAGE_URL_BASE/build-linux:$VER_UBUNTU-$VER_CUDA docker build --network=host \ --build-arg "VER_CUDA=$VER_CUDA" \ diff --git a/docker/update_devel_image.sh b/docker/update_devel_image.sh index 13da128c3..aa7504149 100755 --- a/docker/update_devel_image.sh +++ b/docker/update_devel_image.sh @@ -33,13 +33,13 @@ cd "$SDIR" # load up configuration variables . ./config -cd devel +cd devel$VER_UBUNTU -image=$IMAGE_URL_BASE/devel-linux:$TAG_IMAGE +image=$IMAGE_URL_BASE/devel-linux:$VER_UBUNTU-$VER_CUDA docker build --network=host \ --build-arg BASE_IMAGE=$IMAGE_URL_BASE/build-linux \ - --build-arg TAG_IMAGE=$TAG_IMAGE \ + --build-arg TAG_IMAGE=$VER_UBUNTU-$VER_CUDA \ . -t $image if [[ $do_push == 1 ]]; then diff --git a/docker/update_samples_image.sh b/docker/update_samples_image.sh index 36d8208c3..6dbfc907b 100755 --- a/docker/update_samples_image.sh +++ b/docker/update_samples_image.sh @@ -32,6 +32,7 @@ cd "$SDIR" # Copy install_dependencies script from the samples folder to the samples' docker folder # so that it can be added and used inside the image. cp $SDIR/../samples/scripts/install_dependencies.sh $SDIR/samples/ +cp $SDIR/../samples/scripts/requirements.txt $SDIR/samples/ # load up configuration variables . ./config diff --git a/docker/update_test_image.sh b/docker/update_test_image.sh index d0cc51993..c69598d32 100755 --- a/docker/update_test_image.sh +++ b/docker/update_test_image.sh @@ -33,9 +33,9 @@ cd "$SDIR" # load up configuration variables . 
./config -cd test +cd test$VER_UBUNTU -image=$IMAGE_URL_BASE/test-linux-x64:$TAG_IMAGE_TEST +image=$IMAGE_URL_BASE/test-linux-x64:$VER_UBUNTU-$VER_CUDA docker build --network=host \ --build-arg "VER_CUDA=$VER_CUDA" \ diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 6cc8343d1..5ad6c8902 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -41,7 +41,7 @@ add_custom_command(OUTPUT ${DOXYGEN_INDEX_FILE} COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYFILE_OUT} MAIN_DEPENDENCY ${DOXYFILE_OUT} ${DOXYFILE_IN} COMMENT "Generating doxygen xml" - DEPENDS nvcv_python${VER}) + DEPENDS cvcuda_python${VER}) add_custom_target(cvcuda_doxygen ALL DEPENDS ${DOXYGEN_INDEX_FILE}) diff --git a/docs/sphinx/content/cvcuda_oplist.csv b/docs/sphinx/content/cvcuda_oplist.csv index 53ea4e8e7..bc4aecd54 100644 --- a/docs/sphinx/content/cvcuda_oplist.csv +++ b/docs/sphinx/content/cvcuda_oplist.csv @@ -16,7 +16,7 @@ CustomCrop,Crops an image with a given region-of-interest CvtColor,Converts an image from one color space to another DataTypeConvert,Converts an image’s data type with optional scaling Erase,Erases image regions -FindContours,Extract closed contours from an input binary image +Find Contours,Extract closed contours from an input binary image FindHomography,Calculates a perspective transform from four pairs of the corresponding points Flip,Flips a 2D image around its axis GammaContrast,Adjusts image contrast @@ -24,6 +24,7 @@ Gaussian,Applies a gaussian blur filter to the image Gaussian Noise,Generates a statistical noise with a normal (Gaussian) distribution Histogram,Provides a grayscale value distribution showing the frequency of occurrence of each gray value. Histogram Equalizer,Allows effective spreading out the intensity range of the image typically used to improve contrast +HqResize,"Performs advanced resizing supporting 2D and 3D data, tensors, tensor batches, and varshape image batches (2D only). Supports nearest neighbor, linear, cubic, Gaussian and Lanczos interpolation, with optional antialiasing when down-sampling." Inpainting,Performs inpainting by replacing a pixel by normalized weighted sum of all the known pixels in the neighborhood Joint Bilateral Filter,Reduces image noise while preserving strong edges based on a guidance image Label,Labels connected regions in an image using 4-way connectivity for foreground and 8-way for background pixels @@ -38,7 +39,7 @@ Non-max Suppression,Enables selecting a single entity out of many overlapping on Normalize,Normalizes an image pixel’s range OSD (Polyline Line Text Rotated Rect Segmented Mask),Displays an overlay on the image of of different forms including polyline line text rotated rectangle segmented mask PadStack,Stacks several images into a tensor with border extension -PairwiseMatcher,Matches features computed separately (e.g. via the SIFT operator) in two images using the brute force method +PairwiseMatcher,"Matches features computed separately (e.g. via the SIFT operator) in two images, e.g. using the brute force method" PillowResize,Changes the size and scale of an image using python-pillow algorithm RandomResizedCrop,Crops a random portion of an image and resizes it to a specified size.
Reformat,Converts a planar image into non-planar and vice versa diff --git a/docs/sphinx/index.rst b/docs/sphinx/index.rst index ec3667c61..890d44262 100644 --- a/docs/sphinx/index.rst +++ b/docs/sphinx/index.rst @@ -50,7 +50,7 @@ CV-CUDA offers more than 20 Computer Vision and Image Processing operators. Find Where Are the Release Notes? ------------------ -An awesome product requires excellent support. CV-CUDA release notes can be found `here `_. +An awesome product requires excellent support. CV-CUDA release notes can be found `here `_. Where Can I Get Help? @@ -124,6 +124,7 @@ Copyright :maxdepth: 1 :hidden: + Beta.4 Beta.3 Beta.2 Beta.1 diff --git a/docs/sphinx/installation.rst b/docs/sphinx/installation.rst index c37fd42f2..5e213d536 100644 --- a/docs/sphinx/installation.rst +++ b/docs/sphinx/installation.rst @@ -1,5 +1,5 @@ .. - # SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,7 +22,7 @@ Installation Pre-requisites -------------- -This section describes the recommended dependencies to compile cvcuda +This section describes the recommended dependencies to install CV-CUDA. * Ubuntu >= 20.04 * CUDA driver >= 11.7 @@ -30,65 +30,67 @@ This section describes the recommended dependencies to compile cvcuda Setup ----- -The following steps describe how to install cvcuda. Choose the installation method that meets your environment needs. - -Download the cvcuda tar/deb package from `here `_ +The following steps describe how to install CV-CUDA. Choose the installation method that meets your environment needs. +You can download the CV-CUDA tar, deb or wheel packages from `here `_ * Tar File Installation -Navigate to your directory containing the cvcuda tar file. + Unzip the cvcuda runtime package: :: + + tar -xvf cvcuda-lib-x.x.x-cuda11-x86_64-linux.tar.xz + + Unzip the cvcuda developer package: :: + + tar -xvf cvcuda-dev-x.x.x-cuda11-x86_64-linux.tar.xz -Unzip the cvcuda runtime package: :: + Unzip the cvcuda python package: :: - tar -xvf nvcv-lib-x.x.x-cuda11-x86_64-linux.tar.xz + tar -xvf cvcuda-python3.*-x.x.x-cuda11-x86_64-linux.tar.xz -Unzip the cvcuda developer package: :: + [Optional] Unzip the tests. :: - tar -xvf nvcv-dev-x.x.x-cuda11-x86_64-linux.tar.xz + tar -xvf cvcuda-tests-cuda11-x86_64-linux.tar.xz -Unzip the cvcuda python package: :: - tar -xvf nvcv-python3.*-x.x.x-cuda11-x86_64-linux.tar.xz +* Debian Installation -Optionally Unzip the tests. :: + Install the runtime library. :: - tar -xvf cvcuda-tests-cuda11-x86_64-linux.tar.xz + dpkg -i cvcuda-lib-x.x.x-cuda11-x86_64-linux.deb -Optionally Unzip the tests. :: + Install the developer library. :: - tar -xvf cvcuda-tests-cuda11-x86_64-linux.tar.xz + dpkg -i cvcuda-dev-x.x.x-cuda11-x86_64-linux.deb -* Debian Local Installation + Install the python bindings :: -Navigate to your directory containing the cvcuda Debian local installer file. :: + dpkg -i cvcuda-python3.*-x.x.x-cuda11-x86_64-linux.deb -Install the runtime library. :: + [Optional] Install the tests. :: - sudo dpkg -i nvcv-lib-x.x.x-cuda11-x86_64-linux.deb + sudo dpkg -i cvcuda-tests-x.x.x-cuda11-x86_64-linux.deb -Install the developer library. 
:: - sudo dpkg -i nvcv-dev-x.x.x-cuda11-x86_64-linux.deb +* Python Wheel File Installation -Install the python bindings :: + Download the appropriate .whl file for your computer architecture, Python and CUDA version from `here `_ - sudo dpkg -i nvcv-python3.*-x.x.x-cuda11-x86_64-linux.deb + Execute the following command to install the appropriate CV-CUDA Python wheel :: -Optionally install the tests. :: + pip install cvcuda_<cu_ver>-0.6.0b0-cp<py_ver>-cp<py_ver>-linux_<arch>.whl - sudo dpkg -i cvcuda-tests-x.x.x-cuda11-x86_64-linux.deb + where <cu_ver> is the desired CUDA version, <py_ver> the desired Python version and <arch> the desired architecture. -Optionally install the samples. :: + Please note that the Python wheels provided are standalone; they include both the C++/CUDA libraries and the Python bindings. - sudo dpkg -i cvcuda-samples-x.x.x-cuda11-x86_64-linux.deb -* Verifying the Installation on Linux +* Verifying the Debian or TAR installation on Linux -To verify that cvcuda is installed and is running properly, run the tests from the install folder for tests. -Default installation path is /opt/nvidia/cvcuda0/bin. :: + To verify that CV-CUDA is installed and is running properly, run the tests from the install folder for tests. + Default installation path is /opt/nvidia/cvcuda0/bin. :: - cd /opt/nvidia/cvcuda0/bin - ./run_tests.sh + cd /opt/nvidia/cvcuda0/bin + ./run_tests.sh If CV-CUDA is properly installed and running on your Linux system, all tests will pass. diff --git a/docs/sphinx/relnotes/v0.5.0-beta.rst b/docs/sphinx/relnotes/v0.5.0-beta.rst index f79f78303..bd3633197 100644 --- a/docs/sphinx/relnotes/v0.5.0-beta.rst +++ b/docs/sphinx/relnotes/v0.5.0-beta.rst @@ -19,47 +19,42 @@ Beta.3 ====== -CV-CUDA 0.5.0 is a major release of the library providing multiple new operators, features, and fixes to multiple customer-reported issues. +CV-CUDA 0.5.0 is a comprehensive update introducing new security, compliance, and performance enhancements, alongside bug fixes and new features. Release Highlights ------------------ -CV-CUDA v0.5.0 includes the following key changes: +CV-CUDA v0.5.0 includes significant improvements: * **New Operators**: - * FindHomography: Calculates a perspective transform from four pairs of the corresponding points - * Label: Labels connected regions in an image using 4-way connectivity for foreground and 8-way for background pixels - * PairwiseMatcher: Matches features computed separately (e.g. via the SIFT operator) in two images using the brute force method - * Stack: Concatenates two input tensors into a single output tensor + - FindHomography: Calculates a perspective transform from four pairs of the corresponding points + - Label: Labels connected regions in an image using 4-way connectivity for foreground and 8-way for background pixels + - PairwiseMatcher: Matches features computed separately (e.g.
via the SIFT operator) in two images using the brute force method * **New Features**: - * Added `TensorBatch` in C++ and Python, a container type that can hold a list of non-uniformly shaped tensors - * Added `Workspace` in C++ and Python, an abstraction of memory and asynchronous resources for CV-CUDA operators - * Added better color format support in nvcv_types - * New sample application for the `Label` operator - * JetPack 5.1.2 support for L4T (Jetson Orin, L4T 35.4.1, CUDA 11.4) - * Enhanced documentation + - Implemented Python class for `TensorBatch``, a container type that can hold a list of non-uniformly shaped tensors + - Added support for RGBD image formats + - Enhanced documentation * **Bug Fixes**: - * Resolved memory leak in `NvBlurBoxes` - * Fixed segmentation fault issue in Python with certain imports - * Corrected `typestr` format issue in `__cuda_array_interface__` - * Addressed occasional hanging in `OpBoxBlur` on RGBA images + - Resolved memory leak in NvBlurBoxes + - Fixed segmentation fault issue in Python with certain imports + - Corrected typestr format issue in `__cuda_array_interface__` + - Addressed occasional hanging in OpBoxBlur on RGBA images Compatibility ------------- -* GPU Compute Capability: 7+.x -* Ubuntu x86_64: 20.04, 22.04 +* Continues to support GPU Compute Capability: 7+.x +* Compatible with Ubuntu x86_64: 20.04, 22.04 * CUDA Toolkit: 11.7+ (11.2+ for library build and run) -* L4T: 35.4.1, JetPack 5.1.2 aarch64 -* GCC: 11.0+ (9.x and 10.x for APIs with pre-built binary) -* Python: 3.8, 3.10 +* GCC: 11.0+ (9.0 and 10.0 for APIs, with pre-built binary and run) +* Python: 3.7, 3.8, 3.10 Known Issues/Limitations ------------------------ -* For GCC versions lower than 11.0, C++17 support needs to be enabled when compiling CV-CUDA. +* The release notes do not specify new known issues or limitations for this version. License ------- diff --git a/docs/sphinx/relnotes/v0.6.0-beta.rst b/docs/sphinx/relnotes/v0.6.0-beta.rst new file mode 100644 index 000000000..ca0995a67 --- /dev/null +++ b/docs/sphinx/relnotes/v0.6.0-beta.rst @@ -0,0 +1,78 @@ +.. + # SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-License-Identifier: Apache-2.0 + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +.. _v0.6.0-beta: + +Beta.4 +====== + +CV-CUDA 0.6.0 is a comprehensive update introducing new packaging and documentation enhancements, along with bug fixes and new features. + +Release Highlights +------------------ + +CV-CUDA v0.6.0 includes significant improvements: + +* **New Operator**: + + * HQResize: Advanced resize operator supporting 2D and 3D data, tensors, tensor batches, and varshape image batches (2D only). Supports nearest neighbor, linear, cubic, Gaussian and Lanczos interpolation, with optional antialiasing when down-sampling. + +* **New Features**: + + * Standalone Python Wheels, including tooling and documentation to generate them. Prebuilt binaries for selected configurations. 
+ + * Homogenized package naming + + * Improved documentation of hardware/software compatibility, build and test tutorials + + * Added Python Operator benchmarking application + + * Samples updated to new codec libraries, PyNvVideoCodec and NvImageCodec + + * Support of rank 2 tensors in MedianBlur + + * Additional tests for various operators + +* **Bug Fixes**: + + * Fix name clashes with NVTX + + * Fix workspace memory allocation of complex filters + + * Fix memory fault in MinAreaRect + +Compatibility and Known Limitations +----------------------------------- + +See main README on `CV-CUDA GitHub `_. + +License +------- + +CV-CUDA is licensed under the `Apache 2.0 `_ license. + +Resources +--------- + +1. `CV-CUDA GitHub `_ +2. `CV-CUDA Increasing Throughput and Reducing Costs for AI-Based Computer Vision with CV-CUDA `_ +3. `NVIDIA Announces Microsoft, Tencent, Baidu Adopting CV-CUDA for Computer Vision AI `_ +4. `CV-CUDA helps Tencent Cloud audio and video PaaS platform achieve full-process GPU acceleration for video enhancement AI `_ + +Acknowledgements +---------------- + +CV-CUDA is developed jointly by NVIDIA and the ByteDance Machine Learning team. diff --git a/docs/sphinx/samples/cpp_samples/cropresize.rst b/docs/sphinx/samples/cpp_samples/cropresize.rst index 0ba4fd645..671733dae 100644 --- a/docs/sphinx/samples/cpp_samples/cropresize.rst +++ b/docs/sphinx/samples/cpp_samples/cropresize.rst @@ -126,7 +126,7 @@ To run the sample .. code-block:: bash - ./build/nvcv_samples_cropandresize -i -b + ./build/cvcuda_sample_cropandresize -i -b Sample Output ------------- diff --git a/docs/sphinx/samples/python_samples/classification.rst b/docs/sphinx/samples/python_samples/classification.rst index 119611627..16c23cb48 100644 --- a/docs/sphinx/samples/python_samples/classification.rst +++ b/docs/sphinx/samples/python_samples/classification.rst @@ -1,5 +1,5 @@ .. - # SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,11 +36,11 @@ Writing the Sample App The classification sample app has been designed to be modular in all aspects. It imports and uses various modules such as data decoders, pipeline pre and post processors and the model inference. Some of these modules are defined in the same folder as the sample whereas the rest are defined in the common scripts folder for a wider re-use. -1. Modules used by this sample app that are defined in the common folder (i.e. not specific just to this sample) are the ``ImageBatchDecoderPyTorch`` for PyTorch based image decoding and ``VideoBatchDecoderVPF`` for VPF based video decoding. +1. Modules used by this sample app that are defined in the common folder (i.e. not specific just to this sample) are the ``ImageBatchDecoder`` for nvImageCodec based image decoding and ``VideoBatchDecoder`` for PyNvVideoCodec based video decoding. 2. Modules specific to this sample (i.e. defined in the classification sample folder) are ``PreprocessorCvcuda`` and ``PostprocessorCvcuda`` for CVCUDA based pre and post processing pipelines and ``ClassificationPyTorch`` and ``ClassificationTensorRT`` for the model inference. -The first stage in our pipeline is importing all necessary python modules. 
Apart from the modules described above, this also includes modules such as torch and torchvision, torchnvjpeg, vpf and the main package of CVCUDA (i.e. nvcv) among others. Be sure to import ``pycuda.driver`` before importing any other GPU packages like torch or cvcuda to ensure a proper initialization. +The first stage in our pipeline is importing all necessary python modules. Apart from the modules described above, this also includes modules such as torch and torchvision, torchnvjpeg, vpf and the main package of CVCUDA among others. Be sure to import ``pycuda.driver`` before importing any other GPU packages like torch or cvcuda to ensure a proper initialization. .. literalinclude:: ../../../../samples/classification/python/main.py :language: python @@ -83,8 +83,8 @@ The ``run_sample`` function is the primary function that runs this sample. It se Next, we instantiate various classes to help us run the sample. These classes are: 1. ``PreprocessorCvcuda`` : A CVCUDA based pre-processing pipeline for classification. -2. ``ImageBatchDecoderPyTorch`` : A PyTorch based image decoder to read the images. -3. ``VideoBatchDecoderVPF`` : A VPF based video decoder to read the video. +2. ``ImageBatchDecoder`` : A nvImageCodec based image decoder to read the images. +3. ``VideoBatchDecoder`` : A PyNvVideoCodec based video decoder to read the video. 4. ``PostprocessorCvcuda`` : A post-processing pipeline for classification. 5. ``classificationPyTorch`` : A PyTorch based classification model to execute inference. 6. ``classificationTensorRT`` : A TensorRT based classification model to execute inference. @@ -120,8 +120,8 @@ That's it for the classification sample. To understand more about how each stage PreprocessorCvcuda PostprocessorCvcuda - ImageBatchDecoderPyTorch - VideoBatchDecoderVPF + ImageBatchDecoder + VideoBatchDecoder ClassificationPyTorch ClassificationTensorRT @@ -171,7 +171,7 @@ The top 5 classification results for the tabby_cat_tiger.jpg image is as follows user@machine:~/cvcuda/samples$ python3 classification/python/main.py -b 1 [perf_utils:85] 2023-07-27 22:27:17 WARNING perf_utils is used without benchmark.py. Benchmarking mode is turned off. - [perf_utils:89] 2023-07-27 22:27:17 INFO Using CV-CUDA version: 0.5.0-beta + [perf_utils:89] 2023-07-27 22:27:17 INFO Using CV-CUDA version: 0.6.0-beta [pipelines:35] 2023-07-27 22:27:17 INFO Using CVCUDA as preprocessor. [torch_utils:77] 2023-07-27 22:27:17 INFO Using torchnvjpeg as decoder. [pipelines:122] 2023-07-27 22:27:17 INFO Using CVCUDA as post-processor. diff --git a/docs/sphinx/samples/python_samples/commons/imagebatchdecoder_pytorch.rst b/docs/sphinx/samples/python_samples/commons/imagebatchdecoder_nvcodec.rst similarity index 52% rename from docs/sphinx/samples/python_samples/commons/imagebatchdecoder_pytorch.rst rename to docs/sphinx/samples/python_samples/commons/imagebatchdecoder_nvcodec.rst index 93ed4ad0b..edb14806e 100644 --- a/docs/sphinx/samples/python_samples/commons/imagebatchdecoder_pytorch.rst +++ b/docs/sphinx/samples/python_samples/commons/imagebatchdecoder_nvcodec.rst @@ -1,5 +1,5 @@ .. - # SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,56 +14,56 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-.. _imagebatchdecoder_pytorch: +.. _imagebatchdecoder_nvcodec: -Image Decoding using PyTorch +Image Decoding using nvImageCodec ==================== -The image batch decoder is responsible for parsing the input expression, reading and decoding image data. The actual decoding is done in batches using the library ``torchnvjpeg``. Although used in the semantic segmentation sample, this image decoder is generic enough to be used in other applications. The code associated with this class can be found in the ``samples/common/python/torch_utils.py`` file. +The image batch decoder is responsible for parsing the input expression, reading and decoding image data. The actual decoding is done in batches using the library `nvImageCodec `_. Although used in the semantic segmentation sample, this image decoder is generic enough to be used in other applications. The code associated with this class can be found in the ``samples/common/python/nvcodec_utils.py`` file. Before the data can be read or decoded, we must parse it (i.e figure out what kind of data it is). Depending on the ``input_path``'s value, we either read one image and create a dummy list with the data from the same image to simulate a batch or read a bunch of images from a directory. -.. literalinclude:: ../../../../../samples/common/python/torch_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_init_imagebatchdecoder_pytorch - :end-before: end_parse_imagebatchdecoder_pytorch + :start-after: begin_init_imagebatchdecoder_nvimagecodec + :end-before: end_parse_imagebatchdecoder_nvimagecodec :dedent: Once we have a list of image file names that we can read, we will split them into batches based on the batch size. -.. literalinclude:: ../../../../../samples/common/python/torch_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_batch_imagebatchdecoder_pytorch - :end-before: end_init_imagebatchdecoder_pytorch + :start-after: begin_batch_imagebatchdecoder_nvimagecodec + :end-before: end_init_imagebatchdecoder_nvimagecodec :dedent: That is all we need to do for the initialization. Now as soon as a call to decoder is issued, we would start reading and decoding the data. This begins with reading the data bytes in batches and returning None if there is no data left to be read. -.. literalinclude:: ../../../../../samples/common/python/torch_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_call_imagebatchdecoder_pytorch - :end-before: end_read_imagebatchdecoder_pytorch + :start-after: begin_call_imagebatchdecoder_nvimagecodec + :end-before: end_read_imagebatchdecoder_nvimagecodec :dedent: -Once the data has been read, we use ``torchnvjpeg`` to decode it into a list of image tensors. The torchnvjpeg instance is allocated either on its first use or whenever there is a change in the batch size (i.e. last batch). Since what we get at this point is a list of images (i.e a python list of 3D tensors), we would need to convert them to a 4D tensor by stacking them up on the first dimension. +Once the data has been read, we use ``nvImageCodec`` to decode it into a list of image tensors. The nvImageCodec instance is allocated either on its first use or whenever there is a change in the batch size (i.e. last batch). 
Since what we get at this point is a list of images (i.e a python list of 3D tensors), we would need to convert them to a 4D tensor by stacking them up on the first dimension. -.. literalinclude:: ../../../../../samples/common/python/torch_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_decode_imagebatchdecoder_pytorch - :end-before: end_decode_imagebatchdecoder_pytorch + :start-after: begin_decode_imagebatchdecoder_nvimagecodec + :end-before: end_decode_imagebatchdecoder_nvimagecodec :dedent: The final step is to pack all of this data into a special CVCUDA samples object called as ``Batch``. The ``Batch`` object helps us keep track of the data associated with the batch, the index of the batch and optionally any filename information one wants to attach (i.e. which files the data came from). -.. literalinclude:: ../../../../../samples/common/python/torch_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_return_imagebatchdecoder_pytorch - :end-before: end_return_imagebatchdecoder_pytorch + :start-after: begin_return_imagebatchdecoder_nvimagecodec + :end-before: end_return_imagebatchdecoder_nvimagecodec :dedent: diff --git a/docs/sphinx/samples/python_samples/commons/imagebatchencoder_pytorch.rst b/docs/sphinx/samples/python_samples/commons/imagebatchencoder_nvcodec.rst similarity index 58% rename from docs/sphinx/samples/python_samples/commons/imagebatchencoder_pytorch.rst rename to docs/sphinx/samples/python_samples/commons/imagebatchencoder_nvcodec.rst index dfd25650e..3cdb507d7 100644 --- a/docs/sphinx/samples/python_samples/commons/imagebatchencoder_pytorch.rst +++ b/docs/sphinx/samples/python_samples/commons/imagebatchencoder_nvcodec.rst @@ -1,5 +1,5 @@ .. - # SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,28 +14,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -.. _imagebatchencoder_pytorch: +.. _imagebatchencoder_nvcodec: -Image Encoding using PyTorch +Image Encoding using nvImageCodec ==================== -The image batch encoder is responsible for saving image tensors to the disk as JPG images. The actual encoding is done in batches using the ``PIL`` library. The image encoder is generic enough to be across the sample applications. The code associated with this class can be found in the ``samples/common/python/torch_utils.py`` file. +The image batch encoder is responsible for saving image tensors to the disk as JPG images. The actual encoding is done in batches using the `nvImageCodec `_ library. The image encoder is generic enough to be across the sample applications. The code associated with this class can be found in the ``samples/common/python/nvcodec_utils.py`` file. The image batch encoder is a relatively simple class. Here is how its ``__init__`` method is defined. -.. literalinclude:: ../../../../../samples/common/python/torch_utils.py +.. 
literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_init_imagebatchencoder_pytorch - :end-before: end_init_imagebatchencoder_pytorch + :start-after: begin_init_imagebatchencoder_nvimagecodec + :end-before: end_init_imagebatchencoder_nvimagecodec :dedent: -Once the initialization is complete, we encode the images in the ``__call__`` method. Since the ``Batch`` object is passed, we have information of the data, its batch index and the original file name used to read the data. We can use this together with PyTorch's functions to detach the tensor, transfer it to the CPU and save it as PIL JPG image. +Once the initialization is complete, we encode the images in the ``__call__`` method. Since the ``Batch`` object is passed, we have information of the data, its batch index and the original file name used to read the data. -.. literalinclude:: ../../../../../samples/common/python/torch_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_call_imagebatchencoder_pytorch - :end-before: end_call_imagebatchencoder_pytorch + :start-after: begin_call_imagebatchencoder_nvimagecodec + :end-before: end_call_imagebatchencoder_nvimagecodec :dedent: diff --git a/docs/sphinx/samples/python_samples/commons/videobatchdecoder_vpf.rst b/docs/sphinx/samples/python_samples/commons/videobatchdecoder_nvcodec.rst similarity index 54% rename from docs/sphinx/samples/python_samples/commons/videobatchdecoder_vpf.rst rename to docs/sphinx/samples/python_samples/commons/videobatchdecoder_nvcodec.rst index 8fb986029..9219f0aef 100644 --- a/docs/sphinx/samples/python_samples/commons/videobatchdecoder_vpf.rst +++ b/docs/sphinx/samples/python_samples/commons/videobatchdecoder_nvcodec.rst @@ -1,5 +1,5 @@ .. - # SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,18 +14,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -.. _videobatchdecoder_vpf: +.. _videobatchdecoder_pyvideocodec: -Video Decoding using VPF +Video Decoding using pyNvVideoCodec ==================== -The video batch decoder is responsible for reading an MP4 video as PyTorch tensors. The actual decoding is done per frame using NVIDIA's `Video Processing Framework `_. The video decoder is generic enough to be used across the sample applications. The code associated with this class can be found in the ``samples/common/python/vpf_utils.py`` file. +The video batch decoder is responsible for reading an MP4 video as tensors. The actual decoding is done per frame using NVIDIA's PyNvVideoCodec API. The video decoder is generic enough to be used across the sample applications. The code associated with this class can be found in the ``samples/common/python/nvcodec_utils.py`` file. There are two classes responsible for the decoding work: -1. ``VideoBatchDecoderVPF`` and -2. ``nvdecoder`` +1. ``VideoBatchDecoder`` and +2. ``nvVideoDecoder`` The first class acts as a wrapper on the second class which allows us to: @@ -34,70 +34,70 @@ The first class acts as a wrapper on the second class which allows us to: 3. 
Use accelerated ops in CVCUDA to perform the necessary color conversion from NV12 to RGB after decoding the video. -VideoBatchDecoderVPF +VideoBatchDecoder ------------------ -Let's get started by understanding how this class is initialized in its ``__init__`` method. We use VPF's ``PyFFmpegDemuxer`` to read a few properties of the video. The decoder instance and CVCUDA color conversion tensors both are allocated when needed upon the first use. +Let's get started by understanding how this class is initialized in its ``__init__`` method. We use ``PyNvDemuxer`` to read a few properties of the video. The decoder instance and CVCUDA color conversion tensors both are allocated when needed upon the first use. **Note**: Due to the nature of NV12, representing it directly as a CVCUDA tensor is a bit challenging. Be sure to read through the explanation in the comments of the code shown below to understand more. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_init_videobatchdecoder_vpf - :end-before: end_init_videobatchdecoder_vpf + :start-after: begin_init_videobatchdecoder_pyvideocodec + :end-before: end_init_videobatchdecoder_pyvideocodec :dedent: Once things are defined and initialized, we would start the decoding when a call to the ``__call__`` function is made. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_call_videobatchdecoder_vpf - :end-before: end_alloc_videobatchdecoder_vpf + :start-after: begin_call_videobatchdecoder_pyvideocodec + :end-before: end_alloc_videobatchdecoder_pyvideocodec :dedent: Next, we call the ``nvdecoder`` instance to actually do the decoding and stack the image tensors up to form a 4D tensor. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_decode_videobatchdecoder_vpf - :end-before: end_decode_videobatchdecoder_vpf + :start-after: begin_decode_videobatchdecoder_pyvideocodec + :end-before: end_decode_videobatchdecoder_pyvideocodec :dedent: Once the video batch is ready, we use CVCUDA's ``cvtcolor_into`` function to convert its data from NV12 format to RGB format. We will use pre-allocated tensors to do the color conversion to avoid allocating same tensors on every batch. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_convert_videobatchdecoder_vpf - :end-before: end_convert_videobatchdecoder_vpf + :start-after: begin_convert_videobatchdecoder_pyvideocodec + :end-before: end_convert_videobatchdecoder_pyvideocodec :dedent: The final step is to pack all of this data into a special CVCUDA samples object called as ``Batch``. The ``Batch`` object helps us keep track of the data associated with the batch, the index of the batch and optionally any filename information one wants to attach (i.e. which files did the data come from). -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. 
literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_batch_videobatchdecoder_vpf - :end-before: end_batch_videobatchdecoder_vpf + :start-after: begin_batch_videobatchdecoder_pyvideocodec + :end-before: end_batch_videobatchdecoder_pyvideocodec :dedent: -nvdecoder +nvVideoDecoder ------------------ -This is a class offering hardware accelerated video decoding functionality using VPF. It reads an MP4 video file, decodes it and returns a 3D PyTorch Tensor per frame. Please consult the documentation of the `Video Processing Framework `_ to learn more about its capabilities and APIs. +This is a class offering hardware accelerated video decoding functionality using pyNvVideoCodec. It reads an MP4 video file, decodes it and returns a CUDA accessible Tensor per frame. Please consult the documentation of the pyNvVideoCodec to learn more about its capabilities and APIs. -For use in CVCUDA, this class defines the following ``decode_hw`` and ``decode_to_tensor`` functions which decode data to a Torch tensor in a given cuda stream. +For use in CVCUDA, this class defines the following functions which decode data to a tensor in a given CUDA stream. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_imp_nvdecoder - :end-before: end_imp_nvdecoder + :start-after: begin_imp_nvvideodecoder + :end-before: end_imp_nvvideodecoder :dedent: diff --git a/docs/sphinx/samples/python_samples/commons/videobatchencoder_vpf.rst b/docs/sphinx/samples/python_samples/commons/videobatchencoder_nvcodec.rst similarity index 53% rename from docs/sphinx/samples/python_samples/commons/videobatchencoder_vpf.rst rename to docs/sphinx/samples/python_samples/commons/videobatchencoder_nvcodec.rst index faa0dfbf6..96a75bf2b 100644 --- a/docs/sphinx/samples/python_samples/commons/videobatchencoder_vpf.rst +++ b/docs/sphinx/samples/python_samples/commons/videobatchencoder_nvcodec.rst @@ -1,5 +1,5 @@ .. - # SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,18 +14,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -.. _videobatchencoder_vpf: +.. _videobatchencoder_pyvideocodec: -Video Encoding using VPF +Video Encoding using pyNvVideoCodec ==================== -The video batch encoder is responsible for writing PyTorch tensors as an MP4 video. The actual encoding is done in batches using NVIDIA's `Video Processing Framework `_. The video encoder is generic enough to be used across the sample applications. The code associated with this class can be found in the ``samples/common/python/vpf_utils.py`` file. +The video batch encoder is responsible for writing tensors as an MP4 video. The actual encoding is done in batches using NVIDIA's pyNvVideoCodec. The video encoder is generic enough to be used across the sample applications. The code associated with this class can be found in the ``samples/common/python/nvcodec_utils.py`` file. There are two classes responsible for the encoding work: -1. ``VideoBatchEncoderVPF`` and -2.
``nvVideoEncoder`` The first class acts as a wrapper on the second class which allows us to: @@ -42,72 +42,72 @@ To get started, here is how the class is initialized in its ``__init__`` method. **Note**: Due to the nature of NV12, representing it directly as a CVCUDA tensor is a bit challenging. Be sure to read through the explanation in the comments of the code shown below to understand more. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_init_videobatchencoder_vpf - :end-before: end_init_videobatchencoder_vpf + :start-after: begin_init_videobatchencoder_pyvideocodec + :end-before: end_init_videobatchencoder_pyvideocodec :dedent: Once things are defined and initialized, we would start the decoding when a call to the ``__call__`` function is made. We need to first allocate the encoder instance if it wasn't done so already. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_call_videobatchencoder_vpf - :end-before: end_alloc_videobatchdecoder_vpf + :start-after: begin_call_videobatchencoder_pyvideocodec + :end-before: end_alloc_videobatchdecoder_pyvideocodec :dedent: Next, we use CVCUDA's ``cvtcolor_into`` function to convert the batch data from RGB format to NV12 format. We allocate tensors once to do the color conversion and avoid allocating same tensors on every batch. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_alloc_cvcuda_videobatchdecoder_vpf - :end-before: end_alloc_cvcuda_videobatchdecoder_vpf + :start-after: begin_alloc_cvcuda_videobatchdecoder_pyvideocodec + :end-before: end_alloc_cvcuda_videobatchdecoder_pyvideocodec :dedent: Once the tensors are allocated, we use CVCUDA ops to perform the color conversion. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_convert_videobatchencoder_vpf - :end-before: end_convert_videobatchencoder_vpf + :start-after: begin_convert_videobatchencoder_pyvideocodec + :end-before: end_convert_videobatchencoder_pyvideocodec :dedent: -Finally, we call the ``nvencooder`` instance to actually do the encoding. +Finally, we call the ``nvVideoEncoder`` instance to actually do the encoding. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_encode_videobatchencoder_vpf - :end-before: end_encode_videobatchencoder_vpf + :start-after: begin_encode_videobatchencoder_nvvideoencoder + :end-before: end_encode_videobatchencoder_nvvideoencoder :dedent: -nvencoder +nvVideoEncoder ------------------ -This is a class offering hardware accelerated video encoding functionality using VPF. It encodes tensors and writes as an MP4 file. Please consult the documentation of the `Video Processing Framework `_ to learn more about its capabilities and APIs. +This is a class offering hardware accelerated video encoding functionality using pyNvVideoCodec. It encodes tensors and writes as an MP4 file. 
Please consult the documentation of the pyNvVideoCodec to learn more about its capabilities and APIs. -For use in CVCUDA, this class defines the following ``tensor_to_surface`` and ``encode_from_tensor`` functions which encode a Torch tensor. +For use in CVCUDA, this class defines the following ``encode_from_tensor`` functions which encode a Torch tensor. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_imp_nvencoder - :end-before: end_imp_nvencoder + :start-after: begin_imp_nvvideoencoder + :end-before: end_imp_nvvideoencoder :dedent: Finally, we use the ``av`` library to write packets to an MP4 container. We must properly flush (i.e. write any pending packets) at the end. -.. literalinclude:: ../../../../../samples/common/python/vpf_utils.py +.. literalinclude:: ../../../../../samples/common/python/nvcodec_utils.py :language: python :linenos: - :start-after: begin_writeframe_nvencoder - :end-before: end_writeframe_nvencoder + :start-after: begin_writeframe_nvvideoencoder + :end-before: end_writeframe_nvvideoencoder :dedent: diff --git a/docs/sphinx/samples/python_samples/object_detection.rst b/docs/sphinx/samples/python_samples/object_detection.rst index a2d05499a..8a882221a 100644 --- a/docs/sphinx/samples/python_samples/object_detection.rst +++ b/docs/sphinx/samples/python_samples/object_detection.rst @@ -1,5 +1,5 @@ .. - # SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -35,11 +35,11 @@ Writing the Sample App The object detection app has been designed to be modular in all aspects. It imports and uses various modules such as data decoders, encoders, pipeline pre and post processors and the model inference. Some of these modules are defined in the same folder as the sample whereas the rest are defined in the common scripts folder for a wider re-use. -1. Modules used by this sample app that are defined in the common folder (i.e. not specific just to this sample) are the ``ImageBatchDecoderPyTorch`` and ``ImageBatchEncoderPyTorch`` for PyTorch based image decoding and encoding and ``VideoBatchDecoderVPF`` and ``VideoBatchEncoderVPF`` for VPF based video decoding and encoding. +1. Modules used by this sample app that are defined in the common folder (i.e. not specific just to this sample) are the ``ImageBatchDecoder`` and ``ImageBatchEncoder`` for nvImageCodec based image decoding and encoding and ``VideoBatchDecoder`` and ``VideoBatchEncoder`` for PyNvVideoCodec based video decoding and encoding. 2. Modules specific to this sample (i.e. defined in the object_detection sample folder) are ``PreprocessorCvcuda`` and ``PostprocessorCvcuda`` for CVCUDA based pre and post processing pipelines and ``ObjectDetectionTensorRT`` and ``ObjectDetectionTensorflow`` for the model inference. -The first stage in our pipeline is importing all necessary python modules. Apart from the modules described above, this also includes modules such as torch and torchvision, torchnvjpeg, vpf and the main package of CVCUDA (i.e. nvcv) among others. Be sure to import ``pycuda.driver`` before importing any other GPU packages like torch or cvcuda to ensure a proper initialization. 
+The first stage in our pipeline is importing all necessary python modules. Apart from the modules described above, this also includes modules such as torch and torchvision, torchnvjpeg, vpf and the main package of CVCUDA among others. Be sure to import ``pycuda.driver`` before importing any other GPU packages like torch or cvcuda to ensure a proper initialization. .. literalinclude:: ../../../../samples/object_detection/python/main.py :language: python @@ -91,10 +91,10 @@ Once the streams have been defined and initialized, all the operations in the re Next, we instantiate various classes to help us run the sample. These classes are: 1. ``PreprocessorCvcuda`` : A CVCUDA based pre-processing pipeline for object detection. -2. ``ImageBatchDecoderPyTorch`` : A PyTorch based image decoder to read the images. -3. ``ImageBatchEncoderPyTorch`` : A PyTorch based image encoder to write the images. -4. ``VideoBatchDecoderVPF`` : A VPF based video decoder to read the video. -5. ``VideoBatchEncoderVPF`` : A VPF based video encoder to write the video. +2. ``ImageBatchDecoder`` : A nvImageCodec based image decoder to read the images. +3. ``ImageBatchEncoder`` : A nvImageCodec based image encoder to write the images. +4. ``VideoBatchDecoder`` : A PyNvVideoCodec based video decoder to read the video. +5. ``VideoBatchEncoder`` : A PyNvVideoCodec based video encoder to write the video. 6. ``PostProcessorCvcuda`` : A CVCUDA based post-processing pipeline for object detection. 7. ``ObjectDetectionTensorflow`` : A TensorFlow based object detection model to execute inference. 8. ``ObjectDetectionTensorRT`` : A TensorRT based object detection model to execute inference. @@ -122,10 +122,10 @@ That's it for the object detection sample. To understand more about how each sta PreprocessorCvcuda PostprocessorCvcuda - ImageBatchDecoderPyTorch - ImageBatchEncoderPyTorch - VideoBatchDecoderVPF - VideoBatchEncoderVPF + ImageBatchDecoder + ImageBatchEncoder + VideoBatchDecoder + VideoBatchEncoder ObjectDetectionTensorFlow ObjectDetectionTensorRT @@ -177,7 +177,7 @@ This sample takes as input one or more images or one video and generates the obj user@machine:~/cvcuda/samples$ python3 object_detection/python/main.py [perf_utils:85] 2023-07-27 23:15:34 WARNING perf_utils is used without benchmark.py. Benchmarking mode is turned off. - [perf_utils:89] 2023-07-27 23:15:34 INFO Using CV-CUDA version: 0.5.0-beta + [perf_utils:89] 2023-07-27 23:15:34 INFO Using CV-CUDA version: 0.6.0-beta [pipelines:30] 2023-07-27 23:15:36 INFO Using CVCUDA as preprocessor. [torch_utils:77] 2023-07-27 23:15:36 INFO Using torchnvjpeg as decoder. [torch_utils:151] 2023-07-27 23:15:36 INFO Using PyTorch/PIL as encoder. diff --git a/docs/sphinx/samples/python_samples/segmentation.rst b/docs/sphinx/samples/python_samples/segmentation.rst index 5dd4d1944..53cf5b2eb 100644 --- a/docs/sphinx/samples/python_samples/segmentation.rst +++ b/docs/sphinx/samples/python_samples/segmentation.rst @@ -1,5 +1,5 @@ .. - # SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -35,11 +35,11 @@ Writing the Sample App The segmentation sample app has been designed to be modular in all aspects. 
It imports and uses various modules such as data decoders, encoders, pipeline pre and post processors and the model inference. Some of these modules are defined in the same folder as the sample whereas the rest are defined in the common scripts folder for a wider re-use. -1. Modules used by this sample app that are defined in the common folder (i.e. not specific just to this sample) are the ``ImageBatchDecoderPyTorch`` and ``ImageBatchEncoderPyTorch`` for PyTorch based image decoding and encoding and ``VideoBatchDecoderVPF`` and ``VideoBatchEncoderVPF`` for VPF based video decoding and encoding. +1. Modules used by this sample app that are defined in the common folder (i.e. not specific just to this sample) are the ``ImageBatchDecoder`` and ``ImageBatchEncoder`` for nvImageCodec based image decoding and encoding and ``VideoBatchDecoder`` and ``VideoBatchEncoder`` for PyNvVideoCodec based video decoding and encoding. 2. Modules specific to this sample (i.e. defined in the segmentation sample folder) are ``PreprocessorCvcuda`` and ``PostprocessorCvcuda`` for CVCUDA based pre and post processing pipelines and ``SegmentationPyTorch`` and ``SegmentationTensorRT`` for the model inference. -The first stage in our pipeline is importing all necessary python modules. Apart from the modules described above, this also includes modules such as torch and torchvision, torchnvjpeg, vpf and the main package of CVCUDA (i.e. nvcv) among others. Be sure to import ``pycuda.driver`` before importing any other GPU packages like torch or cvcuda to ensure a proper initialization. +The first stage in our pipeline is importing all necessary python modules. Apart from the modules described above, this also includes modules such as torch and torchvision, torchnvjpeg, vpf and the main package of CVCUDA among others. Be sure to import ``pycuda.driver`` before importing any other GPU packages like torch or cvcuda to ensure a proper initialization. .. literalinclude:: ../../../../samples/segmentation/python/main.py :language: python @@ -83,10 +83,10 @@ The ``run_sample`` function is the primary function that runs this sample. It se Next, we instantiate various classes to help us run the sample. These classes are: 1. ``PreprocessorCvcuda`` : A CVCUDA based pre-processing pipeline for semantic segmentation. -2. ``ImageBatchDecoderPyTorch`` : A PyTorch based image decoder to read the images. -3. ``ImageBatchEncoderPyTorch`` : A PyTorch based image encoder to write the images. -4. ``VideoBatchDecoderVPF`` : A VPF based video decoder to read the video. -5. ``VideoBatchEncoderVPF`` : A VPF based video encoder to write the video. +2. ``ImageBatchDecoder`` : A nvImageCodec based image decoder to read the images. +3. ``ImageBatchEncoder`` : A nvImageCodec based image encoder to write the images. +4. ``VideoBatchDecoder`` : A PyNvVideoCodec based video decoder to read the video. +5. ``VideoBatchEncoder`` : A PyNvVideoCodec based video encoder to write the video. 6. ``PostprocessorCvcuda`` : A CVCUDA based post-processing pipeline for semantic segmentation. 7. ``SegmentationPyTorch`` : A PyTorch based semantic segmentation model to execute inference. 8. ``SegmentationTensorRT`` : A TensorRT based semantic segmentation model to execute inference. @@ -121,10 +121,10 @@ That's it for the semantic segmentation sample. 
To understand more about how eac PreprocessorCvcuda PostprocessorCvcuda - ImageBatchDecoderPyTorch - ImageBatchEncoderPyTorch - VideoBatchDecoderVPF - VideoBatchEncoderVPF + ImageBatchDecoder + ImageBatchEncoder + VideoBatchDecoder + VideoBatchEncoder SegmentationPyTorch SegmentationTensorRT @@ -182,7 +182,7 @@ This sample takes as input the one or more images or one video and generates the user@machine:~/cvcuda/samples$ python3 segmentation/python/main.py -b 5 -c __background__ -o /tmp -i assets/images/ [perf_utils:85] 2023-07-27 23:17:49 WARNING perf_utils is used without benchmark.py. Benchmarking mode is turned off. - [perf_utils:89] 2023-07-27 23:17:49 INFO Using CV-CUDA version: 0.5.0-beta + [perf_utils:89] 2023-07-27 23:17:49 INFO Using CV-CUDA version: 0.6.0-beta [pipelines:35] 2023-07-27 23:17:50 INFO Using CVCUDA as preprocessor. [torch_utils:60] 2023-07-27 23:17:50 INFO Found a total of 3 JPEG images. [torch_utils:77] 2023-07-27 23:17:50 INFO Using torchnvjpeg as decoder. diff --git a/lint/copyright_check.sh b/lint/copyright_check.sh index cd6a597ea..d46ecb7c6 100755 --- a/lint/copyright_check.sh +++ b/lint/copyright_check.sh @@ -18,17 +18,19 @@ # Check if input files have valid copyright message # Ref: https://confluence.nvidia.com/display/RP/Standardizing+on+SPDX+Identifiers -valid_license='Apache-2.0' +valid_licenses=('Apache-2.0' 'LicenseRef-NvidiaProprietary') # Detects that the line is a comment. -rgx_comment='^[[:space:]]*[[:graph:]]\+[[:space:]]\+' +# The following line detects comments in source code and mark down files. +# It can detect c++ style comments, python style comments or markdown style comments. +rgx_comment='^[[:space:]]*[[:graph:]]\+[[:space:]]\+[[:graph:]]*[[:space:]]*["]*' function get_tag() { local tag=$1 shift - local rgx="s@^\($rgx_comment\)\?$tag:[[:space:]]*\(.*\)@\2@p" + local rgx="s@^\($rgx_comment\)\?$tag:[[:space:]]*\([^\"]*\)\"*@\2@p" sed -n "$rgx" "$file" } @@ -56,8 +58,9 @@ function check_license() fi # Check if it is valid - if [[ "$license" != "$valid_license" ]]; then - error "$file" "License '$license' not valid. Must be '$valid_license'." && false + if [[ ! " ${valid_licenses[*]} " =~ [[:space:]]${license}[[:space:]] ]]; then + valid_licenses_str="${valid_licenses[*]}" + error "$file" "License '$license' not valid. Must be a value from '${valid_licenses_str//${IFS:0:1}/, }'." && false fi } diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ea384fb18..7647d0491 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,7 +15,7 @@ cmake_minimum_required(VERSION 3.18) -project(nvcv_python CXX C) +project(cvcuda_python CXX C) set(CMAKE_CXX_STANDARD 20) @@ -37,6 +37,11 @@ string(REPLACE "." 
"" PYTHON_MODULE_NAME "${PYTHON_MODULE_NAME}") include(GNUInstallDirs) set(PYTHON_MODULE_FILENAME_LIST "" CACHE INTERNAL "") + +if(CMAKE_BUILD_TYPE STREQUAL "Release") + add_custom_target(wheel ALL) +endif() + function(nvcv_python_add_module) cmake_parse_arguments(ARG "SHARED;MODULE" "TARGET;OUTPUT_NAME" "SOURCES" ${ARGV}) @@ -74,9 +79,14 @@ function(nvcv_python_add_module) set(PYTHON_MODULE_FILENAME_LIST "${PYTHON_MODULE_FILENAME_LIST};${prefix}${ARG_OUTPUT_NAME}${suffix}" CACHE INTERNAL "") + if(CMAKE_BUILD_TYPE STREQUAL "Release") + add_dependencies(wheel ${ARG_TARGET}) + endif() + install(TARGETS ${ARG_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/python COMPONENT ${PYTHON_MODULE_NAME} + ) endfunction() @@ -91,3 +101,10 @@ string(JOIN " " PYTHON_MODULE_FILENAME_LIST ${PYTHON_MODULE_FILENAME_LIST}) configure_file(cpack/debian_python_postinst.in cpack/postinst @ONLY) configure_file(cpack/debian_python_prerm.in cpack/prerm @ONLY) + +# Create Python wheel +if(CMAKE_BUILD_TYPE STREQUAL "Release") + add_custom_command( + TARGET wheel + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/build_wheels.sh" "${BUILD_ROOT}" ${PYTHON_VERSION_SHORT} ) +endif() diff --git a/python/build_wheels.sh b/python/build_wheels.sh new file mode 100755 index 000000000..ecc162092 --- /dev/null +++ b/python/build_wheels.sh @@ -0,0 +1,84 @@ +#!/bin/bash -e + +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Creates the Python self contained wheels + +# Usage: build_wheels.sh [build_artifacts_dir] [python_versions] +# Note: This script is automatically called by cmake/make. The proper way to +# build python wheels is to issue the command: +# +# Do not run this script outside of cmake. + +set -e # Stops this script if any one command fails. + +if [ "$#" -lt 2 ]; then + echo "Usage: build_wheels.sh [python_versions,...]" + exit 1 +fi + +BUILD_DIR=$(realpath "$1"); shift +PY_VERSIONS=("$@") +LIB_DIR="${BUILD_DIR}/lib" + +echo "BUILD_DIR: $BUILD_DIR" +echo "Python Versions: ${PY_VERSIONS[*]}" + +for py_version in "${PY_VERSIONS[@]}" +do + py_version_flat="${py_version//./}" # Gets the non dotted version string + echo "Building Python wheels for: Python${py_version}" + + # Step 1: Create a directories to store all wheels related files for this python version + py_dir="${BUILD_DIR}/python${py_version}" + wheel_dir="${py_dir}/wheel" + mkdir -p "${wheel_dir}" + rm -rf ${wheel_dir:?}/* + mkdir -p "${wheel_dir}/cvcuda.libs" + + cd "${wheel_dir}" + + # Step 2: Copy necessary .so files under one directory + # We will copy the target of the linked file and not the symlink only. + # Also the new file-name of the .so will be the actual so-name present inside the header of the .so + # This can be retrieved by using patchelf. + # This allows us to copy .so files without knowing their versions and also making sure they still + # work after copying. 
+ # Copy the core .so files first + for so_file_name in libcvcuda.so libnvcv_types.so + do + cp -L "${LIB_DIR}/${so_file_name}" \ + "${wheel_dir}/cvcuda.libs/`patchelf --print-soname "${LIB_DIR}/${so_file_name}"`" + done + + # Copy the bindings .so files + patch them in their rpath. + # This allows the bindings to find the core .so files in a directory named cvcuda.libs only. + for so_file_path in ${LIB_DIR}/python/*.cpython-${py_version_flat}*.so + do + so_file_name=$(basename ${so_file_path}) + cp -L "${so_file_path}" \ + "${wheel_dir}/" + + patchelf --force-rpath --set-rpath '$ORIGIN'/cvcuda.libs "${wheel_dir}/${so_file_name}" + done + + # Step 3: Copy the setup.py corresponding to current python version to our wheels directory. + cp "${py_dir}/setup.py" "${wheel_dir}" + + # Step 3: Create wheel + python${py_version} setup.py bdist_wheel --dist-dir="${wheel_dir}" + +done diff --git a/python/mod_cvcuda/CMakeLists.txt b/python/mod_cvcuda/CMakeLists.txt index 5db4089ac..45ecc94e0 100644 --- a/python/mod_cvcuda/CMakeLists.txt +++ b/python/mod_cvcuda/CMakeLists.txt @@ -42,6 +42,7 @@ nvcv_python_add_module( OpBoxBlur.cpp OpBrightnessContrast.cpp OpColorTwist.cpp + OpHQResize.cpp OsdElement.cpp OpRemap.cpp RemapMapValueType.cpp diff --git a/python/mod_cvcuda/InterpolationType.cpp b/python/mod_cvcuda/InterpolationType.cpp index 7b6c0fa10..eb1c934e1 100644 --- a/python/mod_cvcuda/InterpolationType.cpp +++ b/python/mod_cvcuda/InterpolationType.cpp @@ -30,6 +30,7 @@ void ExportInterpolationType(py::module &m) .value("AREA", NVCV_INTERP_AREA, "Area-based (resampling using pixels in area) interpolation") .value("LANCZOS", NVCV_INTERP_LANCZOS, "Lanczos interpolation") .value("WARP_INVERSE_MAP", NVCV_WARP_INVERSE_MAP, "Inverse transformation") + .value("GAUSSIAN", NVCV_INTERP_GAUSSIAN, "Gaussian interpolation") .value("HAMMING", NVCV_INTERP_HAMMING, "Hamming interpolation") .value("BOX", NVCV_INTERP_BOX, "Box interpolation") .def("__or__", [](NVCVInterpolationType e1, NVCVInterpolationType e2) { return int(e1) | int(e2); }); diff --git a/python/mod_cvcuda/Main.cpp b/python/mod_cvcuda/Main.cpp index 226336f24..130d01680 100644 --- a/python/mod_cvcuda/Main.cpp +++ b/python/mod_cvcuda/Main.cpp @@ -106,6 +106,7 @@ PYBIND11_MODULE(cvcuda, m) ExportOpBoxBlur(m); ExportOpBrightnessContrast(m); ExportOpColorTwist(m); + ExportOpHQResize(m); ExportOpRemap(m); ExportOpCropFlipNormalizeReformat(m); ExportOpNonMaximumSuppression(m); diff --git a/python/mod_cvcuda/OpAdaptiveThreshold.cpp b/python/mod_cvcuda/OpAdaptiveThreshold.cpp index 6b2c17b18..30801fb5f 100644 --- a/python/mod_cvcuda/OpAdaptiveThreshold.cpp +++ b/python/mod_cvcuda/OpAdaptiveThreshold.cpp @@ -41,9 +41,9 @@ Tensor AdaptiveThresholdInto(Tensor &output, Tensor &input, double max_value, NV auto adaptiveThreshold = CreateOperator(block_size, 0); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*adaptiveThreshold}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*adaptiveThreshold}); adaptiveThreshold->submit(pstream->cudaHandle(), input, output, max_value, adaptive_method, threshold_type, block_size, c); @@ -72,9 +72,9 @@ ImageBatchVarShape AdaptiveThresholdVarShapeInto(ImageBatchVarShape &output, Ima auto adaptiveThreshold = CreateOperator(max_block_size, input.capacity()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, 
max_value, block_size, c}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*adaptiveThreshold}); + guard.add(LockMode::LOCK_MODE_READ, {input, max_value, block_size, c}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*adaptiveThreshold}); adaptiveThreshold->submit(pstream->cudaHandle(), input, output, max_value, adaptive_method, threshold_type, block_size, c); diff --git a/python/mod_cvcuda/OpAdvCvtColor.cpp b/python/mod_cvcuda/OpAdvCvtColor.cpp index f24337b75..c9a4eff2e 100644 --- a/python/mod_cvcuda/OpAdvCvtColor.cpp +++ b/python/mod_cvcuda/OpAdvCvtColor.cpp @@ -37,9 +37,9 @@ Tensor AdvCvtColorInto(Tensor &output, Tensor &input, NVCVColorConversionCode co auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), input, output, code, spec); return std::move(output); } diff --git a/python/mod_cvcuda/OpAverageBlur.cpp b/python/mod_cvcuda/OpAverageBlur.cpp index 74070cc65..dc37c337f 100644 --- a/python/mod_cvcuda/OpAverageBlur.cpp +++ b/python/mod_cvcuda/OpAverageBlur.cpp @@ -45,9 +45,9 @@ Tensor AverageBlurInto(Tensor &output, Tensor &input, const std::tuple auto averageBlur = CreateOperator(kernelSizeArg, 0); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*averageBlur}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_WRITE, {*averageBlur}); averageBlur->submit(pstream->cudaHandle(), input, output, kernelSizeArg, kernelAnchorArg, border); @@ -76,9 +76,9 @@ ImageBatchVarShape AverageBlurVarShapeInto(ImageBatchVarShape &output, ImageBatc auto averageBlur = CreateOperator(maxKernelSizeArg, input.capacity()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, kernel_size, kernel_anchor}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*averageBlur}); + guard.add(LockMode::LOCK_MODE_READ, {input, kernel_size, kernel_anchor}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*averageBlur}); averageBlur->submit(pstream->cudaHandle(), input, output, kernel_size, kernel_anchor, border); diff --git a/python/mod_cvcuda/OpBilateralFilter.cpp b/python/mod_cvcuda/OpBilateralFilter.cpp index 4f5df728e..8e844351d 100644 --- a/python/mod_cvcuda/OpBilateralFilter.cpp +++ b/python/mod_cvcuda/OpBilateralFilter.cpp @@ -42,9 +42,9 @@ Tensor BilateralFilterInto(Tensor &output, Tensor &input, int diameter, float si auto bilateral_filter = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*bilateral_filter}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*bilateral_filter}); bilateral_filter->submit(pstream->cudaHandle(), input, output, diameter, sigmaColor, sigmaSpace, borderMode); @@ -71,9 +71,9 @@ ImageBatchVarShape VarShapeBilateralFilterInto(ImageBatchVarShape &output, Image auto bilateral_filter = CreateOperator(); ResourceGuard 
guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, diameter, sigmaColor, sigmaSpace}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*bilateral_filter}); + guard.add(LockMode::LOCK_MODE_READ, {input, diameter, sigmaColor, sigmaSpace}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*bilateral_filter}); bilateral_filter->submit(pstream->cudaHandle(), input, output, diameter, sigmaColor, sigmaSpace, borderMode); diff --git a/python/mod_cvcuda/OpBndBox.cpp b/python/mod_cvcuda/OpBndBox.cpp index a446347f9..1551832f7 100644 --- a/python/mod_cvcuda/OpBndBox.cpp +++ b/python/mod_cvcuda/OpBndBox.cpp @@ -36,9 +36,9 @@ Tensor BndBoxInto(Tensor &output, Tensor &input, NVCVBndBoxesI bboxes, std::opti auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), input, output, bboxes); diff --git a/python/mod_cvcuda/OpBoxBlur.cpp b/python/mod_cvcuda/OpBoxBlur.cpp index 747bf9740..2c1b21dab 100644 --- a/python/mod_cvcuda/OpBoxBlur.cpp +++ b/python/mod_cvcuda/OpBoxBlur.cpp @@ -36,9 +36,9 @@ Tensor BoxBlurInto(Tensor &output, Tensor &input, NVCVBlurBoxesI bboxes, std::op auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), input, output, bboxes); diff --git a/python/mod_cvcuda/OpBrightnessContrast.cpp b/python/mod_cvcuda/OpBrightnessContrast.cpp index b7921850a..f0c106dd9 100644 --- a/python/mod_cvcuda/OpBrightnessContrast.cpp +++ b/python/mod_cvcuda/OpBrightnessContrast.cpp @@ -58,16 +58,16 @@ auto runGuard(Op &op, Src &src, Dst &dst, std::optional &brightness, std } ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {src}); + guard.add(LockMode::LOCK_MODE_READ, {src}); for (auto &arg : {brightness, contrast, brightnessShift, contrastCenter}) { if (arg) { - guard.add(LockMode::LOCK_READ, {*arg}); + guard.add(LockMode::LOCK_MODE_READ, {*arg}); } } - guard.add(LockMode::LOCK_WRITE, {dst}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_WRITE, {dst}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); call(*pstream, brightness ? *brightness : nvcv::Tensor{nullptr}, contrast ? *contrast : nvcv::Tensor{nullptr}, brightnessShift ? 
*brightnessShift : nvcv::Tensor{nullptr}, diff --git a/python/mod_cvcuda/OpCenterCrop.cpp b/python/mod_cvcuda/OpCenterCrop.cpp index c8eea222b..259928511 100644 --- a/python/mod_cvcuda/OpCenterCrop.cpp +++ b/python/mod_cvcuda/OpCenterCrop.cpp @@ -44,9 +44,9 @@ Tensor CenterCropInto(Tensor &output, Tensor &input, const std::tuple auto center_crop = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*center_crop}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*center_crop}); nvcv::Size2D cropSizeArg{std::get<0>(cropSize), std::get<1>(cropSize)}; diff --git a/python/mod_cvcuda/OpChannelReorder.cpp b/python/mod_cvcuda/OpChannelReorder.cpp index 8bc15732a..653dd359e 100644 --- a/python/mod_cvcuda/OpChannelReorder.cpp +++ b/python/mod_cvcuda/OpChannelReorder.cpp @@ -44,9 +44,9 @@ ImageBatchVarShape ChannelReorderVarShapeInto(ImageBatchVarShape &output, ImageB auto chReorder = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, orders}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*chReorder}); + guard.add(LockMode::LOCK_MODE_READ, {input, orders}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*chReorder}); chReorder->submit(pstream->cudaHandle(), input, output, orders); diff --git a/python/mod_cvcuda/OpColorTwist.cpp b/python/mod_cvcuda/OpColorTwist.cpp index c37ee3069..54c44404e 100644 --- a/python/mod_cvcuda/OpColorTwist.cpp +++ b/python/mod_cvcuda/OpColorTwist.cpp @@ -56,9 +56,9 @@ auto runGuard(Op &op, Src &src, Dst &dst, const Tensor &twist, std::optional(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {foreground, background, fgMask}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*composite}); + guard.add(LockMode::LOCK_MODE_READ, {foreground, background, fgMask}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*composite}); composite->submit(pstream->cudaHandle(), foreground, background, fgMask, output); @@ -73,9 +73,9 @@ ImageBatchVarShape CompositeVarShapeInto(ImageBatchVarShape &output, ImageBatchV auto composite = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {foreground, background, fgMask}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*composite}); + guard.add(LockMode::LOCK_MODE_READ, {foreground, background, fgMask}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*composite}); composite->submit(pstream->cudaHandle(), foreground, background, fgMask, output); diff --git a/python/mod_cvcuda/OpConv2D.cpp b/python/mod_cvcuda/OpConv2D.cpp index 8fc0ab9fd..41d6f64c2 100644 --- a/python/mod_cvcuda/OpConv2D.cpp +++ b/python/mod_cvcuda/OpConv2D.cpp @@ -44,9 +44,9 @@ ImageBatchVarShape Conv2DVarShapeInto(ImageBatchVarShape &output, ImageBatchVarS auto conv2D = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, kernel, kernel_anchor}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*conv2D}); + guard.add(LockMode::LOCK_MODE_READ, {input, kernel, kernel_anchor}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*conv2D}); 
conv2D->submit(pstream->cudaHandle(), input, output, kernel, kernel_anchor, border); diff --git a/python/mod_cvcuda/OpConvertTo.cpp b/python/mod_cvcuda/OpConvertTo.cpp index f60759029..767c54fcd 100644 --- a/python/mod_cvcuda/OpConvertTo.cpp +++ b/python/mod_cvcuda/OpConvertTo.cpp @@ -36,9 +36,9 @@ Tensor ConvertToInto(Tensor &output, Tensor &input, float scale, float offset, s auto cvt = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*cvt}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*cvt}); cvt->submit(pstream->cudaHandle(), input, output, scale, offset); diff --git a/python/mod_cvcuda/OpCopyMakeBorder.cpp b/python/mod_cvcuda/OpCopyMakeBorder.cpp index 6711948be..8a3075699 100644 --- a/python/mod_cvcuda/OpCopyMakeBorder.cpp +++ b/python/mod_cvcuda/OpCopyMakeBorder.cpp @@ -55,9 +55,9 @@ Tensor CopyMakeBorderInto(Tensor &output, Tensor &input, NVCVBorderType borderMo auto copyMakeBorder = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*copyMakeBorder}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*copyMakeBorder}); copyMakeBorder->submit(pstream->cudaHandle(), input, output, top, left, borderMode, bValue); @@ -101,9 +101,9 @@ Tensor VarShapeCopyMakeBorderStackInto(Tensor &output, ImageBatchVarShape &input auto copyMakeBorder = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, top, left}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*copyMakeBorder}); + guard.add(LockMode::LOCK_MODE_READ, {input, top, left}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*copyMakeBorder}); copyMakeBorder->submit(pstream->cudaHandle(), input, output, top, left, borderMode, bValue); @@ -149,9 +149,9 @@ ImageBatchVarShape VarShapeCopyMakeBorderInto(ImageBatchVarShape &output, ImageB auto copyMakeBorder = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, top, left}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*copyMakeBorder}); + guard.add(LockMode::LOCK_MODE_READ, {input, top, left}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*copyMakeBorder}); copyMakeBorder->submit(pstream->cudaHandle(), input, output, top, left, borderMode, bValue); diff --git a/python/mod_cvcuda/OpCropFlipNormalizeReformat.cpp b/python/mod_cvcuda/OpCropFlipNormalizeReformat.cpp index bfae999b7..1eaacfce5 100644 --- a/python/mod_cvcuda/OpCropFlipNormalizeReformat.cpp +++ b/python/mod_cvcuda/OpCropFlipNormalizeReformat.cpp @@ -52,9 +52,9 @@ Tensor CropFlipNormalizeReformatInto(Tensor &output, ImageBatchVarShape &input, } ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, cropRect, flipCode, base, scale}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input, cropRect, flipCode, base, scale}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), input, output, cropRect, borderMode, borderValue, flipCode, 
base, scale, globalScale, globalShift, epsilon, *flags); diff --git a/python/mod_cvcuda/OpCustomCrop.cpp b/python/mod_cvcuda/OpCustomCrop.cpp index af6b2ff80..c448eccda 100644 --- a/python/mod_cvcuda/OpCustomCrop.cpp +++ b/python/mod_cvcuda/OpCustomCrop.cpp @@ -38,9 +38,9 @@ Tensor CustomCropInto(Tensor &output, Tensor &input, const NVCVRectI &rcCrop, st auto crop = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*crop}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*crop}); crop->submit(pstream->cudaHandle(), input, output, rcCrop); diff --git a/python/mod_cvcuda/OpCvtColor.cpp b/python/mod_cvcuda/OpCvtColor.cpp index 3b8eb883a..39118b477 100644 --- a/python/mod_cvcuda/OpCvtColor.cpp +++ b/python/mod_cvcuda/OpCvtColor.cpp @@ -45,9 +45,9 @@ Tensor CvtColorInto(Tensor &output, Tensor &input, NVCVColorConversionCode code, auto cvtColor = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*cvtColor}); + guard.add(LockMode::LOCK_MODE_READWRITE, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*cvtColor}); cvtColor->submit(pstream->cudaHandle(), input, output, code); @@ -89,9 +89,9 @@ ImageBatchVarShape CvtColorVarShapeInto(ImageBatchVarShape &output, ImageBatchVa auto cvtColor = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*cvtColor}); + guard.add(LockMode::LOCK_MODE_READWRITE, {input}); + guard.add(LockMode::LOCK_MODE_READWRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*cvtColor}); cvtColor->submit(pstream->cudaHandle(), input, output, code); diff --git a/python/mod_cvcuda/OpErase.cpp b/python/mod_cvcuda/OpErase.cpp index e73022967..7f7503e26 100644 --- a/python/mod_cvcuda/OpErase.cpp +++ b/python/mod_cvcuda/OpErase.cpp @@ -47,9 +47,9 @@ Tensor EraseInto(Tensor &output, Tensor &input, Tensor &anchor, Tensor &erasing, auto erase = CreateOperator((int)shape[0]); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, anchor, erasing, values, imgIdx}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*erase}); + guard.add(LockMode::LOCK_MODE_READ, {input, anchor, erasing, values, imgIdx}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*erase}); erase->submit(pstream->cudaHandle(), input, output, anchor, erasing, values, imgIdx, random, seed); @@ -83,9 +83,9 @@ ImageBatchVarShape EraseVarShapeInto(ImageBatchVarShape &output, ImageBatchVarSh auto erase = CreateOperator((int)shape[0]); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, anchor, erasing, values, imgIdx}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*erase}); + guard.add(LockMode::LOCK_MODE_READ, {input, anchor, erasing, values, imgIdx}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*erase}); erase->submit(pstream->cudaHandle(), input, output, anchor, erasing, values, imgIdx, random, seed); diff --git a/python/mod_cvcuda/OpFindContours.cpp b/python/mod_cvcuda/OpFindContours.cpp index 5202905b0..137bf645f 100644 --- 
a/python/mod_cvcuda/OpFindContours.cpp +++ b/python/mod_cvcuda/OpFindContours.cpp @@ -46,10 +46,10 @@ TupleTensor2 FindContoursInto(Tensor &points, Tensor &numPoints, Tensor &input, auto findContours = CreateOperator(size, static_cast(input.shape()[0])); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {points}); - guard.add(LockMode::LOCK_WRITE, {numPoints}); - guard.add(LockMode::LOCK_WRITE, {*findContours}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {points}); + guard.add(LockMode::LOCK_MODE_WRITE, {numPoints}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*findContours}); findContours->submit(pstream->cudaHandle(), input, points, numPoints); diff --git a/python/mod_cvcuda/OpFindHomography.cpp b/python/mod_cvcuda/OpFindHomography.cpp index 125535981..3560cc91f 100644 --- a/python/mod_cvcuda/OpFindHomography.cpp +++ b/python/mod_cvcuda/OpFindHomography.cpp @@ -151,9 +151,10 @@ Tensor FindHomographyInto(Tensor &models, Tensor &srcPts, Tensor &dstPts, std::o auto findHomography = CreateOperatorEx(batchSize, numPoints); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {srcPts}); - guard.add(LockMode::LOCK_READ, {dstPts}); - guard.add(LockMode::LOCK_WRITE, {models}); + guard.add(LockMode::LOCK_MODE_READ, {srcPts}); + guard.add(LockMode::LOCK_MODE_READ, {dstPts}); + guard.add(LockMode::LOCK_MODE_READWRITE, {models}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*findHomography}); findHomography->submit(pstream->cudaHandle(), srcPts, dstPts, models); @@ -194,9 +195,10 @@ TensorBatch VarShapeFindHomographyInto(TensorBatch &models, TensorBatch &srcPts, auto findHomography = CreateOperatorEx(batchSize, maxNumPoints); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {srcPts}); - guard.add(LockMode::LOCK_READ, {dstPts}); - guard.add(LockMode::LOCK_WRITE, {models}); + guard.add(LockMode::LOCK_MODE_READ, {srcPts}); + guard.add(LockMode::LOCK_MODE_READ, {dstPts}); + guard.add(LockMode::LOCK_MODE_READWRITE, {models}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*findHomography}); findHomography->submit(pstream->cudaHandle(), srcPts, dstPts, models); diff --git a/python/mod_cvcuda/OpFlip.cpp b/python/mod_cvcuda/OpFlip.cpp index e63dec27b..72dce09d3 100644 --- a/python/mod_cvcuda/OpFlip.cpp +++ b/python/mod_cvcuda/OpFlip.cpp @@ -41,9 +41,9 @@ Tensor FlipInto(Tensor &output, Tensor &input, int32_t flipCode, std::optional(0); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*Flip}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*Flip}); Flip->submit(pstream->cudaHandle(), input, output, flipCode); @@ -68,9 +68,9 @@ ImageBatchVarShape FlipVarShapeInto(ImageBatchVarShape &output, ImageBatchVarSha auto flip = CreateOperator(0); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, flipCode}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*flip}); + guard.add(LockMode::LOCK_MODE_READ, {input, flipCode}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*flip}); flip->submit(pstream->cudaHandle(), input, output, flipCode); diff --git a/python/mod_cvcuda/OpGammaContrast.cpp b/python/mod_cvcuda/OpGammaContrast.cpp index 85a644916..8df72480b 100644 --- a/python/mod_cvcuda/OpGammaContrast.cpp +++ 
b/python/mod_cvcuda/OpGammaContrast.cpp @@ -42,9 +42,9 @@ ImageBatchVarShape VarShapeGammaContrastInto(ImageBatchVarShape &output, ImageBa auto gamma_contrast = CreateOperator(input.capacity(), input.uniqueFormat().numChannels()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, gamma}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*gamma_contrast}); + guard.add(LockMode::LOCK_MODE_READ, {input, gamma}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*gamma_contrast}); gamma_contrast->submit(pstream->cudaHandle(), input, output, gamma); diff --git a/python/mod_cvcuda/OpGaussian.cpp b/python/mod_cvcuda/OpGaussian.cpp index 89634c79a..fdf9de806 100644 --- a/python/mod_cvcuda/OpGaussian.cpp +++ b/python/mod_cvcuda/OpGaussian.cpp @@ -46,9 +46,9 @@ Tensor GaussianInto(Tensor &output, Tensor &input, const std::tuple &k auto gaussian = CreateOperator(kernelSizeArg, 0); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*gaussian}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*gaussian}); gaussian->submit(pstream->cudaHandle(), input, output, kernelSizeArg, sigmaArg, border); @@ -77,9 +77,9 @@ ImageBatchVarShape VarShapeGaussianInto(ImageBatchVarShape &output, ImageBatchVa auto gaussian = CreateOperator(maxKernelSizeArg, input.capacity()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, ksize, sigma}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*gaussian}); + guard.add(LockMode::LOCK_MODE_READ, {input, ksize, sigma}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*gaussian}); gaussian->submit(pstream->cudaHandle(), input, output, ksize, sigma, border); diff --git a/python/mod_cvcuda/OpGaussianNoise.cpp b/python/mod_cvcuda/OpGaussianNoise.cpp index 255572851..94280ceba 100644 --- a/python/mod_cvcuda/OpGaussianNoise.cpp +++ b/python/mod_cvcuda/OpGaussianNoise.cpp @@ -41,9 +41,9 @@ Tensor GaussianNoiseInto(Tensor &output, Tensor &input, Tensor &mu, Tensor &sigm auto gaussiannoise = CreateOperator((int)shape[0]); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, mu, sigma}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*gaussiannoise}); + guard.add(LockMode::LOCK_MODE_READ, {input, mu, sigma}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*gaussiannoise}); gaussiannoise->submit(pstream->cudaHandle(), input, output, mu, sigma, per_channel, seed); @@ -70,9 +70,9 @@ ImageBatchVarShape GaussianNoiseVarShapeInto(ImageBatchVarShape &output, ImageBa auto gaussiannoise = CreateOperator(input.numImages()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, mu, sigma}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*gaussiannoise}); + guard.add(LockMode::LOCK_MODE_READ, {input, mu, sigma}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*gaussiannoise}); gaussiannoise->submit(pstream->cudaHandle(), input, output, mu, sigma, per_channel, seed); diff --git a/python/mod_cvcuda/OpHQResize.cpp b/python/mod_cvcuda/OpHQResize.cpp new file mode 100644 index 000000000..295771013 --- /dev/null +++ 
b/python/mod_cvcuda/OpHQResize.cpp @@ -0,0 +1,761 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Operators.hpp" +#include "WorkspaceCache.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cvcudapy { + +namespace { + +using Roi = pybind11::tuple; +using Rois = std::vector; + +inline void GetMinMagInterpolation(NVCVInterpolationType &minInterpolationArg, + NVCVInterpolationType &magInterpolationArg, + const std::optional &interpolation, + const std::optional &minInterpolation, + const std::optional &magInterpolation) +{ + if (interpolation) + { + if (minInterpolation || magInterpolation) + { + throw py::value_error( + "When `interpolation` is specified, the `min_interpolation` and `mag_interpolation` should not be " + "specified."); + } + minInterpolationArg = magInterpolationArg = *interpolation; + } + else + { + if (!minInterpolation || !magInterpolation) + { + throw py::value_error( + "Either `interpolation`, or both `min_interpolation` and `mag_interpolation` must be specified."); + } + minInterpolationArg = *minInterpolation; + magInterpolationArg = *magInterpolation; + } +} + +inline void ParseRoi(HQResizeRoiF &parsedRoi, const Roi &roi, int ndim) +{ + assert(ndim == 2 || ndim == 3); + auto roiSize = roi.size(); + if (roiSize != static_cast(2 * ndim)) + { + if (ndim == 2) + { + throw std::runtime_error( + "Got wrong number of ROI components. For image resize, 4 integers are expected: " + "low_height, low_width, high_height, high_width describing the bounding box for " + "the input."); + } + else + { + throw std::runtime_error( + "Got wrong number of ROI components. For volumetric data, 6 integers are expected: " + "low_depth, low_height, low_width, high_depth, high_height, high_width " + "describing the bounding box for the input."); + } + } + for (int d = 0; d < ndim; d++) + { + parsedRoi.lo[d] = roi[d].cast(); + } + for (int d = 0; d < ndim; d++) + { + parsedRoi.hi[d] = roi[ndim + d].cast(); + } +} + +class RoiHelper +{ +public: + RoiHelper(const std::optional &maybeRois, int ndim) + : m_ndim{ndim} + { + if (maybeRois) + { + auto &rois = *maybeRois; + m_rois.resize(rois.size()); + for (uint64_t i = 0; i < rois.size(); i++) + { + auto &roi = m_rois[i]; + auto &passedRoi = rois[i]; + ParseRoi(roi, passedRoi, ndim); + } + } + } + + RoiHelper(const std::optional &maybeRoi, int ndim) + : m_ndim{ndim} + { + if (maybeRoi) + { + m_rois.resize(1); + ParseRoi(m_rois[0], *maybeRoi, ndim); + } + } + + HQResizeRoisF NonOwningHandle() + { + int32_t size = m_rois.size(); + HQResizeRoiF *data = size == 0 ? 
nullptr : m_rois.data(); + return {size, m_ndim, data}; + } + +private: + int m_ndim; + std::vector m_rois; +}; + +inline HQResizeTensorShapeI TensorShape(const nvcv::TensorLayout &layout, const nvcv::TensorShape &shape, + int resizeNDim) +{ + assert(resizeNDim == 2 || resizeNDim == 3); + + char shapeArgLayout[4] = "DHW"; + HQResizeTensorShapeI tensorShape; + for (int d = 0; d < resizeNDim; d++) + { + int axis = layout.find(shapeArgLayout[d + 3 - resizeNDim]); + if (axis < 0) + { + throw std::runtime_error( + "The layout of an input tensor to the resize operator must contain HW extents in the layout (for " + "images) or DHW extents (for 3D resampling). Some extents are missing in the input tensor."); + } + tensorShape.extent[d] = shape[axis]; + } + int channelAxis = layout.find('C'); + tensorShape.numChannels = channelAxis < 0 ? 1 : shape[channelAxis]; + tensorShape.ndim = resizeNDim; + return tensorShape; +} + +class BatchShapesHelper +{ +public: + BatchShapesHelper(const nvcv::ImageBatchVarShape &batch) + { + int32_t numSamples = batch.numImages(); + m_shapes.resize(numSamples); + m_ndim = 2; + m_numChannels = batch.uniqueFormat().numChannels(); + for (int i = 0; i < numSamples; i++) + { + const auto &imgShape = batch[i].size(); + auto &shape = m_shapes[i]; + shape.extent[0] = imgShape.h; + shape.extent[1] = imgShape.w; + } + } + + BatchShapesHelper(const TensorBatch &batch) + { + int32_t numSamples = batch.numTensors(); + auto layout = batch.layout(); + bool hasDepth = layout.find('D') >= 0; + m_ndim = hasDepth ? 3 : 2; + m_numChannels = -1; + m_shapes.resize(numSamples); + for (int i = 0; i < numSamples; i++) + { + const auto &tensor = batch[i]; + m_shapes[i] = TensorShape(layout, tensor.shape(), m_ndim); + if (i == 0) + { + m_numChannels = m_shapes[i].numChannels; + } + else if (m_numChannels != m_shapes[i].numChannels) + { + m_numChannels = -1; + } + } + } + + HQResizeTensorShapesI NonOwningHandle() + { + int32_t size = m_shapes.size(); + return {size ? m_shapes.data() : nullptr, size, m_ndim, m_numChannels}; + } + +private: + int32_t m_ndim; + int32_t m_numChannels; + std::vector m_shapes; +}; + +inline Shape ResizedTensorShape(const nvcv::TensorLayout &srcLayout, const nvcv::TensorShape &srcShape, + const Shape &outShape) +{ + int resizeNDim = outShape.size(); + if (resizeNDim != 2 && resizeNDim != 3) + { + throw std::runtime_error( + "The `out_shape` must be a tuple of 2 or 3 integers (for 2D or 3D resampling respectively)."); + } + + bool hasDepth = srcLayout.find('D') >= 0; + int expectedNDim = hasDepth ? 3 : 2; + + if (expectedNDim != resizeNDim) + { + if (hasDepth) + { + throw std::runtime_error( + "The input tensor contains depth extent (`D`) in the layout. For 3D resize, please specify the resized " + "shape for 3 extents: depth, height, and width. Got 2 extents."); + } + else + { + throw std::runtime_error( + "Expected the resized shape to consists of 2 integers: for resized height and width. 
Got 3 integers."); + } + } + + char shapeArgLayout[4] = "DHW"; + int shapeArg[3]; + for (int d = 0; d < resizeNDim; d++) + { + shapeArg[d] = outShape[d].cast(); + } + + Shape resizedShape(srcShape.rank()); + for (int i = 0; i < srcShape.rank(); i++) + { + resizedShape[i] = srcShape[i]; + } + + assert(srcShape.rank() == srcLayout.rank()); + for (int d = 0; d < resizeNDim; d++) + { + int axis = srcLayout.find(shapeArgLayout[d + 3 - resizeNDim]); + if (axis < 0) + { + throw std::runtime_error( + "The layout of an input tensor to the resize operator must contain HW extents in the layout (for " + "images) or DHW extents (for 3D resampling). Some extents are missing in the input tensor."); + } + resizedShape[axis] = shapeArg[d]; + } + return resizedShape; +} + +class PyOpHQResize : public nvcvpy::Container +{ +public: + // Define a Key class to be used by the cache to fetch similar items for potential reuse. + class Key : public nvcvpy::IKey + { + public: + // the filters are generated by the operator constructor for a given device + Key(int deviceId) + : m_deviceId{deviceId} + { + } + + private: + size_t doGetHash() const override + { + return m_deviceId; + } + + bool doIsCompatible(const nvcvpy::IKey &that_) const override + { + const Key *thatKey = dynamic_cast(&that_); + return thatKey != nullptr && thatKey->m_deviceId == m_deviceId; + } + + int m_deviceId; + }; + + PyOpHQResize(int deviceId) + : m_key(deviceId) + , m_op() + { + } + + void submit(cudaStream_t stream, const Tensor &in, const Tensor &out, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoiF *roi) + { + if (in.layout() != out.layout()) + { + throw std::runtime_error("Input and output tensors must have the same layout"); + } + + int resizeNDim = in.layout().find('D') >= 0 ? 
3 : 2; + + auto inAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(in.exportData()); + if (!inAccess) + { + throw std::runtime_error("Incompatible input tensor layout"); + } + + auto outAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(out.exportData()); + if (!outAccess) + { + throw std::runtime_error("Incompatible input tensor layout"); + } + + int numSamples = inAccess->numSamples(); + HQResizeTensorShapeI inShape = TensorShape(in.layout(), in.shape(), resizeNDim); + HQResizeTensorShapeI outShape = TensorShape(out.layout(), out.shape(), resizeNDim); + + auto req = m_op.getWorkspaceRequirements(numSamples, inShape, outShape, minInterpolation, magInterpolation, + antialias, roi); + auto ws = WorkspaceCache::instance().get(req, stream); + m_op(stream, ws.get(), in, out, minInterpolation, magInterpolation, antialias, roi); + } + + void submit(cudaStream_t stream, const nvcv::ImageBatchVarShape &in, const nvcv::ImageBatchVarShape &out, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoisF rois) + { + BatchShapesHelper inShapes(in); + BatchShapesHelper outShapes(out); + auto req + = m_op.getWorkspaceRequirements(in.numImages(), inShapes.NonOwningHandle(), outShapes.NonOwningHandle(), + minInterpolation, magInterpolation, antialias, rois); + auto ws = WorkspaceCache::instance().get(req, stream); + m_op(stream, ws.get(), in, out, minInterpolation, magInterpolation, antialias, rois); + } + + void submit(cudaStream_t stream, const TensorBatch &in, const TensorBatch &out, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoisF rois) + { + if (in.layout() != out.layout()) + { + throw std::runtime_error("Input and output batches must have the same layout"); + } + BatchShapesHelper inShapes(in); + BatchShapesHelper outShapes(out); + auto req + = m_op.getWorkspaceRequirements(in.numTensors(), inShapes.NonOwningHandle(), outShapes.NonOwningHandle(), + minInterpolation, magInterpolation, antialias, rois); + auto ws = WorkspaceCache::instance().get(req, stream); + m_op(stream, ws.get(), in, out, minInterpolation, magInterpolation, antialias, rois); + } + + // Required override to get the py object container. + py::object container() const override + { + return *this; + } + + // Required override to get the key as the base interface class. + const nvcvpy::IKey &key() const override + { + return m_key; + } + + static std::shared_ptr fetch(std::vector> &cache) + { + assert(!cache.empty()); + return cache[0]; + } + +private: + Key m_key; + cvcuda::HQResize m_op; +}; + +template +auto RunGuard(Op &op, Src &src, Dst &dst, Stream &stream, Call &&call) +{ + ResourceGuard guard(stream); + guard.add(LockMode::LOCK_MODE_READ, {src}); + guard.add(LockMode::LOCK_MODE_WRITE, {dst}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); + + call(); +} + +auto CreatePyOpHQResize() +{ + int deviceId; + NVCV_CHECK_THROW(cudaGetDevice(&deviceId)); + return CreateOperatorEx(deviceId); +} + +Tensor TensorHQResizeInto(Tensor &dst, Tensor &src, std::optional antialias, std::optional maybeRoi, + std::optional interpolation, + std::optional minInterpolation, + std::optional magInterpolation, std::optional pstream) +{ + Stream stream = pstream ? *pstream : Stream::Current(); + auto op = CreatePyOpHQResize(); + + bool hasDepth = src.layout().find('D') >= 0; + int resizeNDim = hasDepth ? 
3 : 2; + RoiHelper parsedRoi(maybeRoi, resizeNDim); + const HQResizeRoiF *roi = parsedRoi.NonOwningHandle().roi; + + NVCVInterpolationType minInterpolationArg, magInterpolationArg; + GetMinMagInterpolation(minInterpolationArg, magInterpolationArg, interpolation, minInterpolation, magInterpolation); + + RunGuard(op, src, dst, stream, + [&]() + { + op->submit(stream.cudaHandle(), src, dst, minInterpolationArg, magInterpolationArg, + antialias.value_or(false), roi); + }); + return dst; +} + +Tensor TensorHQResize(Tensor &src, const Shape &outShape, std::optional antialias, std::optional roi, + std::optional interpolation, + std::optional minInterpolation, + std::optional magInterpolation, std::optional pstream) +{ + auto resizedShape = ResizedTensorShape(src.layout(), src.shape(), outShape); + Tensor dst = Tensor::Create(resizedShape, src.dtype(), src.layout()); + return TensorHQResizeInto(dst, src, antialias, roi, interpolation, minInterpolation, magInterpolation, pstream); +} + +ImageBatchVarShape VarShapeHQResizeInto(ImageBatchVarShape &dst, const ImageBatchVarShape &src, + std::optional antialias, const std::optional &roi, + std::optional interpolation, + std::optional minInterpolation, + std::optional magInterpolation, + std::optional pstream) +{ + Stream stream = pstream ? *pstream : Stream::Current(); + auto op = CreatePyOpHQResize(); + + RoiHelper parsedRoi(roi, 2); + NVCVInterpolationType minInterpolationArg, magInterpolationArg; + GetMinMagInterpolation(minInterpolationArg, magInterpolationArg, interpolation, minInterpolation, magInterpolation); + + RunGuard(op, src, dst, stream, + [&]() + { + op->submit(stream.cudaHandle(), src, dst, minInterpolationArg, magInterpolationArg, + antialias.value_or(false), parsedRoi.NonOwningHandle()); + }); + return dst; +} + +ImageBatchVarShape VarShapeHQResize(ImageBatchVarShape &src, const std::vector> &outShape, + std::optional antialias, const std::optional &roi, + std::optional interpolation, + std::optional minInterpolation, + std::optional magInterpolation, + std::optional pstream) +{ + ImageBatchVarShape out = ImageBatchVarShape::Create(src.capacity()); + + int32_t numOutSizes = outShape.size(); + if (numOutSizes != src.numImages() && numOutSizes != 1) + { + throw std::runtime_error( + "The list of output shapes `out_size` must either contain a single shape to be used for all output images " + "or its length must match the number of input samples."); + } + + for (int i = 0; i < src.numImages(); ++i) + { + auto size = outShape[numOutSizes == 1 ? 0 : i]; + auto image = Image::Create({std::get<1>(size), std::get<0>(size)}, src[i].format()); + out.pushBack(image); + } + + return VarShapeHQResizeInto(out, src, antialias, roi, interpolation, minInterpolation, magInterpolation, pstream); +} + +TensorBatch TensorBatchHQResizeInto(TensorBatch &dst, const TensorBatch &src, std::optional antialias, + const std::optional &roi, std::optional interpolation, + std::optional minInterpolation, + std::optional magInterpolation, + std::optional pstream) +{ + Stream stream = pstream ? *pstream : Stream::Current(); + auto op = CreatePyOpHQResize(); + + bool hasDepth = src.layout().find('D') >= 0; + int resizeNDim = hasDepth ? 
3 : 2; + RoiHelper parsedRoi(roi, resizeNDim); + + NVCVInterpolationType minInterpolationArg, magInterpolationArg; + GetMinMagInterpolation(minInterpolationArg, magInterpolationArg, interpolation, minInterpolation, magInterpolation); + + RunGuard(op, src, dst, stream, + [&]() + { + op->submit(stream.cudaHandle(), src, dst, minInterpolationArg, magInterpolationArg, + antialias.value_or(false), parsedRoi.NonOwningHandle()); + }); + return dst; +} + +TensorBatch TensorBatchHQResize(TensorBatch &src, const std::vector &outShape, std::optional antialias, + const std::optional &roi, std::optional interpolation, + std::optional minInterpolation, + std::optional magInterpolation, std::optional pstream) +{ + TensorBatch out = TensorBatch::Create(src.numTensors()); + + int32_t numOutSizes = outShape.size(); + if (numOutSizes != src.numTensors() && numOutSizes != 1) + { + throw std::runtime_error( + "The list of output shapes `out_size` must either contain a single shape to be used for all output tensors " + "or its length must match the number of input tensors."); + } + + for (int i = 0; i < src.numTensors(); ++i) + { + auto sampleShape = outShape[numOutSizes == 1 ? 0 : i]; + const auto &inSample = src[i]; + auto resizedShape = ResizedTensorShape(inSample.layout(), inSample.shape(), sampleShape); + Tensor dst = Tensor::Create(resizedShape, src.dtype(), src.layout()); + out.pushBack(dst); + } + + return TensorBatchHQResizeInto(out, src, antialias, roi, interpolation, minInterpolation, magInterpolation, + pstream); +} + +} // namespace + +void ExportOpHQResize(py::module &m) +{ + using namespace pybind11::literals; + + m.def("hq_resize", &TensorHQResize, "src"_a, "out_size"_a, py::kw_only(), "antialias"_a = false, "roi"_a = nullptr, + "interpolation"_a = nullptr, "min_interpolation"_a = nullptr, "mag_interpolation"_a = nullptr, + "stream"_a = nullptr, R"pbdoc( + Executes the HQ Resize operation on the given cuda stream. The operator + supports resampling for 2D (images) and 3D volumetric samples. + + See also: + Refer to the CV-CUDA C API reference for the HQ Resize operator + for more details and usage examples. + + Args: + src (Tensor): Input tensor containing one or more images. + The tensor layout must match: (N)(D)HW(C). + out_size (Shape): Tuple of 2 or 3 ints describing the output shape in (D)HW layout. + antialias (bool): If set to true, an antialiasing is enabled for scaling down. + roi(Tuple): Optional bounding box describing the input's region of interest. + For 2D resampling it should be (lowH, lowW, highH, highW), + for 3D: (lowD, lowH, lowW, highD, highH, highW). + If, for some axis, the low bound is bigger than the high bound, + the image is flipped across the axis. + interpolation(Interp): Interpolation type used. Used both for scaling down and up, + cannot be specified together with (min_interpolation or mag_interpolation). + min_interpolation(Interp): Interpolation type used for scaling down. + mag_interpolation(Interp): Interpolation type used for scaling up. + stream (Stream, optional): CUDA Stream on which to perform the operation. + + Returns: + cvcuda.Tensor: The output tensor. + + Caution: + Restrictions to several arguments may apply. Check the C + API references of the CV-CUDA operator. 
+ )pbdoc"); + m.def("hq_resize", &VarShapeHQResize, "src"_a, "out_size"_a, py::kw_only(), "antialias"_a = false, + "roi"_a = nullptr, "interpolation"_a = nullptr, "min_interpolation"_a = nullptr, + "mag_interpolation"_a = nullptr, "stream"_a = nullptr, R"pbdoc( + Executes the HQ Resize operation on the given cuda stream. + + See also: + Refer to the CV-CUDA C API reference for the HQ Resize operator + for more details and usage examples. + + Args: + src (ImageBatchVarShape): Input batch of images. + out_size (Shape): Tuple of 2 ints describing the output shape in HW layout. + antialias (bool): If set to true, an antialiasing is enabled for scaling down. + roi(List[Tuple]): Optional bounding boxes describing the input's region of interest. + It should be a list of tuples. The list length must match the number + of input tensors or be 1 (so that the same ROI is used for all samples). + Each tuple must be of the form (lowH, lowW, highH, highW). + If, for some axis, the low bound is bigger than the high bound, + the image is flipped across the axis. + interpolation(Interp): Interpolation type used. Used both for scaling down and up, + cannot be specified together with (min_interpolation or mag_interpolation). + min_interpolation(Interp): Interpolation type used for scaling down. + mag_interpolation(Interp): Interpolation type used for scaling up. + stream (Stream, optional): CUDA Stream on which to perform the operation. + + Returns: + cvcuda.ImageBatchVarShape: The batch of resized images. + + Caution: + Restrictions to several arguments may apply. Check the C + API references of the CV-CUDA operator. + )pbdoc"); + m.def("hq_resize", &TensorBatchHQResize, "src"_a, "out_size"_a, py::kw_only(), "antialias"_a = false, + "roi"_a = nullptr, "interpolation"_a = nullptr, "min_interpolation"_a = nullptr, + "mag_interpolation"_a = nullptr, "stream"_a = nullptr, R"pbdoc( + Executes the HQ Resize operation on the given cuda stream. The operator + supports resampling for 2D (images) and 3D volumetric samples. + + See also: + Refer to the CV-CUDA C API reference for the HQ Resize operator + for more details and usage examples. + + Args: + src (TensorBatch): Input batch containing one or more tensors of (D)HW(C) layout. + out_size (Shape): Tuple of 2 or 3 ints describing the output shape in (D)HW layout. + antialias (bool): If set to true, an antialiasing is enabled for scaling down. + roi(List[Tuple]): Optional bounding boxes describing the input's region of interest. + It should be a list of tuples. The list length must match the number + of input tensors or be 1 (so that the same ROI is used for all samples). + Each tuple must be of the form: + * for 2D resampling: (lowH, lowW, highH, highW), + * for 3D: (lowD, lowH, lowW, highD, highH, highW). + If, for some axis, the low bound is bigger than the high bound, + the tensor is flipped across the axis. + interpolation(Interp): Interpolation type used. Used both for scaling down and up, + cannot be specified together with (min_interpolation or mag_interpolation). + min_interpolation(Interp): Interpolation type used for scaling down. + mag_interpolation(Interp): Interpolation type used for scaling up. + stream (Stream, optional): CUDA Stream on which to perform the operation. + + Returns: + cvcuda.TensorBatch: The batch of resized tensors. + + Caution: + Restrictions to several arguments may apply. Check the C + API references of the CV-CUDA operator. 
+ )pbdoc"); + m.def("hq_resize_into", &TensorHQResizeInto, "dst"_a, "src"_a, py::kw_only(), "antialias"_a = false, + "roi"_a = nullptr, "interpolation"_a = nullptr, "min_interpolation"_a = nullptr, + "mag_interpolation"_a = nullptr, "stream"_a = nullptr, R"pbdoc( + Executes the HQ Resize operation on the given cuda stream. The operator + supports resampling for 2D (images) and 3D volumetric samples. + + See also: + Refer to the CV-CUDA C API reference for the HQ Resize operator + for more details and usage examples. + + Args: + dst (Tensor): Output tensor. Its layout must match the src tensor. + The size of D, H, and W extents may be different. The dst + type must match the src's type or be float32. + src (Tensor): Input tensor containing one or more images. + The tensor layout must match: (N)(D)HW(C). + antialias (bool): If set to true, an antialiasing is enabled for scaling down. + roi(Tuple): Optional bounding box describing the input's region of interest. + For 2D resampling it should be (lowH, lowW, highH, highW), + for 3D: (lowD, lowH, lowW, highD, highH, highW). + If, for some axis, the low bound is bigger than the high bound, + the image is flipped across the axis. + interpolation(Interp): Interpolation type used. Used both for scaling down and up, + cannot be specified together with (min_interpolation or mag_interpolation). + min_interpolation(Interp): Interpolation type used for scaling down. + mag_interpolation(Interp): Interpolation type used for scaling up. + stream (Stream, optional): CUDA Stream on which to perform the operation. + + Returns: + cvcuda.Tensor: The output tensor. + + Caution: + Restrictions to several arguments may apply. Check the C + API references of the CV-CUDA operator. + )pbdoc"); + m.def("hq_resize_into", &VarShapeHQResizeInto, "dst"_a, "src"_a, py::kw_only(), "antialias"_a = false, + "roi"_a = nullptr, "interpolation"_a = nullptr, "min_interpolation"_a = nullptr, + "mag_interpolation"_a = nullptr, "stream"_a = nullptr, R"pbdoc( + Executes the HQ Resize operation on the given cuda stream. + + See also: + Refer to the CV-CUDA C API reference for the HQ Resize operator + for more details and usage examples. + + Args: + dst (ImageBatchVarShape): Output batch. The layout must match the input batch. + The size of D, H, and W extents may be different. The dst + type must match the src's type or be float32. + src (ImageBatchVarShape): Input batch of images. + antialias (bool): If set to true, an antialiasing is enabled for scaling down. + roi(List[Tuple]): Optional bounding boxes describing the input's region of interest. + It should be a list of tuples. The list length must match the number + of input tensors or be 1 (so that the same ROI is used for all samples). + Each tuple must be of the form (lowH, lowW, highH, highW). + If, for some axis, the low bound is bigger than the high bound, + the image is flipped across the axis. + interpolation(Interp): Interpolation type used. Used both for scaling down and up, + cannot be specified together with (min_interpolation or mag_interpolation). + min_interpolation(Interp): Interpolation type used for scaling down. + mag_interpolation(Interp): Interpolation type used for scaling up. + stream (Stream, optional): CUDA Stream on which to perform the operation. + + Returns: + cvcuda.ImageBatchVarShape: The batch of resized images. + + Caution: + Restrictions to several arguments may apply. Check the C + API references of the CV-CUDA operator.
+ )pbdoc"); + m.def("hq_resize_into", &TensorBatchHQResizeInto, "dst"_a, "src"_a, py::kw_only(), "antialias"_a = false, + "roi"_a = nullptr, "interpolation"_a = nullptr, "min_interpolation"_a = nullptr, + "mag_interpolation"_a = nullptr, "stream"_a = nullptr, R"pbdoc( + Executes the HQ Resize operation on the given cuda stream. The operator + supports resampling for 2D (images) and 3D volumetric samples. + + See also: + Refer to the CV-CUDA C API reference for the HQ Resize operator + for more details and usage examples. + + Args: + dst (TensorBatch): Output batch. The layout must match the input batch. + The size of D, H, and W extents may be different. The dst + type must match the src's type or be float32. + src (TensorBatch): Input batch containing one or more tensors of (D)HW(C) layout. + antialias (bool): If set to true, an antialiasing is enabled for scaling down. + roi(List[Tuple]): Optional bounding boxes describing the input's region of interest. + It should be a list of tuples. The list length must match the number + of input tensors or be 1 (so that the same ROI is used for all samples). + Each tuple must be of the form: + * for 2D resampling: (lowH, lowW, highH, highW), + * for 3D: (lowD, lowH, lowW, highD, highH, highW). + If, for some axis, the low bound is bigger than the high bound, + the tensor is flipped across the axis. + interpolation(Interp): Interpolation type used. Used both for scaling down and up, + cannot be specified together with (min_interpolation or mag_interpolation). + min_interpolation(Interp): Interpolation type used for scaling down. + mag_interpolation(Interp): Interpolation type used for scaling up. + stream (Stream, optional): CUDA Stream on which to perform the operation. + + Returns: + cvcuda.TensorBatch: The batch of resized tensors. + + Caution: + Restrictions to several arguments may apply. Check the C + API references of the CV-CUDA operator. 
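For the `*_into` variants bound above, the destination is preallocated by the caller, which is useful when output memory is reused across iterations. A hedged sketch, again assuming torch-backed buffers wrapped with `nvcv.as_tensor`:

```python
# Hedged sketch (illustrative only): write the resized result into a
# preallocated destination tensor. dst keeps the source dtype here; per the
# docstring, float32 output is the other allowed choice.
import torch
import nvcv
import cvcuda

src_buf = torch.rand(480, 640, 3, device="cuda", dtype=torch.float32)
dst_buf = torch.empty(240, 320, 3, device="cuda", dtype=torch.float32)

src = nvcv.as_tensor(src_buf, "HWC")
dst = nvcv.as_tensor(dst_buf, "HWC")

cvcuda.hq_resize_into(dst, src, antialias=True,
                      interpolation=cvcuda.Interp.CUBIC)
```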
+ )pbdoc"); +} + +} // namespace cvcudapy diff --git a/python/mod_cvcuda/OpHistogram.cpp b/python/mod_cvcuda/OpHistogram.cpp index a9aea9b70..715882433 100644 --- a/python/mod_cvcuda/OpHistogram.cpp +++ b/python/mod_cvcuda/OpHistogram.cpp @@ -45,13 +45,13 @@ Tensor HistogramInto(Tensor &histogram, Tensor &input, std::optional mas auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {histogram}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {histogram}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); if (mask) { - guard.add(LockMode::LOCK_READ, {*mask}); + guard.add(LockMode::LOCK_MODE_READ, {*mask}); op->submit(pstream->cudaHandle(), input, *mask, histogram); } else diff --git a/python/mod_cvcuda/OpHistogramEq.cpp b/python/mod_cvcuda/OpHistogramEq.cpp index ca13fe87b..30bf40384 100644 --- a/python/mod_cvcuda/OpHistogramEq.cpp +++ b/python/mod_cvcuda/OpHistogramEq.cpp @@ -37,9 +37,9 @@ Tensor HistogramEqInto(Tensor &output, Tensor &input, std::optional pstr auto op = CreateOperator((uint32_t)shape[0]); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*op}); op->submit(pstream->cudaHandle(), input, output); @@ -64,9 +64,9 @@ ImageBatchVarShape HistogramEqVarShapeInto(ImageBatchVarShape &output, ImageBatc auto op = CreateOperator((uint32_t)input.numImages()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*op}); op->submit(pstream->cudaHandle(), input, output); diff --git a/python/mod_cvcuda/OpInpaint.cpp b/python/mod_cvcuda/OpInpaint.cpp index 21176e6a0..36af7a0ec 100644 --- a/python/mod_cvcuda/OpInpaint.cpp +++ b/python/mod_cvcuda/OpInpaint.cpp @@ -128,9 +128,9 @@ Tensor InpaintInto(Tensor &output, Tensor &input, Tensor &masks, double inpaintR auto inpaint = CreateOperatorEx((int)shape[0], maxShape); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, masks}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*inpaint}); + guard.add(LockMode::LOCK_MODE_READ, {input, masks}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*inpaint}); inpaint->submit(pstream->cudaHandle(), input, masks, output, inpaintRadius); @@ -155,9 +155,9 @@ ImageBatchVarShape InpaintVarShapeInto(ImageBatchVarShape &output, ImageBatchVar auto inpaint = CreateOperatorEx(input.numImages(), maxShape); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, masks}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*inpaint}); + guard.add(LockMode::LOCK_MODE_READ, {input, masks}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*inpaint}); inpaint->submit(pstream->cudaHandle(), input, masks, output, inpaintRadius); diff --git a/python/mod_cvcuda/OpJointBilateralFilter.cpp b/python/mod_cvcuda/OpJointBilateralFilter.cpp index 0298ec8f5..243054c4a 100644 --- 
a/python/mod_cvcuda/OpJointBilateralFilter.cpp +++ b/python/mod_cvcuda/OpJointBilateralFilter.cpp @@ -42,9 +42,9 @@ Tensor JointBilateralFilterInto(Tensor &output, Tensor &input, Tensor &inputColo auto joint_bilateral_filter = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, inputColor}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*joint_bilateral_filter}); + guard.add(LockMode::LOCK_MODE_READ, {input, inputColor}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*joint_bilateral_filter}); joint_bilateral_filter->submit(pstream->cudaHandle(), input, inputColor, output, diameter, sigmaColor, sigmaSpace, borderMode); @@ -73,9 +73,9 @@ ImageBatchVarShape VarShapeJointBilateralFilterInto(ImageBatchVarShape &output, auto joint_bilateral_filter = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, inputColor, diameter, sigmaColor, sigmaSpace}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*joint_bilateral_filter}); + guard.add(LockMode::LOCK_MODE_READ, {input, inputColor, diameter, sigmaColor, sigmaSpace}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*joint_bilateral_filter}); joint_bilateral_filter->submit(pstream->cudaHandle(), input, inputColor, output, diameter, sigmaColor, sigmaSpace, borderMode); diff --git a/python/mod_cvcuda/OpLabel.cpp b/python/mod_cvcuda/OpLabel.cpp index eb89d55ba..1d45618d8 100644 --- a/python/mod_cvcuda/OpLabel.cpp +++ b/python/mod_cvcuda/OpLabel.cpp @@ -45,33 +45,33 @@ TupleTensor3 LabelInto(Tensor &output, std::optional count, std::optiona auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); if (count) { - guard.add(LockMode::LOCK_WRITE, {*count}); + guard.add(LockMode::LOCK_MODE_WRITE, {*count}); } if (stats) { - guard.add(LockMode::LOCK_WRITE, {*stats}); + guard.add(LockMode::LOCK_MODE_WRITE, {*stats}); } if (bgLabel) { - guard.add(LockMode::LOCK_READ, {*bgLabel}); + guard.add(LockMode::LOCK_MODE_READ, {*bgLabel}); } if (minThresh) { - guard.add(LockMode::LOCK_READ, {*minThresh}); + guard.add(LockMode::LOCK_MODE_READ, {*minThresh}); } if (maxThresh) { - guard.add(LockMode::LOCK_READ, {*maxThresh}); + guard.add(LockMode::LOCK_MODE_READ, {*maxThresh}); } if (minSize) { - guard.add(LockMode::LOCK_READ, {*minSize}); + guard.add(LockMode::LOCK_MODE_READ, {*minSize}); } op->submit(pstream->cudaHandle(), input, output, (bgLabel ? 
*bgLabel : nvcv::Tensor{nullptr}), diff --git a/python/mod_cvcuda/OpLaplacian.cpp b/python/mod_cvcuda/OpLaplacian.cpp index c90388b52..3b5655837 100644 --- a/python/mod_cvcuda/OpLaplacian.cpp +++ b/python/mod_cvcuda/OpLaplacian.cpp @@ -43,9 +43,9 @@ Tensor LaplacianInto(Tensor &output, Tensor &input, const int &ksize, const floa auto laplacian = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*laplacian}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*laplacian}); laplacian->submit(pstream->cudaHandle(), input, output, ksize, scale, border); @@ -71,9 +71,9 @@ ImageBatchVarShape LaplacianVarShapeInto(ImageBatchVarShape &output, ImageBatchV auto laplacian = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, ksize, scale}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*laplacian}); + guard.add(LockMode::LOCK_MODE_READ, {input, ksize, scale}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*laplacian}); laplacian->submit(pstream->cudaHandle(), input, output, ksize, scale, border); diff --git a/python/mod_cvcuda/OpMedianBlur.cpp b/python/mod_cvcuda/OpMedianBlur.cpp index 72a2fa9d4..2122945ba 100644 --- a/python/mod_cvcuda/OpMedianBlur.cpp +++ b/python/mod_cvcuda/OpMedianBlur.cpp @@ -42,9 +42,9 @@ Tensor MedianBlurInto(Tensor &output, Tensor &input, const std::tuple auto median_blur = CreateOperator(0); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*median_blur}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*median_blur}); nvcv::Size2D ksizeArg{std::get<0>(ksize), std::get<1>(ksize)}; @@ -71,9 +71,9 @@ ImageBatchVarShape VarShapeMedianBlurInto(ImageBatchVarShape &output, ImageBatch auto median_blur = CreateOperator(input.capacity()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, ksize}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*median_blur}); + guard.add(LockMode::LOCK_MODE_READ, {input, ksize}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*median_blur}); median_blur->submit(pstream->cudaHandle(), input, output, ksize); diff --git a/python/mod_cvcuda/OpMinAreaRect.cpp b/python/mod_cvcuda/OpMinAreaRect.cpp index 9c9cdd9f0..30fcb7d6b 100644 --- a/python/mod_cvcuda/OpMinAreaRect.cpp +++ b/python/mod_cvcuda/OpMinAreaRect.cpp @@ -37,9 +37,9 @@ Tensor MinAreaRectInto(Tensor &output, Tensor &input, Tensor &numPointsInContour auto minAreaRect = CreateOperator(totalContours); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, numPointsInContour}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*minAreaRect}); + guard.add(LockMode::LOCK_MODE_READ, {input, numPointsInContour}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*minAreaRect}); minAreaRect->submit(pstream->cudaHandle(), input, output, numPointsInContour, totalContours); diff --git a/python/mod_cvcuda/OpMinMaxLoc.cpp b/python/mod_cvcuda/OpMinMaxLoc.cpp index 94d55573b..eb1eaa80b 100644 --- 
a/python/mod_cvcuda/OpMinMaxLoc.cpp +++ b/python/mod_cvcuda/OpMinMaxLoc.cpp @@ -77,9 +77,9 @@ TupleTensor3 MinLocInto(Tensor &minVal, Tensor &minLoc, Tensor &numMin, InputCon auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {minVal, minLoc, numMin}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {minVal, minLoc, numMin}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), input, minVal, minLoc, numMin, nullptr, nullptr, nullptr); @@ -110,9 +110,9 @@ TupleTensor3 MaxLocInto(Tensor &maxVal, Tensor &maxLoc, Tensor &numMax, InputCon auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {maxVal, maxLoc, numMax}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {maxVal, maxLoc, numMax}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), input, nullptr, nullptr, nullptr, maxVal, maxLoc, numMax); @@ -143,9 +143,9 @@ TupleTensor6 MinMaxLocInto(Tensor &minVal, Tensor &minLoc, Tensor &numMin, Tenso auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {minVal, minLoc, numMin, maxVal, maxLoc, numMax}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {minVal, minLoc, numMin, maxVal, maxLoc, numMax}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), input, minVal, minLoc, numMin, maxVal, maxLoc, numMax); diff --git a/python/mod_cvcuda/OpMorphology.cpp b/python/mod_cvcuda/OpMorphology.cpp index b8cd44954..e3e91cc63 100644 --- a/python/mod_cvcuda/OpMorphology.cpp +++ b/python/mod_cvcuda/OpMorphology.cpp @@ -44,9 +44,9 @@ Tensor MorphologyInto(Tensor &output, Tensor &input, NVCVMorphologyType morph_ty auto morphology = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*morphology}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*morphology}); nvcv::Size2D maskSizeArg{std::get<0>(maskSize), std::get<1>(maskSize)}; int2 anchorArg; @@ -55,7 +55,7 @@ Tensor MorphologyInto(Tensor &output, Tensor &input, NVCVMorphologyType morph_ty if (workspace) { - guard.add(LockMode::LOCK_READ, {*workspace}); + guard.add(LockMode::LOCK_MODE_READ, {*workspace}); morphology->submit(pstream->cudaHandle(), input, output, *workspace, morph_type, maskSizeArg, anchorArg, iteration, border); } @@ -90,14 +90,13 @@ ImageBatchVarShape MorphologyVarShapeInto(ImageBatchVarShape &output, ImageBatch auto morphology = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_READWRITE, {output, masks, anchors}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*morphology}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_READWRITE, {output, masks, anchors}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*morphology}); if (workspace) { - guard.add(LockMode::LOCK_READ, {*workspace}); + guard.add(LockMode::LOCK_MODE_READ, {*workspace}); 
morphology->submit(pstream->cudaHandle(), input, output, *workspace, morph_type, masks, anchors, iteration, borderMode); } diff --git a/python/mod_cvcuda/OpNonMaximumSuppression.cpp b/python/mod_cvcuda/OpNonMaximumSuppression.cpp index 11bdff135..2df26ca84 100644 --- a/python/mod_cvcuda/OpNonMaximumSuppression.cpp +++ b/python/mod_cvcuda/OpNonMaximumSuppression.cpp @@ -42,9 +42,9 @@ Tensor NonMaximumSuppressionInto(Tensor &dst, Tensor &src, Tensor &scores, float auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {src, scores}); - guard.add(LockMode::LOCK_WRITE, {dst}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {src, scores}); + guard.add(LockMode::LOCK_MODE_WRITE, {dst}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), src, dst, scores, scoreThreshold, iouThreshold); diff --git a/python/mod_cvcuda/OpNormalize.cpp b/python/mod_cvcuda/OpNormalize.cpp index 147beaf61..4cdb2d392 100644 --- a/python/mod_cvcuda/OpNormalize.cpp +++ b/python/mod_cvcuda/OpNormalize.cpp @@ -54,9 +54,9 @@ Tensor NormalizeInto(Tensor &output, Tensor &input, Tensor &base, Tensor &scale, auto normalize = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, base, scale}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*normalize}); + guard.add(LockMode::LOCK_MODE_READ, {input, base, scale}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*normalize}); normalize->submit(pstream->cudaHandle(), input, base, scale, output, globalScale, globalShift, epsilon, *flags); @@ -88,9 +88,9 @@ ImageBatchVarShape VarShapeNormalizeInto(ImageBatchVarShape &output, ImageBatchV auto normalize = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, base, scale}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*normalize}); + guard.add(LockMode::LOCK_MODE_READ, {input, base, scale}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*normalize}); normalize->submit(pstream->cudaHandle(), input, base, scale, output, globalScale, globalShift, epsilon, *flags); diff --git a/python/mod_cvcuda/OpOSD.cpp b/python/mod_cvcuda/OpOSD.cpp index 434769af0..fa0dcd93b 100644 --- a/python/mod_cvcuda/OpOSD.cpp +++ b/python/mod_cvcuda/OpOSD.cpp @@ -36,9 +36,9 @@ Tensor OSDInto(Tensor &output, Tensor &input, NVCVElements elements, std::option auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), input, output, elements); diff --git a/python/mod_cvcuda/OpPadAndStack.cpp b/python/mod_cvcuda/OpPadAndStack.cpp index 35d5c8a45..295f80589 100644 --- a/python/mod_cvcuda/OpPadAndStack.cpp +++ b/python/mod_cvcuda/OpPadAndStack.cpp @@ -38,9 +38,9 @@ Tensor PadAndStackInto(Tensor &output, ImageBatchVarShape &input, Tensor &top, T auto padstack = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, top, left}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*padstack}); + guard.add(LockMode::LOCK_MODE_READ, {input, top, left}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); 
+ guard.add(LockMode::LOCK_MODE_NONE, {*padstack}); padstack->submit(pstream->cudaHandle(), input, output, top, left, border, borderValue); diff --git a/python/mod_cvcuda/OpPairwiseMatcher.cpp b/python/mod_cvcuda/OpPairwiseMatcher.cpp index 2b9248d76..195c1f19c 100644 --- a/python/mod_cvcuda/OpPairwiseMatcher.cpp +++ b/python/mod_cvcuda/OpPairwiseMatcher.cpp @@ -50,25 +50,25 @@ TupleTensor3 PairwiseMatcherInto(Tensor &matches, std::optional numMatch auto op = CreateOperator(algoChoice); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {set1, set2}); - guard.add(LockMode::LOCK_WRITE, {matches}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {set1, set2}); + guard.add(LockMode::LOCK_MODE_WRITE, {matches}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); if (numSet1) { - guard.add(LockMode::LOCK_READ, {*numSet1}); + guard.add(LockMode::LOCK_MODE_READ, {*numSet1}); } if (numSet2) { - guard.add(LockMode::LOCK_READ, {*numSet2}); + guard.add(LockMode::LOCK_MODE_READ, {*numSet2}); } if (numMatches) { - guard.add(LockMode::LOCK_WRITE, {*numMatches}); + guard.add(LockMode::LOCK_MODE_WRITE, {*numMatches}); } if (distances) { - guard.add(LockMode::LOCK_WRITE, {*distances}); + guard.add(LockMode::LOCK_MODE_WRITE, {*distances}); } op->submit(pstream->cudaHandle(), set1, set2, (numSet1 ? *numSet1 : nvcv::Tensor{nullptr}), diff --git a/python/mod_cvcuda/OpPillowResize.cpp b/python/mod_cvcuda/OpPillowResize.cpp index 75a5b9088..c66231248 100644 --- a/python/mod_cvcuda/OpPillowResize.cpp +++ b/python/mod_cvcuda/OpPillowResize.cpp @@ -194,9 +194,9 @@ Tensor PillowResizeInto(Tensor &output, Tensor &input, nvcv::ImageFormat format, auto pillowResize = CreateOperatorEx(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*pillowResize}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*pillowResize}); pillowResize->submit(pstream->cudaHandle(), input, output, format, interp); @@ -223,9 +223,9 @@ ImageBatchVarShape VarShapePillowResizeInto(ImageBatchVarShape &output, ImageBat auto pillowResize = CreateOperatorEx(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*pillowResize}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*pillowResize}); pillowResize->submit(pstream->cudaHandle(), input, output, interpolation); diff --git a/python/mod_cvcuda/OpRandomResizedCrop.cpp b/python/mod_cvcuda/OpRandomResizedCrop.cpp index 1f993a1d1..da428a656 100644 --- a/python/mod_cvcuda/OpRandomResizedCrop.cpp +++ b/python/mod_cvcuda/OpRandomResizedCrop.cpp @@ -43,9 +43,9 @@ Tensor RandomResizedCropInto(Tensor &output, Tensor &input, double min_scale, do = CreateOperator(min_scale, max_scale, min_ratio, max_ratio, batchSize, seed); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*randomResizedCrop}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*randomResizedCrop}); randomResizedCrop->submit(pstream->cudaHandle(), input, output, interp); @@ -74,9 +74,9 @@ ImageBatchVarShape 
RandomResizedCropVarShapeInto(ImageBatchVarShape &output, Ima = CreateOperator(min_scale, max_scale, min_ratio, max_ratio, input.capacity(), seed); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*randomResizedCrop}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*randomResizedCrop}); randomResizedCrop->submit(pstream->cudaHandle(), input, output, interp); diff --git a/python/mod_cvcuda/OpReformat.cpp b/python/mod_cvcuda/OpReformat.cpp index ba1609e90..227ba0a0f 100644 --- a/python/mod_cvcuda/OpReformat.cpp +++ b/python/mod_cvcuda/OpReformat.cpp @@ -36,9 +36,9 @@ Tensor ReformatInto(Tensor &output, Tensor &input, std::optional pstream auto reformat = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*reformat}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*reformat}); reformat->submit(pstream->cudaHandle(), input, output); diff --git a/python/mod_cvcuda/OpRemap.cpp b/python/mod_cvcuda/OpRemap.cpp index 84e47b627..3ad42fca7 100644 --- a/python/mod_cvcuda/OpRemap.cpp +++ b/python/mod_cvcuda/OpRemap.cpp @@ -46,9 +46,9 @@ Tensor RemapInto(Tensor &dst, Tensor &src, Tensor &map, NVCVInterpolationType sr auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {src, map}); - guard.add(LockMode::LOCK_WRITE, {dst}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {src, map}); + guard.add(LockMode::LOCK_MODE_WRITE, {dst}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), src, dst, map, srcInterp, mapInterp, mapValueType, alignCorners, borderMode, bValue); @@ -110,9 +110,9 @@ ImageBatchVarShape VarShapeRemapInto(ImageBatchVarShape &dst, ImageBatchVarShape auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {src, map}); - guard.add(LockMode::LOCK_WRITE, {dst}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {src, map}); + guard.add(LockMode::LOCK_MODE_WRITE, {dst}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), src, dst, map, srcInterp, mapInterp, mapValueType, alignCorners, borderMode, bValue); diff --git a/python/mod_cvcuda/OpResize.cpp b/python/mod_cvcuda/OpResize.cpp index f5c32f2b2..7d42dcce7 100644 --- a/python/mod_cvcuda/OpResize.cpp +++ b/python/mod_cvcuda/OpResize.cpp @@ -39,9 +39,9 @@ Tensor ResizeInto(Tensor &output, Tensor &input, NVCVInterpolationType interp, s auto resize = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*resize}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*resize}); resize->submit(pstream->cudaHandle(), input, output, interp); @@ -66,9 +66,9 @@ ImageBatchVarShape ResizeVarShapeInto(ImageBatchVarShape &output, ImageBatchVarS auto resize = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*resize}); + 
guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*resize}); resize->submit(pstream->cudaHandle(), input, output, interp); diff --git a/python/mod_cvcuda/OpRotate.cpp b/python/mod_cvcuda/OpRotate.cpp index ae40328c3..a12965f4e 100644 --- a/python/mod_cvcuda/OpRotate.cpp +++ b/python/mod_cvcuda/OpRotate.cpp @@ -42,9 +42,9 @@ Tensor RotateInto(Tensor &output, Tensor &input, double angleDeg, const std::tup auto rotate = CreateOperator(0); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*rotate}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*rotate}); double2 shiftArg{std::get<0>(shift), std::get<1>(shift)}; @@ -72,9 +72,9 @@ ImageBatchVarShape VarShapeRotateInto(ImageBatchVarShape &output, ImageBatchVarS auto rotate = CreateOperator(input.capacity()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, angleDeg, shift}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*rotate}); + guard.add(LockMode::LOCK_MODE_READ, {input, angleDeg, shift}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*rotate}); rotate->submit(pstream->cudaHandle(), input, output, angleDeg, shift, interpolation); diff --git a/python/mod_cvcuda/OpSIFT.cpp b/python/mod_cvcuda/OpSIFT.cpp index 65c44d4a6..f82fd1dad 100644 --- a/python/mod_cvcuda/OpSIFT.cpp +++ b/python/mod_cvcuda/OpSIFT.cpp @@ -162,9 +162,9 @@ TupleTensor4 SIFTInto(Tensor &featCoords, Tensor &featMetadata, Tensor &featDesc auto op = CreateOperatorEx(inShape, numOctaveLayers); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {in}); - guard.add(LockMode::LOCK_WRITE, {featCoords, featMetadata, featDescriptors, numFeatures}); - guard.add(LockMode::LOCK_WRITE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {in}); + guard.add(LockMode::LOCK_MODE_WRITE, {featCoords, featMetadata, featDescriptors, numFeatures}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*op}); op->submit(pstream->cudaHandle(), in, featCoords, featMetadata, featDescriptors, numFeatures, numOctaveLayers, contrastThreshold, edgeThreshold, initSigma, flags); diff --git a/python/mod_cvcuda/OpStack.cpp b/python/mod_cvcuda/OpStack.cpp index 41c7b891e..da815a876 100644 --- a/python/mod_cvcuda/OpStack.cpp +++ b/python/mod_cvcuda/OpStack.cpp @@ -86,9 +86,9 @@ Tensor StackIntoInternal(Tensor &output, std::vector &tensorList, std::o auto op = CreateOperator(); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {inTensorBatch}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*op}); + guard.add(LockMode::LOCK_MODE_READ, {inTensorBatch}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*op}); op->submit(pstream->cudaHandle(), inTensorBatch, output); return std::move(output); } diff --git a/python/mod_cvcuda/OpThreshold.cpp b/python/mod_cvcuda/OpThreshold.cpp index 3eb1b211f..37c398ca5 100644 --- a/python/mod_cvcuda/OpThreshold.cpp +++ b/python/mod_cvcuda/OpThreshold.cpp @@ -41,9 +41,9 @@ Tensor ThresholdInto(Tensor &output, Tensor &input, Tensor &thresh, Tensor &maxv auto threshold = CreateOperator(type, (int)shape[0]); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, thresh, maxval}); - 
guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*threshold}); + guard.add(LockMode::LOCK_MODE_READ, {input, thresh, maxval}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*threshold}); threshold->submit(pstream->cudaHandle(), input, output, thresh, maxval); @@ -68,9 +68,9 @@ ImageBatchVarShape ThresholdVarShapeInto(ImageBatchVarShape &output, ImageBatchV auto threshold = CreateOperator(type, input.numImages()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, thresh, maxval}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*threshold}); + guard.add(LockMode::LOCK_MODE_READ, {input, thresh, maxval}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*threshold}); threshold->submit(pstream->cudaHandle(), input, output, thresh, maxval); diff --git a/python/mod_cvcuda/OpWarpAffine.cpp b/python/mod_cvcuda/OpWarpAffine.cpp index 9cf94a946..a07c25692 100644 --- a/python/mod_cvcuda/OpWarpAffine.cpp +++ b/python/mod_cvcuda/OpWarpAffine.cpp @@ -64,9 +64,9 @@ Tensor WarpAffineInto(Tensor &output, Tensor &input, const pyarray &xform, const auto warpAffine = CreateOperator(0); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*warpAffine}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*warpAffine}); warpAffine->submit(pstream->cudaHandle(), input, output, xformOutput, flags, borderMode, bValue); @@ -107,9 +107,9 @@ ImageBatchVarShape WarpAffineVarShapeInto(ImageBatchVarShape &output, ImageBatch auto warpAffine = CreateOperator(input.capacity()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, xform}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*warpAffine}); + guard.add(LockMode::LOCK_MODE_READ, {input, xform}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, {*warpAffine}); warpAffine->submit(pstream->cudaHandle(), input, output, xform, flags, borderMode, bValue); diff --git a/python/mod_cvcuda/OpWarpPerspective.cpp b/python/mod_cvcuda/OpWarpPerspective.cpp index b35cbe82a..33536467f 100644 --- a/python/mod_cvcuda/OpWarpPerspective.cpp +++ b/python/mod_cvcuda/OpWarpPerspective.cpp @@ -63,9 +63,9 @@ Tensor WarpPerspectiveInto(Tensor &output, Tensor &input, const pyarray &xform, auto warpPerspective = CreateOperator(0); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_NONE, {*warpPerspective}); + guard.add(LockMode::LOCK_MODE_READ, {input}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_NONE, {*warpPerspective}); warpPerspective->submit(pstream->cudaHandle(), input, output, xformOutput, flags, borderMode, bValue); @@ -106,9 +106,9 @@ ImageBatchVarShape WarpPerspectiveVarShapeInto(ImageBatchVarShape &output, Image auto warpPerspective = CreateOperator(input.capacity()); ResourceGuard guard(*pstream); - guard.add(LockMode::LOCK_READ, {input, xform}); - guard.add(LockMode::LOCK_WRITE, {output}); - guard.add(LockMode::LOCK_WRITE, {*warpPerspective}); + guard.add(LockMode::LOCK_MODE_READ, {input, xform}); + guard.add(LockMode::LOCK_MODE_WRITE, {output}); + guard.add(LockMode::LOCK_MODE_READWRITE, 
{*warpPerspective}); warpPerspective->submit(pstream->cudaHandle(), input, output, xform, flags, borderMode, bValue); diff --git a/python/mod_cvcuda/Operators.hpp b/python/mod_cvcuda/Operators.hpp index 2b8886b6a..b48f11fbd 100644 --- a/python/mod_cvcuda/Operators.hpp +++ b/python/mod_cvcuda/Operators.hpp @@ -81,6 +81,7 @@ void ExportOpBndBox(py::module &m); void ExportOpBoxBlur(py::module &m); void ExportOpBrightnessContrast(py::module &m); void ExportOpColorTwist(py::module &m); +void ExportOpHQResize(py::module &m); void ExportOpRemap(py::module &m); void ExportOpCropFlipNormalizeReformat(py::module &m); void ExportOpAdaptiveThreshold(py::module &m); diff --git a/python/mod_nvcv/CAPI.cpp b/python/mod_nvcv/CAPI.cpp index 6c5f9cd9d..e15f6eff8 100644 --- a/python/mod_nvcv/CAPI.cpp +++ b/python/mod_nvcv/CAPI.cpp @@ -85,19 +85,19 @@ LockMode ToLockMode(PyObject *_mode) std::string s = ToObj(_mode); if (s.empty()) { - return LockMode::LOCK_NONE; + return LockMode::LOCK_MODE_NONE; } else if (s == "r") { - return LockMode::LOCK_READ; + return LockMode::LOCK_MODE_READ; } else if (s == "w") { - return LockMode::LOCK_WRITE; + return LockMode::LOCK_MODE_WRITE; } else if (s == "rw") { - return LockMode::LOCK_READWRITE; + return LockMode::LOCK_MODE_READWRITE; } else { diff --git a/python/mod_nvcv/Resource.cpp b/python/mod_nvcv/Resource.cpp index b6b49476e..afe571569 100644 --- a/python/mod_nvcv/Resource.cpp +++ b/python/mod_nvcv/Resource.cpp @@ -59,11 +59,11 @@ void Resource::submitSignal(Stream &stream, LockMode mode) const { doBeforeSubmitSignal(stream, mode); - if (mode & LOCK_READ) + if (mode & LOCK_MODE_READ) { util::CheckThrow(cudaEventRecord(m_readEvent, stream.handle())); } - if (mode & LOCK_WRITE) + if (mode & LOCK_MODE_WRITE) { util::CheckThrow(cudaEventRecord(m_writeEvent, stream.handle())); } @@ -78,12 +78,12 @@ void Resource::submitSync(Stream &stream, LockMode mode) const void Resource::doSubmitSync(Stream &stream, LockMode mode) const { - if (mode & LOCK_WRITE) + if (mode & LOCK_MODE_WRITE) { util::CheckThrow(cudaStreamWaitEvent(stream.handle(), m_writeEvent)); util::CheckThrow(cudaStreamWaitEvent(stream.handle(), m_readEvent)); } - else if (mode & LOCK_READ) + else if (mode & LOCK_MODE_READ) { util::CheckThrow(cudaStreamWaitEvent(stream.handle(), m_writeEvent)); } @@ -102,12 +102,12 @@ void Resource::doSync(LockMode mode) const { NVCV_ASSERT(PyGILState_Check() == 0); - if (mode & LOCK_WRITE) + if (mode & LOCK_MODE_WRITE) { util::CheckThrow(cudaEventSynchronize(m_writeEvent)); util::CheckThrow(cudaEventSynchronize(m_readEvent)); } - else if (mode & LOCK_READ) + else if (mode & LOCK_MODE_READ) { util::CheckThrow(cudaEventSynchronize(m_writeEvent)); } diff --git a/python/mod_nvcv/include/nvcv/python/LockMode.hpp b/python/mod_nvcv/include/nvcv/python/LockMode.hpp index d9246c32b..571b10126 100644 --- a/python/mod_nvcv/include/nvcv/python/LockMode.hpp +++ b/python/mod_nvcv/include/nvcv/python/LockMode.hpp @@ -22,10 +22,10 @@ namespace nvcvpy { enum LockMode : uint8_t { - LOCK_NONE = 0, - LOCK_READ = 1, - LOCK_WRITE = 2, - LOCK_READWRITE = LOCK_READ | LOCK_WRITE + LOCK_MODE_NONE = 0, + LOCK_MODE_READ = 1, + LOCK_MODE_WRITE = 2, + LOCK_MODE_READWRITE = LOCK_MODE_READ | LOCK_MODE_WRITE }; } // namespace nvcvpy diff --git a/python/mod_nvcv/include/nvcv/python/ResourceGuard.hpp b/python/mod_nvcv/include/nvcv/python/ResourceGuard.hpp index 51a6be4b7..40967a84b 100644 --- a/python/mod_nvcv/include/nvcv/python/ResourceGuard.hpp +++ b/python/mod_nvcv/include/nvcv/python/ResourceGuard.hpp @@ -45,16 
+45,16 @@ class ResourceGuard py::object pyLockMode; switch (mode) { - case LockMode::LOCK_NONE: + case LockMode::LOCK_MODE_NONE: pyLockMode = py::str(""); break; - case LockMode::LOCK_READ: + case LockMode::LOCK_MODE_READ: pyLockMode = py::str("r"); break; - case LockMode::LOCK_WRITE: + case LockMode::LOCK_MODE_WRITE: pyLockMode = py::str("w"); break; - case LockMode::LOCK_READWRITE: + case LockMode::LOCK_MODE_READWRITE: pyLockMode = py::str("rw"); break; } diff --git a/python/setup.py.in b/python/setup.py.in new file mode 100644 index 000000000..c22e9d0ff --- /dev/null +++ b/python/setup.py.in @@ -0,0 +1,81 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is a Python setuptools setup script to generate Python wheels. +# It is in a template form with placeholder fields that looks like ${}. +# This script will be automatically invoked by cmake when Python bindings are built. +# Do not invoke this outside of cmake. + + +from setuptools import setup, Extension +from setuptools.command.build_ext import build_ext + + +class NoBuildExtension(build_ext): + """ + Since CV-CUDA Python wheels are pure pre-compiled binary distribution at this point + without any Python or any other source code files and since the binaries are generated + by cmake system outside and without the knowledge of the setuptools, we must + create a dummy class to build an extension here with no source code in it and + no build steps in it to let setuptools create a platform library instead of a + pure library. Without any extensions in a setup tools project setuptools will + end up creating a purelib package. One can compile cmake/pybind11 code here + as an extension but since that part is handled outside of this file for now + we will simply create an empty extension and a corresponding build step that + actually does nothing but let setuptools know that this is a pure binary distribution. + """ + + def run(self): + return # Do nothing during build time. + + +# Define our PyPI trove classifiers for this project. Many values here are +# placeholders which will be filled in by cmake when this is built. +pypi_trove_classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: GPU :: NVIDIA CUDA", + "Environment :: GPU :: NVIDIA CUDA :: ${CUDA_VERSION_MAJOR}", + "Operating System :: POSIX :: Linux", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: ${PYTHON_VERSION}", + "Programming Language :: Python :: Implementation :: CPython", +] + +# Finally call the setup. 
+setup( + name="cvcuda-cu${CUDA_VERSION_MAJOR}", + description="${CMAKE_PROJECT_DESCRIPTION}", + author="NVIDIA Corporation", + url="https://github.com/CVCUDA/CV-CUDA", + version="${CMAKE_PROJECT_VERSION}${PROJECT_VERSION_SUFFIX}", + packages=[""], # Must be empty to support current CV-CUDA style distribution + package_dir={"": "."}, + package_data={ + "": ["*.so", "cvcuda.libs/*.*"] + }, # Includes the binding .so + core .so files + include_package_data=True, + install_requires=["numpy>=1.23.5"], + python_requires="==${PYTHON_VERSION}.*", + zip_safe=False, + cmdclass={ + "build_ext": NoBuildExtension, # This allows us to make it a platlib. + }, + ext_modules=[ + Extension( + name="UnusedEmptyExtension", sources=[] + ), # This allows us to make it a platlib. + ], + classifiers=pypi_trove_classifiers, +) diff --git a/samples/NOTICE.md b/samples/NOTICE.md index 854ce26b9..496ac11dc 100644 --- a/samples/NOTICE.md +++ b/samples/NOTICE.md @@ -1,3 +1,18 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." +[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + The sample data are obtained from the following sources : - Weimaraner.jpg image is obtained from [wikimedia](https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg) under Creative Commons Attribution-Share Alike 3.0 Unported license. diff --git a/samples/README.md b/samples/README.md index a0c6a150e..8c32d4d28 100644 --- a/samples/README.md +++ b/samples/README.md @@ -1,86 +1,107 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." +[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + # CV-CUDA Samples ## Description -These are some sample applications showcasing various CV-CUDA APIs. Sample applications are available in C++ and Python. +CV-CUDA samples are written to showcase the use of various CV-CUDA APIs to construct fully functional end-to-end deep learning inference pipelines. Sample applications are available in C++ and Python. 
## Pre-requisites -- Recommended linux distros: +- Recommended Linux distributions: - Ubuntu >= 20.04 (tested with 20.04 and 22.04) - WSL2 with Ubuntu >= 20.04 (tested with 20.04) -- NVIDIA driver - - Linux: Driver version 520.56.06 or higher -- TensorRT == 8.5.2.2 -- NVIDIA Video Processing Framework (https://github.com/NVIDIA/VideoProcessingFramework) - - Follow the instructions from Github (https://github.com/NVIDIA/VideoProcessingFramework/blob/master/README.md) to install it via pip. - - Note: ffmpeg is a VPF dependency. It can be built from source by following these steps (https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html). The version of ffmpeg that comes/installs via apt-get in Ubuntu 20.04 may not be sufficient for VPF. - - Note: When installing VPF in a docker image like TensorRT, there is no need to install `libnvidia-encode` and `libnvidia-decode` as those already come preinstalled. Other docker images may require an installation of these libraries. -- NVIDIA TAO Converter == 4.0.0 +- NVIDIA driver: + - Linux: Driver version >= 535 +- NVIDIA TensorRT >= 8.6.1 +- NVIDIA nvImageCodec (https://github.com/NVIDIA/nvImageCodec) +- NVIDIA PyNvVideoCodec (https://catalog.ngc.nvidia.com/orgs/nvidia/resources/py_nvvideocodec) +- NVIDIA Video Processing Framework (only if running the Triton sample) (https://github.com/NVIDIA/VideoProcessingFramework) + - Note: When installing VPF in a docker image like TensorRT, there is no need to install `libnvidia-encode` and `libnvidia-decode` as those already come pre-installed. Other docker images may require an installation of these libraries. +- NVIDIA TAO Converter >= 4.0.0 - NVIDIA NSIGHT == 2023.2.1 (only if you wish to run the benchmarking code) -- Python Packages: - - torch == 1.13.0 - - torchvision == 0.14.0 - - torchnvjpeg (https://github.com/itsliupeng/torchnvjpeg) - - av == 10.0.0 - - pycuda == 2022.1 - - nvtx == 0.2.5 +- Additional Python packages requirements listed in the `requirements.txt` file under the `samples/scripts/` folder. + + -Setting up the following is only required if you want to setup and run the samples in a docker container: -- nvidia-docker v2.11.0 -- A working NVIDIA NGC account (visit https://ngc.nvidia.com/setup to get started using NGC) and follow through the NGC documentation here https://docs.nvidia.com/ngc/ngc-catalog-user-guide/index.html#ngc-image-prerequisites -- docker CLI logged into nvcr.io (NGC's docker registry) to be able to pull docker images. +## Setting up the environment +1. We strongly recommend working in a docker container to set things up. This would greatly simplify the process of installing dependencies, compiling and running the samples. The following is required to work in a docker container with CV-CUDA samples: + 1. nvidia-docker >= 2.11.0 + 2. A working NVIDIA NGC account (visit https://ngc.nvidia.com/setup to get started using NGC) and follow through the NGC documentation on https://docs.nvidia.com/ngc/ngc-catalog-user-guide/index.html#ngc-image-prerequisites + 3. docker CLI logged into nvcr.io (NGC's docker registry) to be able to pull docker image. (e.g. using `docker login nvcr.io`) -## Steps to compile the samples from source +2. Clone this CV-CUDA git repository. We would call the location where it is stored as `CVCUDA_ROOT`. -1. Get your CUDA and TensorRT installations ready. If you wish to install CUDA and TensorRT on your existing system you may do so by downloading those packages from NVIDIA's website. 
Or if you wish to work with in a docker container, you can use the TensorRT docker from NVIDIA NGC's catalog. It comes with CUDA and TensorRT pre-installed. Make sure you have setup NGC account properly and that your local docker installation has been logged into nvcr.io domain to be able to pull from that registry. Run the following command to start the container and continue rest of the installation steps in that container. Fill in the local_mount_path and docker_mount_path to reflect any paths on your system which you want to mount inside the container as well. This container comes with Ubuntu 20.04 with Python 3.8.10. +3. Make sure your CUDA and TensorRT installations are ready. If you wish to install CUDA and TensorRT on your existing system, you may do so by downloading those packages from NVIDIA's website. If you are using docker, use the TensorRT container from NVIDIA NGC. It comes with CUDA and TensorRT pre-installed: + 1. Run the following command to start the container and continue the rest of the steps in that container. Fill in the `CVCUDA_ROOT` with the location where you have cloned this CV-CUDA repository. This will make the samples available inside the container at the `/workspace/cvcuda_samples` path. Also fill in the `CVCUDA_INSTALL` with the location where CV-CUDA installation packages (.deb or .whl files) are stored. This container comes with Ubuntu v22.04, Python v3.10.12 and TensorRT v8.6.1. ```bash - docker run -it --gpus=all -v <local_mount_path>:<docker_mount_path> nvcr.io/nvidia/tensorrt:22.09-py3 + docker run -it --gpus=all -v <CVCUDA_ROOT>/samples:/workspace/cvcuda_samples -v <CVCUDA_INSTALL>:/workspace/cvcuda_install nvcr.io/nvidia/tensorrt:24.01-py3 ``` -2. Make sure that the other helper scripts present in the `samples/scripts` folder is executable by executing following chmod commands. +4. Make sure the scripts present in the `/workspace/cvcuda_samples/scripts` directory are executable by executing the following chmod commands: ```bash - cd samples - chmod a+x scripts/*.sh - chmod a+x scripts/*.py + cd /workspace/cvcuda_samples/ # Assuming this is where the samples are + chmod a+x ./scripts/*.sh + chmod a+x ./scripts/*.py ``` -3. Install all the dependencies required to run the samples. These are mentioned above in the prerequisites section. A convenient script to install all the dependencies is available at `scripts/install_dependencies.sh`. This script may require sudo privileges depending on your setup. +5. Install all dependencies required to build and/or run the samples. These are mentioned above in the prerequisites section. A convenient script to install all the dependencies is available at `scripts/install_dependencies.sh`. ```bash + cd /workspace/cvcuda_samples/ # Assuming this is where the samples are ./scripts/install_dependencies.sh ``` -4. Install the CV-CUDA packages. Please note that since the above container comes with Python 3.8.10, we will install nvcv-python3.8-0 package as mentioned below. If you have any other Python distributions, you would need to use the appropriate nvcv-python Debian package below. +6. Install CV-CUDA packages. If you are only interested in running the Python samples, you would be fine installing just the Python wheel. If you are interested in building the non-Python samples from source, the Debian packages are required. Since our docker container has Ubuntu 22.04, CUDA 12 and Python 3.10.12, we will install the corresponding CV-CUDA package as shown below: + 1.
Using the Python wheel (only works for the Python samples): + ```bash + cd /workspace/cvcuda_install/ # Assuming this is where the installation files are + pip install cvcuda_cu12-0.6.0b0-cp310-cp310-linux_x86_64.whl + ``` - ```bash - dpkg -i nvcv-lib-0.5.0_beta-cuda11-x86_64-linux.deb - dpkg -i nvcv-dev-0.5.0_beta-cuda11-x86_64-linux.deb - dpkg -i cvcuda-samples-0.5.0_beta-cuda11-x86_64-linux.deb - dpkg -i nvcv-python3.8-0.5.0_beta-cuda11-x86_64-linux.deb - ``` -5. Copy the samples folder to the target directory. + 2. OR using the Debian packages (required to build the non-Python samples from source, also works for the Python samples): - ```bash - cp -rf /opt/nvidia/cvcuda*/samples ~/ - cd ~/samples - ``` + ```bash + cd /workspace/cvcuda_install/ # Assuming this is where the installation files are + dpkg -i cvcuda-lib-0.6.0_beta-cuda12-x86_64-linux.deb + dpkg -i cvcuda-dev-0.6.0_beta-cuda12-x86_64-linux.deb + dpkg -i cvcuda-python3.10-0.6.0_beta-cuda12-x86_64-linux.deb + ``` + +## Build the samples from source (Not required for Python samples) -6. Build the samples (whichever sample requires a build) +1. After following the [Setting up the environment](#setting-up-the-environment) section, execute the following command to compile the samples from source. This only applies to C++ samples. Python samples do not require any compilation. ```bash - ./scripts/build_samples.sh + cd /workspace/cvcuda_samples/ # Assuming this is where the samples are + ./scripts/build_samples.sh # Writes build files in /workspace/cvcuda_samples/build ``` -7. Run all the samples on by one. The `run_samples.sh` script conveniently runs all the samples in one shot. Some samples may use the TensorRT backend to run the inference and it may require a serialization step to convert a PyTorch model into a TensorRT model. This step should take some time depending on the GPUs used but usually it is only done once during the first run of the sample. The `run_samples.sh` script is supplied to serve only as a basic test case to test the samples under most frequently used command line parameters. It does not cover all the settings and command line parameters a sample may have to offer. Please explore and run the samples individually to explore all the capabilities of the samples. +## Run the samples + +1. After following the [Setting up the environment](#setting-up-the-environment) section and, if needed, compiling the samples from source, one can run the samples manually one by one or use the `scripts/run_samples.sh` script to run all samples in one shot. Some samples use the TensorRT back-end to run inference and it may require a serialization step to convert a PyTorch model into a TensorRT model. This step should take some time depending on the GPU used but usually it is only done once during the first run of the sample. The `scripts/run_samples.sh` script is supplied to serve only as a basic test case to test the samples under most frequently used command line parameters. It does not cover all the settings and command line parameters a sample may have to offer. Please explore and run the samples individually to explore all the capabilities of the samples. ```bash + cd /workspace/cvcuda_samples/ # Assuming this is where the samples are and built samples are in /workspace/cvcuda_samples/build ./scripts/run_samples.sh ``` -## Performance Benchmarking +## Performance Benchmarking of the samples -See the [Performance Benchmarking](scripts/README.md) documentation.
+See the [Performance Benchmarking](scripts/README.md) documentation to understand how to benchmark the samples. diff --git a/samples/classification/CMakeLists.txt b/samples/classification/CMakeLists.txt index 3b27da7b5..a74715d35 100644 --- a/samples/classification/CMakeLists.txt +++ b/samples/classification/CMakeLists.txt @@ -18,13 +18,13 @@ find_package(CUDA REQUIRED) set(CMAKE_CXX_FLAGS "-Wno-deprecated-enum-enum-conversion") # tag: Build classification sample -add_executable(nvcv_samples_classification Main.cpp) -target_link_libraries(nvcv_samples_classification nvcv_types cvcuda CUDA::cudart nvcv_samples_common) +add_executable(cvcuda_sample_classification Main.cpp) +target_link_libraries(cvcuda_sample_classification nvcv_types cvcuda CUDA::cudart cvcuda_samples_common) -target_include_directories(nvcv_samples_classification +target_include_directories(cvcuda_sample_classification PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) -install(TARGETS nvcv_samples_classification - EXPORT nvcv_samples_classification +install(TARGETS cvcuda_sample_classification + EXPORT cvcuda_sample_classification COMPONENT samples DESTINATION samples/bin) diff --git a/samples/classification/Main.cpp b/samples/classification/Main.cpp index ba9c3620f..073c1716f 100644 --- a/samples/classification/Main.cpp +++ b/samples/classification/Main.cpp @@ -198,7 +198,7 @@ int main(int argc, char *argv[]) std::string labelPath = "./engines/imagenet-classes.txt"; uint32_t batchSize = 1; - // Parse the command line paramaters to override the default parameters + // Parse the command line parameters to override the default parameters int retval = ParseArgs(argc, argv, modelPath, imagePath, labelPath, batchSize); if (retval != 0) { diff --git a/samples/classification/python/main.py b/samples/classification/python/main.py index 5e3d53dc1..f12c95f5a 100644 --- a/samples/classification/python/main.py +++ b/samples/classification/python/main.py @@ -38,10 +38,9 @@ parse_validate_default_args, ) -from torch_utils import ImageBatchDecoderPyTorch # noqa: E402 - -from vpf_utils import ( # noqa: E402 - VideoBatchDecoderVPF, +from nvcodec_utils import ( # noqa: E402 + VideoBatchDecoder, + ImageBatchDecoder, ) from pipelines import ( # noqa: E402 @@ -92,7 +91,7 @@ def run_sample( if os.path.splitext(input_path)[1] == ".jpg" or os.path.isdir(input_path): # Treat this as data modality of images - decoder = ImageBatchDecoderPyTorch( + decoder = ImageBatchDecoder( input_path, batch_size, device_id, @@ -102,7 +101,7 @@ def run_sample( else: # Treat this as data modality of videos - decoder = VideoBatchDecoderVPF( + decoder = VideoBatchDecoder( input_path, batch_size, device_id, diff --git a/samples/common/CMakeLists.txt b/samples/common/CMakeLists.txt index 3435f6a03..a114213e8 100644 --- a/samples/common/CMakeLists.txt +++ b/samples/common/CMakeLists.txt @@ -13,15 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-project(nvcv_samples_common LANGUAGES CXX) +project(cvcuda_samples_common LANGUAGES CXX) -add_library(nvcv_samples_common SHARED +add_library(cvcuda_samples_common SHARED TRTUtils.cpp NvDecoder.cpp) -target_compile_options(nvcv_samples_common PRIVATE -Wno-deprecated-declarations -Wno-missing-declarations) -target_link_libraries(nvcv_samples_common nvcv_types cvcuda CUDA::cudart TensorRT::nvinfer CUDA::nvjpeg) +target_compile_options(cvcuda_samples_common PRIVATE -Wno-deprecated-declarations -Wno-missing-declarations) +target_link_libraries(cvcuda_samples_common nvcv_types cvcuda CUDA::cudart TensorRT::nvinfer CUDA::nvjpeg) -install(TARGETS nvcv_samples_common - EXPORT nvcv_samples_common +install(TARGETS cvcuda_samples_common + EXPORT cvcuda_samples_common COMPONENT samples DESTINATION samples/lib) diff --git a/samples/common/python/nvcodec_utils.py b/samples/common/python/nvcodec_utils.py new file mode 100644 index 000000000..2a300d385 --- /dev/null +++ b/samples/common/python/nvcodec_utils.py @@ -0,0 +1,641 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +nvcodec_utils + +This file hosts various helpers for NV codecs. +""" + + +import os +import sys +import av
+import logging +import glob +import numpy as np +import torch +import nvcv +import cvcuda +from fractions import Fraction +import itertools +import PyNvVideoCodec as nvvc +from nvidia import nvimgcodec + +from pathlib import Path + +# Bring module folders from the samples directory into our path so that +# we can import modules from it.
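As a point of reference for how this new module is consumed, the sample entry points in this patch import the unified decoder and encoder classes from it (the exact set of names varies per sample). A representative import, mirroring the one used by the object detection sample and assuming the folder containing this file is already on `sys.path` (the samples arrange for that), looks like:

```python
# Representative import used by the samples' main.py scripts.
from nvcodec_utils import (
    VideoBatchDecoder,
    VideoBatchEncoder,
    ImageBatchDecoder,
    ImageBatchEncoder,
)
```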
+samples_dir = Path(os.path.abspath(__file__)).parents[2] # samples/ +sys.path.insert(0, os.path.join(samples_dir, "")) + +from common.python.batch import Batch # noqa: E402 + +pixel_format_to_cvcuda_code = { + nvvc.Pixel_Format.YUV444: cvcuda.ColorConversion.YUV2RGB, + nvvc.Pixel_Format.NV12: cvcuda.ColorConversion.YUV2RGB_NV12, +} + + +class AppCAI: + def __init__(self, shape, stride, typestr, gpualloc): + self.__cuda_array_interface__ = { + "shape": shape, + "strides": stride, + "data": (int(gpualloc), False), + "typestr": typestr, + "version": 3, + } + + +# docs_tag: begin_videobatchdecoder_pyvideocodec +class VideoBatchDecoder: + def __init__( + self, + input_path, + batch_size, + device_id, + cuda_ctx, + cvcuda_perf, + ): + # docs_tag: begin_init_videobatchdecoder_pyvideocodec + self.logger = logging.getLogger(__name__) + self.input_path = input_path + self.batch_size = batch_size + self.device_id = device_id + self.cuda_ctx = cuda_ctx + self.cuda_stream = cvcuda.Stream().current + self.cvcuda_perf = cvcuda_perf + self.total_decoded = 0 + self.batch_idx = 0 + self.decoder = None + self.cvcuda_RGBtensor_batch = None + nvDemux = nvvc.PyNvDemuxer(self.input_path) + self.fps = nvDemux.FrameRate() + self.logger.info("Using PyNvVideoCodec decoder version: %s" % nvvc.__version__) + # docs_tag: end_init_videobatchdecoder_pyvideocodec + + # docs_tag: begin_call_videobatchdecoder_pyvideocodec + def __call__(self): + self.cvcuda_perf.push_range("decoder.pyVideoCodec") + + # docs_tag: begin_alloc_videobatchdecoder_pyvideocodec + # Check if we need to allocate the decoder for its first use. + if self.decoder is None: + self.decoder = nvVideoDecoder( + self.input_path, self.device_id, self.cuda_ctx, self.cuda_stream + ) + # docs_tag: end_alloc_videobatchdecoder_pyvideocodec + + # docs_tag: begin_decode_videobatchdecoder_pyvideocodec + # Get the NHWC YUV tensor from the decoder + cvcuda_YUVtensor = self.decoder.get_next_frames(self.batch_size) + + # Check if we are done decoding + if cvcuda_YUVtensor is None: + self.cvcuda_perf.pop_range() + return None + + # Check the code for the color conversion based in the pixel format + cvcuda_code = pixel_format_to_cvcuda_code.get(self.decoder.pixelFormat) + if cvcuda_code is None: + raise ValueError(f"Unsupported pixel format: {self.decoder.pixelFormat}") + + # Check layout to make sure it is what we expected + if cvcuda_YUVtensor.layout != "NHWC": + raise ValueError("Unexpected tensor layout, NHWC expected.") + + # this may be different than batch size since last frames may not be a multiple of batch size + actual_batch_size = cvcuda_YUVtensor.shape[0] + + # docs_tag: end_decode_videobatchdecoder_pyvideocodec + + # docs_tag: begin_convert_videobatchdecoder_pyvideocodec + # Create a CVCUDA tensor for color conversion YUV->RGB + # Allocate only for the first time or for the last batch. + if not self.cvcuda_RGBtensor_batch or actual_batch_size != self.batch_size: + self.cvcuda_RGBtensor_batch = cvcuda.Tensor( + (actual_batch_size, self.decoder.h, self.decoder.w, 3), + nvcv.Type.U8, + nvcv.TensorLayout.NHWC, + ) + + # Convert from YUV to RGB. Conversion code is based on the pixel format. + cvcuda.cvtcolor_into(self.cvcuda_RGBtensor_batch, cvcuda_YUVtensor, cvcuda_code) + + self.total_decoded += actual_batch_size + # docs_tag: end_convert_videobatchdecoder_pyvideocodec + + # docs_tag: begin_batch_videobatchdecoder_pyvideocodec + # Create a batch instance and set its properties. 
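A brief aside on the `AppCAI` helper defined above: it simply wraps a raw device pointer in the standard `__cuda_array_interface__` protocol so that CAI-aware libraries can view that memory without copying. A small, hypothetical illustration using a PyTorch allocation as the backing buffer (the shape and byte strides are chosen only for the example):

```python
import torch

# 720x1280 single-channel uint8 buffer on the GPU; AppCAI exposes it via the
# CUDA Array Interface, and torch.as_tensor can then view the same memory.
buf = torch.empty(720 * 1280, dtype=torch.uint8, device="cuda")
cai = AppCAI((720, 1280, 1), (1280, 1, 1), "|u1", buf.data_ptr())
view = torch.as_tensor(cai, device="cuda")
assert view.shape == (720, 1280, 1)
```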
+ batch = Batch( + batch_idx=self.batch_idx, + data=self.cvcuda_RGBtensor_batch, + fileinfo=self.input_path, + ) + self.batch_idx += 1 + + self.cvcuda_perf.pop_range() + return batch + # docs_tag: end_call_videobatchdecoder_pyvideocodec + + def start(self): + pass + + def join(self): + pass + + +# docs_tag: end_videobatchdecoder_pyvideocodec + +# docs_tag: begin_imp_nvvideodecoder +class nvVideoDecoder: + def __init__(self, enc_file, device_id, cuda_ctx, stream): + """ + Create instance of HW-accelerated video decoder. + :param enc_file: Full path to the MP4 file that needs to be decoded. + :param device_id: id of video card which will be used for decoding & processing. + :param cuda_ctx: A cuda context object. + """ + self.device_id = device_id + self.cuda_ctx = cuda_ctx + self.input_path = enc_file + self.stream = stream + # Demuxer is instantiated only to collect required information about + # certain video file properties. + self.nvDemux = nvvc.PyNvDemuxer(self.input_path) + self.nvDec = nvvc.CreateDecoder( + gpuid=0, + codec=self.nvDemux.GetNvCodecId(), + cudacontext=self.cuda_ctx.handle, + cudastream=self.stream.handle, + enableasyncallocations=False, + ) + + self.w, self.h = self.nvDemux.Width(), self.nvDemux.Height() + self.pixelFormat = self.nvDec.GetPixelFormat() + # In case sample aspect ratio isn't 1:1 we will re-scale the decoded + # frame to maintain uniform 1:1 ratio across the pipeline. + sar = 8.0 / 9.0 + self.fixed_h = self.h + self.fixed_w = int(self.w * sar) + + # frame iterator + def generate_decoded_frames(self): + for packet in self.nvDemux: + for decodedFrame in self.nvDec.Decode(packet): + nvcvTensor = nvcv.as_tensor( + nvcv.as_image(decodedFrame.nvcv_image(), nvcv.Format.U8) + ) + if nvcvTensor.layout == "NCHW": + # This will re-format the NCHW tensor to a NHWC tensor which will create + # a copy in the CUDA device decoded frame will go out of scope and the + # backing memory will be available by the decoder. + yield cvcuda.reformat(nvcvTensor, "NHWC") + else: + raise ValueError("Unexpected tensor layout, NCHW expected.") + + def get_next_frames(self, N): + decoded_frames = list(itertools.islice(self.generate_decoded_frames(), N)) + if len(decoded_frames) == 0: + return None + elif len(decoded_frames) == 1: # this case we dont need stack the tensor + return decoded_frames[0] + else: + # convert from list of tensors to a single tensor (NHWC) + tensorNHWC = cvcuda.stack(decoded_frames) + return tensorNHWC + + +# docs_tag: end_imp_nvvideodecoder + +# docs_tag: begin_init_videobatchencoder_pyvideocodec +class VideoBatchEncoder: + def __init__( + self, + output_path, + fps, + device_id, + cuda_ctx, + cvcuda_perf, + ): + self.logger = logging.getLogger(__name__) + self.output_path = output_path + self.fps = fps + self.device_id = device_id + self.cuda_ctx = cuda_ctx + self.cuda_stream = cvcuda.Stream().current + self.cvcuda_perf = cvcuda_perf + + self.encoder = None + self.cvcuda_HWCtensor_batch = None + self.cvcuda_YUVtensor_batch = None + self.input_layout = "NCHW" + self.gpu_input = True + self.output_file_name = None + + self.logger.info("Using PyNvVideoCodec encoder version: %s" % nvvc.__version__) + # docs_tag: end_init_videobatchencoder_pyvideocodec + + # docs_tag: begin_call_videobatchencoder_pyvideocodec + def __call__(self, batch): + self.cvcuda_perf.push_range("encoder.pyVideoCodec") + + # Get the name of the original video file read by the decoder. We would use + # the same filename to save the output video. 
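Taken together, the decoder and encoder classes above are meant to be driven in a simple loop. Below is a minimal, illustrative pass-through sketch (no inference step in between); `cuda_ctx` and `cvcuda_perf` are assumed to be created the way the samples' `main.py` scripts do it, the file and directory names are placeholders, and the snippet assumes this module's imports (`torch`, `cvcuda`, etc.) are available:

```python
decoder = VideoBatchDecoder("in.mp4", batch_size=4, device_id=0,
                            cuda_ctx=cuda_ctx, cvcuda_perf=cvcuda_perf)
encoder = VideoBatchEncoder("./output", decoder.fps, device_id=0,
                            cuda_ctx=cuda_ctx, cvcuda_perf=cvcuda_perf)
decoder.start()
encoder.start()
while True:
    batch = decoder()  # returns None once the whole video has been decoded
    if batch is None:
        break
    # The decoder hands out an NHWC cvcuda.Tensor; the encoder expects an
    # NCHW torch.Tensor, so convert in between.
    nchw = cvcuda.reformat(batch.data, "NCHW")
    batch.data = torch.as_tensor(nchw.cuda(), device="cuda")
    encoder(batch)
decoder.join()
encoder.join()  # flushes the encoder and closes the output file
```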
+ file_name = os.path.splitext(os.path.basename(batch.fileinfo))[0] + self.output_file_name = os.path.join(self.output_path, "out_%s.mp4" % file_name) + + assert isinstance(batch.data, torch.Tensor) + + # docs_tag: begin_alloc_cvcuda_videobatchencoder_pyvideocodec + # Check if we need to allocate the encoder for its first use. + if self.encoder is None: + self.encoder = nvVideoEncoder( + self.device_id, + batch.data.shape[3], + batch.data.shape[2], + self.fps, + self.output_file_name, + self.cuda_ctx, + self.cuda_stream, + "NV12", + ) + # docs_tag: end_alloc_cvcuda_videobatchencoder_pyvideocodec + + # docs_tag: begin_convert_videobatchencoder_pyvideocodec + + # Create 2 CVCUDA tensors: reformat NCHW->NHWC and color conversion RGB->YUV + current_batch_size = batch.data.shape[0] + height, width = batch.data.shape[2], batch.data.shape[3] + + # Allocate only for the first time or for the last batch. + if ( + not self.cvcuda_HWCtensor_batch + or current_batch_size != self.cvcuda_HWCtensor_batch.shape[0] + ): + self.cvcuda_HWCtensor_batch = cvcuda.Tensor( + (current_batch_size, height, width, 3), + nvcv.Type.U8, + nvcv.TensorLayout.NHWC, + ) + self.cvcuda_YUVtensor_batch = cvcuda.Tensor( + (current_batch_size, (height // 2) * 3, width, 1), + nvcv.Type.U8, + nvcv.TensorLayout.NHWC, + ) + + # Convert RGB to NV12, in batch, before sending it over to pyVideoCodec. + # Convert to CVCUDA tensor + cvcuda_tensor = cvcuda.as_tensor(batch.data, nvcv.TensorLayout.NCHW) + + # Reformat NCHW to NHWC + cvcuda.reformat_into(self.cvcuda_HWCtensor_batch, cvcuda_tensor) + + # Color convert from RGB to YUV_NV12 + cvcuda.cvtcolor_into( + self.cvcuda_YUVtensor_batch, + self.cvcuda_HWCtensor_batch, + cvcuda.ColorConversion.RGB2YUV_NV12, + ) + + # Convert back to torch tensor we are NV12 + tensor = torch.as_tensor(self.cvcuda_YUVtensor_batch.cuda(), device="cuda") + # docs_tag: end_convert_videobatchencoder_pyvideocodec + + # docs_tag: begin_encode_videobatchencoder_pyvideocodec + # Encode frames from the batch one by one using pyVideoCodec. + for img_idx in range(tensor.shape[0]): + img = tensor[img_idx] + self.encoder.encode_from_tensor(img) + + self.cvcuda_perf.pop_range() + + def start(self): + pass + + def join(self): + self.encoder.flush() + self.logger.info("Wrote: %s" % self.output_file_name) + + +# docs_tag: end_init_videobatchencoder_pyvideocodec + +# docs_tag: begin_imp_nvvideoencoder +class nvVideoEncoder: + def __init__( + self, + device_id, + width, + height, + fps, + enc_file, + cuda_ctx, + cuda_stream, + format, + ): + """ + Create instance of HW-accelerated video encoder. + :param device_id: id of video card which will be used for encoding & processing. + :param width: encoded frame width. + :param height: encoded frame height. + :param fps: The FPS at which the encoding should happen. + :param enc_file: path to encoded video file. + :param cuda_ctx: A cuda context object + :param format: The format of the encoded video file. + (e.g. "NV12", "YUV444" see NvPyVideoEncoder docs for more info) + """ + self.device_id = device_id + self.fps = round(Fraction(fps), 6) + self.enc_file = enc_file + self.cuda_ctx = cuda_ctx + self.cuda_stream = cuda_stream + + self.pts_time = 0 + self.delta_t = 1 # Increment the packets' timestamp by this much. 
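One detail of the batch encoder above that is easy to miss: the single-channel YUV tensor it allocates has `(height // 2) * 3` rows because NV12 stores a full-resolution luma plane followed by a half-height plane of interleaved, 2x2-downscaled chroma. A tiny sketch of that arithmetic, using example dimensions only:

```python
h, w = 720, 1280                 # example RGB frame size
y_rows = h                       # Y (luma) plane, full resolution
uv_rows = h // 2                 # interleaved UV (chroma) plane, half height
assert (h // 2) * 3 == y_rows + uv_rows == 1080
# So an (N, 720, 1280, 3) RGB batch maps to an (N, 1080, 1280, 1) NV12 buffer.
```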
+ self.encoded_frame = np.ndarray(shape=(0), dtype=np.uint8) + self.container = av.open(enc_file, "w") + self.avstream = self.container.add_stream("h264", rate=self.fps) + + aligned_value = 0 + if width % 16 != 0: + aligned_value = 16 - (width % 16) + aligned_width = width + aligned_value + width = aligned_width + + self.avstream.width = width + self.avstream.height = height + + self.avstream.time_base = 1 / Fraction(self.fps) + self.surface = None + self.surf_plane = None + + self.tmpTensor = None + + self.nvEnc = nvvc.CreateEncoder( + self.avstream.width, + self.avstream.height, + format, + codec="h264", + preset="P4", + cudastream=cuda_stream.handle, + ) + + def width(self): + """ + Gets the actual video frame width from the encoder. + """ + return self.nvEnc.Width() + + def height(self): + """ + Gets the actual video frame height from the encoder. + """ + return self.nvEnc.Height() + + # docs_tag: begin_imp_nvvideoencoder + + def encode_from_tensor(self, tensor): + + # Create a CUDA array interface object wit 2 planes one for luma and CrCb for NV12 + objCAI = [] + # Need to compute the address of the Y plane and the interleaved chroma plane + data = ( + tensor.storage().data_ptr() + + tensor.storage_offset() * tensor.element_size() + ) + objCAI.append( + AppCAI( + (self.avstream.height, self.avstream.width, 1), + (self.avstream.width, 1, 1), + "|u1", + data, + ) + ) + chromaAlloc = int(data) + self.avstream.width * self.avstream.height + objCAI.append( + AppCAI( + (int(self.avstream.height / 2), int(self.avstream.width / 2), 2), + (self.avstream.width, 2, 1), + "|u1", + chromaAlloc, + ) + ) + # Encode the frame takes CUDA array interface object as input + self.encoded_frame = self.nvEnc.Encode(objCAI) + self.write_frame( + self.encoded_frame, + self.pts_time, + self.fps, + self.avstream, + self.container, + ) + self.pts_time += self.delta_t + + # docs_tag: end_imp_nvvideoencoder + + # docs_tag: begin_writeframe_nvvideoencoder + def write_frame(self, encoded_frame, pts_time, fps, stream, container): + encoded_bytes = bytearray(encoded_frame) + pkt = av.packet.Packet(encoded_bytes) + pkt.pts = pts_time + pkt.dts = pts_time + pkt.stream = stream + pkt.time_base = 1 / Fraction(fps) + container.mux(pkt) + + # docs_tag: end_writeframe_nvvideoencoder + + def flush(self): + encoded_bytes = self.nvEnc.EndEncode() + if encoded_bytes: + self.write_frame( + encoded_bytes, + self.pts_time, + self.fps, + self.avstream, + self.container, + ) + self.pts_time += self.delta_t + self.container.close() + + +# docs_tag: end_imp_nvvideoencoder + +# docs_tag: begin_imagebatchdecoder_nvimagecodec +class ImageBatchDecoder: + def __init__( + self, + input_path, + batch_size, + device_id, + cuda_ctx, + cvcuda_perf, + ): + + # docs_tag: begin_init_imagebatchdecoder_nvimagecodec + self.logger = logging.getLogger(__name__) + self.batch_size = batch_size + self.input_path = input_path + self.device_id = device_id + self.total_decoded = 0 + self.batch_idx = 0 + self.cuda_ctx = cuda_ctx + self.cuda_stream = cvcuda.Stream().current + self.cvcuda_perf = cvcuda_perf + self.decoder = nvimgcodec.Decoder(device_id=device_id) + + # docs_tag: begin_parse_imagebatchdecoder_nvimagecodec + if os.path.isfile(self.input_path): + if os.path.splitext(self.input_path)[1] == ".jpg": + # Read the input image file. + self.file_names = [self.input_path] * self.batch_size + # We will use the nvImageCodec based decoder on the GPU in case of images. 
+ # This will be allocated once during the first run or whenever a batch + # size change happens. + else: + raise ValueError("Unable to read file %s as image." % self.input_path) + + elif os.path.isdir(self.input_path): + # It is a directory. Grab file names of all JPG images. + self.file_names = glob.glob(os.path.join(self.input_path, "*.jpg")) + self.logger.info("Found a total of %d JPEG images." % len(self.file_names)) + + else: + raise ValueError( + "Unknown expression given as input_path: %s." % self.input_path + ) + + # docs_tag: end_parse_imagebatchdecoder_nvimagecodec + + # docs_tag: begin_batch_imagebatchdecoder_nvimagecodec + self.file_name_batches = [ + self.file_names[i : i + self.batch_size] # noqa: E203 + for i in range(0, len(self.file_names), self.batch_size) + ] + # docs_tag: end_batch_imagebatchdecoder_nvimagecodec + + self.max_image_size = 1024 * 1024 * 3 # Maximum possible image size. + + self.logger.info( + "Using nvImageCodec decoder version: %s" % nvimgcodec.__version__ + ) + + # docs_tag: end_init_imagebatchdecoder_nvimagecodec + + def __call__(self): + if self.total_decoded == len(self.file_names): + return None + + # docs_tag: begin_call_imagebatchdecoder_nvimagecodec + self.cvcuda_perf.push_range("decoder.nvimagecodec") + + file_name_batch = self.file_name_batches[self.batch_idx] + + data_batch = [open(path, "rb").read() for path in file_name_batch] + + # docs_tag: begin_decode_imagebatchdecoder_nvimagecodec + + tensor_list = [] + image_list = self.decoder.decode(data_batch, cuda_stream=self.cuda_stream) + + # Convert the decoded images to nvcv tensors in a list. + for i in range(len(image_list)): + tensor_list.append(cvcuda.as_tensor(image_list[i], "HWC")) + + # Stack the list of tensors to a single NHWC tensor. + cvcuda_decoded_tensor = cvcuda.stack(tensor_list) + self.total_decoded += len(tensor_list) + # docs_tag: end_decode_imagebatchdecoder_nvimagecodec + + # docs_tag: begin_return_imagebatchdecoder_nvimagecodec + batch = Batch( + batch_idx=self.batch_idx, + data=cvcuda_decoded_tensor, + fileinfo=file_name_batch, + ) + self.batch_idx += 1 + + # docs_tag: end_return_imagebatchdecoder_nvimagecodec + + self.cvcuda_perf.pop_range() + # docs_tag: end_call_imagebatchdecoder_nvimagecodec + return batch + + def start(self): + pass + + def join(self): + pass + + +# docs_tag: end_imagebatchdecoder_nvimagecodec + +# docs_tag: begin_imagebatchencoder_nvimagecodec +class ImageBatchEncoder: + def __init__( + self, + output_path, + device_id, + cvcuda_perf, + ): + # docs_tag: begin_init_imagebatchencoder_nvimagecodec + self.logger = logging.getLogger(__name__) + self.encoder = nvimgcodec.Encoder(device_id=device_id) + self.input_layout = "NHWC" + self.gpu_input = True + self.output_path = output_path + self.device_id = device_id + self.cvcuda_perf = cvcuda_perf + + self.logger.info( + "Using nvImageCodec encoder version: %s" % nvimgcodec.__version__ + ) + # docs_tag: end_init_init_imagebatchencoder_nvimagecodec + + # docs_tag: begin_call_imagebatchencoder_nvimagecodec + def __call__(self, batch): + self.cvcuda_perf.push_range("encoder.nvimagecodec") + + assert isinstance(batch.data, torch.Tensor) + + image_tensors_nchw = batch.data + # Create an empty list to store filenames + filenames = [] + chwtensor_list = [] + # Iterate through each image to prepare the filenames + for img_idx in range(image_tensors_nchw.shape[0]): + img_name = os.path.splitext(os.path.basename(batch.fileinfo[img_idx]))[0] + results_path = os.path.join(self.output_path, f"out_{img_name}.jpg") + 
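Stepping back to the image decoder defined above: the batching of file names relies on a simple slicing idiom that naturally produces a smaller final batch when the file count is not a multiple of the batch size. An illustration with made-up file names:

```python
file_names = ["a.jpg", "b.jpg", "c.jpg", "d.jpg", "e.jpg"]
batch_size = 2
file_name_batches = [
    file_names[i : i + batch_size] for i in range(0, len(file_names), batch_size)
]
# -> [['a.jpg', 'b.jpg'], ['c.jpg', 'd.jpg'], ['e.jpg']]
```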
self.logger.info(f"Preparing to save the image to: {results_path}") + # Add the filename to the list + filenames.append(results_path) + # Add the image tensor CAI to a CAI list from an NCHW tensor + # (this was a stacked tensor if N images) + chwtensor_list.append(image_tensors_nchw[img_idx].cuda()) + + # Pass the image tensors and filenames to the encoder. + self.encoder.write(filenames, chwtensor_list) + self.cvcuda_perf.pop_range() + # docs_tag: end_call_imagebatchencoder_nvimagecodec + + def start(self): + pass + + def join(self): + pass + + +# docs_tag: end_imagebatchencoder_nvimagecodec diff --git a/samples/common/python/perf_utils.py b/samples/common/python/perf_utils.py index 1563afd91..7c32a2bdc 100644 --- a/samples/common/python/perf_utils.py +++ b/samples/common/python/perf_utils.py @@ -21,12 +21,22 @@ import sys import json import logging +from datetime import datetime import argparse import subprocess from collections import deque import cvcuda import torch import nvtx +import pandas + + +logger = logging.getLogger(__name__) +logging.basicConfig( + format="[%(name)s:%(lineno)d] %(asctime)s %(levelname)-6s %(message)s", + level=logging.INFO, + datefmt="%Y-%m-%d %H:%M:%S", +) class CvCudaPerf: @@ -76,6 +86,7 @@ def __init__( self.timing_info = {} self.batch_info = {} self.inside_batch_info = [] + self.deleted_range_info = [] self.is_inside_batch = 0 self.total_batches_processed = {} # Check if the benchmark.py script was used to run this. We do so @@ -116,24 +127,37 @@ def push_range( self.stack.append((message, batch_idx)) self.stack_path = os.path.join(self.stack_path, message) - def pop_range(self, domain=None, total_items=None): + def pop_range(self, domain=None, total_items=None, delete_range=False): """ Pops a code range off of the stack for performance benchmarking. :param domain: Name of a domain under which the code range is scoped. :param total_items: The number of items processed in this range. + :param delete_range: Flag specifying whether the range should be completely deleted + instead of just popping it out. This will remove all traces of this range from + the benchmarks. Useful if the code being benchmarked fails and one wants to + remove its range in that case. """ if self.should_benchmark: # Grab the message and optional batch index from the stack. message, batch_idx = self.stack.pop() - self.timing_info[self.stack_path] = ( - 0, - 0, - ) # Placeholders for CPU and GPU times respectively. - # Actual timing information will be recorded and pulled from NSYS by a - # script like benchmark.py. - - if self.is_inside_batch > 0: + if not delete_range: + # Add only if this range was not meant for deletion. + self.timing_info[self.stack_path] = ( + 0, + 0, + ) # Placeholders for CPU and GPU times respectively. + # Actual timing information will be recorded and pulled from NSYS by a + # script like benchmark.py. + else: + # This range was meant for deletion. We did not add it to the timing_info + # but all the previously added children of this range must also be deleted. + # We will do that later in the finalize to avoid costing us time here. + # For that, we will save this stack path so that we can remove all the + # orphan nodes later. + self.deleted_range_info.append(self.stack_path) + + if self.is_inside_batch > 0 and not delete_range: self.inside_batch_info.append(self.stack_path) # Record the batch information if it was present. @@ -145,15 +169,19 @@ def pop_range(self, domain=None, total_items=None): "push a batch first by using the batch_idx in the push_range()." 
) - self.batch_info[self.stack_path] = (batch_idx, total_items) - self.is_inside_batch -= 1 + self.is_inside_batch -= 1 # Decrement this by one. - if total_items > 0: - batch_level_prefix = os.path.dirname(self.stack_path) + if not delete_range: + # Add to batch info only if this range was not meant for deletion. + self.batch_info[self.stack_path] = (batch_idx, total_items) - if batch_level_prefix not in self.total_batches_processed: - self.total_batches_processed[batch_level_prefix] = 0 - self.total_batches_processed[batch_level_prefix] += 1 + # Maintain a count of the number of items processed in various batches. + if total_items > 0: + batch_level_prefix = os.path.dirname(self.stack_path) + + if batch_level_prefix not in self.total_batches_processed: + self.total_batches_processed[batch_level_prefix] = 0 + self.total_batches_processed[batch_level_prefix] += 1 # Unwind the stack to point to the previous path(i.e. directory like expression) # e.g. one level above. @@ -174,6 +202,15 @@ def finalize(self): " item(s) still not popped." % len(self.stack) ) + # Remove the keys from the timing_info which starts with any key in the + # deleted_range_info. That makes sure that we not only delete the current + # key but also all of its previous children which were added but not deleted. + timing_info_keys = list(self.timing_info.keys()) + for key_delete in self.deleted_range_info: + for k in timing_info_keys: + if k.startswith(key_delete): + self.timing_info.pop(k, None) + # Build a dictionary containing the timing information and some metadata # about this run. # The overall structure of this would be: @@ -711,3 +748,210 @@ def parse_validate_default_args(parser): raise ValueError("target_img_width must be a value >=10.") return args + + +def summarize_runs( + baseline_run_root, + baseline_run_name="baseline", + compare_run_roots=[], + compare_run_names=[], +): + """ + Summarizes one or more benchmark runs and prepares a pandas table showing the run per sample run-time + and speed-up numbers. + :param baseline_run_root: Folder containing one sub-folder per sample in which the benchmark.py + styled JSON of the baseline run is stored. + :param baseline_run_name: The display name of the column representing the first run in the table. + :param compare_run_roots: Optional. A list of folder containing one sub-folder per sample in which the + benchmark.py styled JSON of the other runs are stored. These runs are compared with the baseline run. + :param compare_run_names: A list of display names of the column representing the comparison runs + in the table. This must be of the same length as the `compare_run_json_paths`. + :returns: A pandas table with the sample's name and its run time from the baseline run. + If compare runs are given, it also returns their run times and the speed-up + compared to the baseline run. The speedup is simply the run time of the sample from the compare run + divided by its run time from the baseline run. If an sample's run time or speedup factor is not + available, it simply puts "N/A". 
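Returning to the `delete_range` flag introduced on `pop_range` above: a hedged usage sketch of how a caller might drop the range of a failed operation from the benchmark output (`cvcuda_perf` and `run_operator` are placeholders, not names from this patch):

```python
cvcuda_perf.push_range("op.experimental")
try:
    run_operator()  # placeholder for the code being benchmarked
except Exception:
    # Remove this range (and any child ranges already recorded under it)
    # from the benchmark results, then re-raise.
    cvcuda_perf.pop_range(delete_range=True)
    raise
else:
    cvcuda_perf.pop_range()
```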
+ """ + + def _parse_json_for_time(json_data): + mean_all_batches = json_data["mean_all_batches"] + sample_name_key = list(mean_all_batches.keys())[0] + + cpu_time_minus_warmup_per_item = mean_all_batches[sample_name_key][ + "run_sample" + ]["pipeline"]["cpu_time_minus_warmup_per_item"] + + return cpu_time_minus_warmup_per_item + + baseline_perf = {} + if os.path.isdir(baseline_run_root): + for path in os.listdir(baseline_run_root): + if os.path.isdir(os.path.join(baseline_run_root, path)): + json_path = os.path.join(baseline_run_root, path, "benchmark_mean.json") + if os.path.isfile(json_path): + with open(json_path, "r") as f: + json_data = json.loads( + f.read() + ) # Storing by the name of the sample + + baseline_perf[path] = _parse_json_for_time(json_data) + else: + raise ValueError("baseline_run_root does not exist: %s" % baseline_run_root) + + if len(compare_run_roots) != len(compare_run_names): + raise ValueError( + "Length mismatch between the number of given paths for comparison and" + "their run names. %d v/s %d. Each path must have its corresponding run name." + % (len(compare_run_roots), len(compare_run_names)) + ) + + # Read all the comparison related JSON files, one by one, if any. + compare_perfs = {} + for compare_run_root, compare_run_name in zip(compare_run_roots, compare_run_names): + if os.path.isdir(compare_run_root): + compare_perfs[compare_run_name] = {} + + for path in os.listdir(compare_run_root): + if os.path.isdir(os.path.join(compare_run_root, path)): + compare_perfs[compare_run_name][path] = {} + + json_path = os.path.join( + compare_run_root, path, "benchmark_mean.json" + ) + if os.path.isfile(json_path): + with open(json_path, "r") as f: + json_data = json.loads( + f.read() + ) # Storing by the name of the sample + + compare_perfs[compare_run_name][ + path + ] = _parse_json_for_time(json_data) + else: + raise ValueError("compare_run_root does not exist: %s" % compare_run_root) + + results = [] + + for sample_name in baseline_perf: + row_dict = {} + + # Fetch the time and parameters from the JSON for baseline run. + baseline_run_time = baseline_perf[sample_name] + + row_dict["sample name"] = sample_name + row_dict["%s time (ms)" % baseline_run_name] = baseline_run_time + + if compare_perfs: + # Fetch the time from the JSON for all comparison runs. + for compare_run_name in compare_perfs: + # Check if the sample was present. + if sample_name in compare_perfs[compare_run_name]: + compare_run_time = compare_perfs[compare_run_name][sample_name] + else: + compare_run_time = None + + row_dict["%s time (ms)" % compare_run_name] = ( + compare_run_time if compare_run_time else "N/A" + ) + + if baseline_run_time and compare_run_time: + speedup = round(compare_run_time / baseline_run_time, 3) + else: + speedup = "N/A" + row_dict[ + "%s v/s %s speed-up" % (compare_run_name, baseline_run_name) + ] = speedup + + results.append(row_dict) + + df = pandas.DataFrame.from_dict(results) + + return df + + +def main(): + """ + The main function. This will run the comparison function to compare two benchmarking runs. 
+ """ + parser = argparse.ArgumentParser("Summarize and compare benchmarking runs.") + + parser.add_argument( + "-o", + "--output-dir", + type=str, + required=True, + help="The output directory where you want to store the result summary as a CSV file.", + ) + + parser.add_argument( + "-b", + "--baseline-root", + type=str, + required=True, + help="Root folder containing one sub-folder per sample in which benchmark.py styled JSONs" + " of the baseline runs of those samples are stored.", + ) + parser.add_argument( + "-bn", + "--baseline-name", + type=str, + required=True, + help="The name of the column representing the baseline run in the output table.", + ) + parser.add_argument( + "-c", + "--compare-roots", + action="append", + required=False, + help="Optional. List of folders containing one sub-folder per sample in which benchmark.py" + " styled JSONs of the comparison runs of those samples are stored.", + ) + parser.add_argument( + "-cn", + "--compare-names", + action="append", + required=False, + help="Optional. List of names of the column representing the comparison runs in the " + "output table", + ) + + args = parser.parse_args() + + if not os.path.isdir(args.output_dir): + raise ValueError("output-dir does not exist: %s" % args.output_dir) + + if not os.path.isdir(args.baseline_root): + raise ValueError("baseline-root does not exist: %s" % args.baseline_json) + + if len(args.compare_roots) != len(args.compare_names): + raise ValueError( + "Length mismatch between the number of given paths for comparison and" + "their run names. %d v/s %d. Each path must have its corresponding run name." + % (len(args.compare_roots), len(args.compare_names)) + ) + + logger.info( + "Summarizing a total of %d runs. All times are in milliseconds" + % (len(args.compare_roots) + 1) + ) + + df = summarize_runs( + baseline_run_root=args.baseline_root, + baseline_run_name=args.baseline_name, + compare_run_roots=args.compare_roots, + compare_run_names=args.compare_names, + ) + + csv_path = os.path.join( + args.output_dir, + "summarize_runs.%s.csv" % datetime.now(), + ) + df.to_csv(csv_path) + + logger.info("Wrote comparison CSV to: %s" % csv_path) + + +if __name__ == "__main__": + # If this was called on its own, we will run the summarize_runs function to summarize and + # compare two runs. + main() diff --git a/samples/common/python/torch_utils.py b/samples/common/python/torch_utils.py deleted file mode 100644 index efc3fa801..000000000 --- a/samples/common/python/torch_utils.py +++ /dev/null @@ -1,187 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import glob -import logging -import torch -import torchnvjpeg -import torchvision.transforms.functional as F - -from pathlib import Path - -# Bring module folders from the samples directory into our path so that -# we can import modules from it. 
-samples_dir = Path(os.path.abspath(__file__)).parents[2] # samples/ -sys.path.insert(0, os.path.join(samples_dir, "")) - -from common.python.batch import Batch # noqa: E402 - -# docs_tag: begin_init_imagebatchdecoder_pytorch - - -class ImageBatchDecoderPyTorch: - def __init__( - self, - input_path, - batch_size, - device_id, - cuda_ctx, - cvcuda_perf, - ): - self.logger = logging.getLogger(__name__) - self.batch_size = batch_size - self.input_path = input_path - self.device_id = device_id - self.total_decoded = 0 - self.batch_idx = 0 - self.cuda_ctx = cuda_ctx - self.cvcuda_perf = cvcuda_perf - - if os.path.isfile(self.input_path): - if os.path.splitext(self.input_path)[1] == ".jpg": - # Read the input image file. - self.file_names = [self.input_path] * self.batch_size - # We will use the torchnvjpeg based decoder on the GPU in case of images. - # This will be allocated once during the first run or whenever a batch - # size change happens. - self.decoder = None - else: - raise ValueError("Unable to read file %s as image." % self.input_path) - - elif os.path.isdir(self.input_path): - # It is a directory. Grab file names of all JPG images. - self.decoder = None - self.file_names = glob.glob(os.path.join(self.input_path, "*.jpg")) - self.logger.info("Found a total of %d JPEG images." % len(self.file_names)) - - else: - raise ValueError( - "Unknown expression given as input_path: %s." % self.input_path - ) - - # docs_tag: end_parse_imagebatchdecoder_pytorch - - # docs_tag: begin_batch_imagebatchdecoder_pytorch - self.file_name_batches = [ - self.file_names[i : i + self.batch_size] # noqa: E203 - for i in range(0, len(self.file_names), self.batch_size) - ] - - self.max_image_size = 1024 * 1024 * 3 # Maximum possible image size. - - self.logger.info("Using torchnvjpeg as decoder.") - - # docs_tag: end_init_imagebatchdecoder_pytorch - - def __call__(self): - if self.total_decoded == len(self.file_names): - return None - - # docs_tag: begin_call_imagebatchdecoder_pytorch - self.cvcuda_perf.push_range("decoder.torch") - - file_name_batch = self.file_name_batches[self.batch_idx] - effective_batch_size = len(file_name_batch) - data_batch = [open(path, "rb").read() for path in file_name_batch] - - # docs_tag: end_read_imagebatchdecoder_pytorch - - # docs_tag: begin_decode_imagebatchdecoder_pytorch - if not self.decoder or effective_batch_size != self.batch_size: - decoder = torchnvjpeg.Decoder( - device_padding=0, - host_padding=0, - gpu_huffman=True, - device_id=self.device_id, - bath_size=effective_batch_size, - max_cpu_threads=8, # this is max_cpu_threads parameter. Not used internally. - max_image_size=self.max_image_size, - stream=None, - ) - - image_tensor_list = decoder.batch_decode(data_batch) - - # Convert the list of tensors to a tensor itself. 
- image_tensors_nhwc = torch.stack(image_tensor_list) - - self.total_decoded += len(image_tensor_list) - # docs_tag: end_decode_imagebatchdecoder_pytorch - - # docs_tag: begin_return_imagebatchdecoder_pytorch - batch = Batch( - batch_idx=self.batch_idx, data=image_tensors_nhwc, fileinfo=file_name_batch - ) - self.batch_idx += 1 - - self.cvcuda_perf.pop_range() - - return batch - # docs_tag: end_return_imagebatchdecoder_pytorch - - def start(self): - pass - - def join(self): - pass - - -# docs_tag: begin_init_imagebatchencoder_pytorch -class ImageBatchEncoderPyTorch: - def __init__( - self, - output_path, - fps, - device_id, - cuda_ctx, - cvcuda_perf, - ): - self.logger = logging.getLogger(__name__) - self._encoder = None - self.input_layout = "NCHW" - self.gpu_input = True - self.output_path = output_path - self.device_id = device_id - self.cvcuda_perf = cvcuda_perf - - self.logger.info("Using PyTorch/PIL as encoder.") - # docs_tag: end_init_imagebatchencoder_pytorch - - # docs_tag: begin_call_imagebatchencoder_pytorch - def __call__(self, batch): - self.cvcuda_perf.push_range("encoder.torch") - - image_tensors_nchw = batch.data - - # Bring the image_tensors_nchw to CPU and convert it to a PIL - # image and save those. - for img_idx in range(image_tensors_nchw.shape[0]): - img_name = os.path.splitext(os.path.basename(batch.fileinfo[img_idx]))[0] - results_path = os.path.join(self.output_path, "out_%s.jpg" % img_name) - self.logger.info("Saving the overlay result to: %s" % results_path) - overlay_cpu = image_tensors_nchw[img_idx].detach().cpu() - overlay_pil = F.to_pil_image(overlay_cpu) - overlay_pil.save(results_path) - - self.cvcuda_perf.pop_range() - - # docs_tag: end_call_imagebatchencoder_pytorch - - def start(self): - pass - - def join(self): - pass diff --git a/samples/common/python/vpf_utils.py b/samples/common/python/vpf_utils.py index da626887f..d688d7944 100644 --- a/samples/common/python/vpf_utils.py +++ b/samples/common/python/vpf_utils.py @@ -39,525 +39,6 @@ samples_dir = Path(os.path.abspath(__file__)).parents[2] # samples/ sys.path.insert(0, os.path.join(samples_dir, "")) -from common.python.batch import Batch # noqa: E402 - -# docs_tag: begin_init_videobatchdecoder_vpf - - -class VideoBatchDecoderVPF: - def __init__( - self, - input_path, - batch_size, - device_id, - cuda_ctx, - cvcuda_perf, - ): - self.logger = logging.getLogger(__name__) - self.input_path = input_path - self.batch_size = batch_size - self.device_id = device_id - self.cuda_ctx = cuda_ctx - self.cvcuda_perf = cvcuda_perf - - # Demuxer is instantiated only to collect required information about - # certain video file properties. - nvDemux = nvc.PyFFmpegDemuxer(self.input_path) - self.fps = nvDemux.Framerate() - self.total_frames = nvDemux.Numframes() - self.total_decoded = 0 - self.batch_idx = 0 - - # We use VPF to do video decoding. This instance will be allocated when the first - # batch comes in. - self.decoder = None - - # We would use VPF for video encoding/decoding, and CVCUDA to do color conversions - # to and from RGB to NV12 format. These formats are required by VPF to encode/decode - # video streams. Since CVCUDA can do these conversions much faster on a batch level - # and since VPF does not work on batches, we would perform these conversions here - # in this class using CVCUDA. We would pre-allocate the memory required by these - # conversions upon the first use or whenever the batch size changes. 
This would allow - # us to use the 'into' versions of CVCUDA operators without allocating/de-allocating - # memory on every batch. We need to be mindful of the following things when dealing - # with NV12 format in CVCUDA: - # NV12 is a complex format and it is not tensor friendly so libraries use a workaround - # to put the NV12 in a "matrix" form. They put the YUV from NV12 as 3/2 height - # 1 height is Y luma that is full resolution - # 1/2 height is UV chroma that is 2x2 down-scaled - # Hence you would see YUV's H dimension 1.5 times the RGB's H dimension. - self.cvcuda_RGBtensor_batch = None - - self.logger.info("Using VPF as decoder.") - # docs_tag: end_init_videobatchdecoder_vpf - - # docs_tag: begin_call_videobatchdecoder_vpf - def __call__(self): - # Check if we have reached the end of the stream. If so, simply return None. - if self.total_decoded == self.total_frames: - return None - - self.cvcuda_perf.push_range("decoder.vpf") - - # Check if we need to allocate the decoder for its first use. - if self.decoder is None: - self.decoder = nvdecoder( - self.input_path, - self.device_id, - self.cuda_ctx, - ) - - # docs_tag: end_alloc_videobatchdecoder_vpf - - # docs_tag: begin_decode_videobatchdecoder_vpf - # If we are in the last batch size, the total frames left to decode may be - # less than equal to the batch size. - if self.total_decoded + self.batch_size > self.total_frames: - actual_batch_size = self.total_frames - self.total_decoded - else: - actual_batch_size = self.batch_size - - # Decode each frame one by one and put them in a list. - frame_list = [self.decoder.decode_to_tensor() for x in range(actual_batch_size)] - - # Convert 3D list to 4D torch tensor. - image_tensor_nhwc = torch.stack(frame_list) - # docs_tag: end_decode_videobatchdecoder_vpf - - # docs_tag: begin_convert_videobatchdecoder_vpf - # Create a CVCUDA tensor for color conversion YUV->RGB - # Allocate only for the first time or for the last batch. - if not self.cvcuda_RGBtensor_batch or actual_batch_size != self.batch_size: - self.cvcuda_RGBtensor_batch = cvcuda.Tensor( - (actual_batch_size, self.decoder.h, self.decoder.w, 3), - nvcv.Type.U8, - nvcv.TensorLayout.NHWC, - ) - - # Add the batch dim at the end to make it W,H,1 from W,H - image_tensor_nhwc = torch.unsqueeze(image_tensor_nhwc, -1) - # Make it a CVCUDA Tensor, C will be 1. - cvcuda_YUVtensor = cvcuda.as_tensor(image_tensor_nhwc, nvcv.TensorLayout.NHWC) - # Convert from YUV to RGB. This will be NHWC. - cvcuda.cvtcolor_into( - self.cvcuda_RGBtensor_batch, cvcuda_YUVtensor, self.decoder.cvcuda_code - ) - self.total_decoded += len(frame_list) - # docs_tag: end_convert_videobatchdecoder_vpf - - # docs_tag: begin_batch_videobatchdecoder_vpf - # Create a batch instance and set its properties. - batch = Batch( - batch_idx=self.batch_idx, - data=self.cvcuda_RGBtensor_batch, - fileinfo=self.input_path, - ) - self.batch_idx += 1 - - self.cvcuda_perf.pop_range() - - return batch - # docs_tag: end_batch_videobatchdecoder_vpf - - def start(self): - pass - - def join(self): - pass - - -# docs_tag: begin_init_videobatchencoder_vpf -class VideoBatchEncoderVPF: - def __init__( - self, - output_path, - fps, - device_id, - cuda_ctx, - cvcuda_perf, - ): - self.logger = logging.getLogger(__name__) - self.output_path = output_path - self.fps = fps - self.device_id = device_id - self.cuda_ctx = cuda_ctx - self.cvcuda_perf = cvcuda_perf - - # We use VPF to do video encoding. This instance will be allocated when the first - # batch comes in. 
- self.encoder = None - - # We would use VPF for video encoding/decoding, and CVCUDA to do color conversions - # to and from RGB to NV12 format. These formats are required by VPF to encode/decode - # video streams. Since CVCUDA can do these conversions much faster on a batch level - # and since VPF does not work on batches, we would perform these conversions here - # in this class using CVCUDA. We would pre-allocate the memory required by these - # conversions upon the first use or whenever the batch size changes. This would allow - # us to use the 'into' versions of CVCUDA operators without allocating/deallocating - # memory on every batch. We need to be mindful of the following things when dealing - # with NV12 format in CVCUDA: - # NV12 is a complex format and it is not tensor friendly so libraries use a workaround - # to put the NV12 in a "matrix" form. They put the YUV from NV12 as 3/2 height - # 1 height is Y luma that is full resolution - # 1/2 height is UV chroma that is 2x2 down-scaled - # Hence you would see YUV's H dimension 1.5 times the RGB's H dimension. - self.cvcuda_HWCtensor_batch = None - self.cvcuda_YUVtensor_batch = None - self.input_layout = "NCHW" - self.gpu_input = True - self.output_file_name = None - - self.logger.info("Using VPF as encoder.") - # docs_tag: end_init_videobatchencoder_vpf - - # docs_tag: begin_call_videobatchencoder_vpf - def __call__(self, batch): - self.cvcuda_perf.push_range("encoder.vpf") - - # Get the name of the original video file read by the decoder. We would use - # the same filename to save the output video. - file_name = os.path.splitext(os.path.basename(batch.fileinfo))[0] - self.output_file_name = os.path.join(self.output_path, "out_%s.mp4" % file_name) - - # Check if we need to allocate the encoder for its first use. - if self.encoder is None: - self.encoder = nvencoder( - self.device_id, - batch.data.shape[3], - batch.data.shape[2], - self.fps, - self.output_file_name, - self.cuda_ctx, - ) - - # docs_tag: end_alloc_videobatchdecoder_vpf - - # docs_tag: begin_alloc_cvcuda_videobatchdecoder_vpf - # Create 2 CVCUDA tensors: reformat NCHW->NHWC and color conversion RGB->YUV - current_batch_size = batch.data.shape[0] - height, width = batch.data.shape[2], batch.data.shape[3] - # Allocate only for the first time or for the last batch. - if ( - not self.cvcuda_HWCtensor_batch - or current_batch_size != self.cvcuda_HWCtensor_batch.shape[0] - ): - self.cvcuda_HWCtensor_batch = cvcuda.Tensor( - (current_batch_size, height, width, 3), - nvcv.Type.U8, - nvcv.TensorLayout.NHWC, - ) - self.cvcuda_YUVtensor_batch = cvcuda.Tensor( - (current_batch_size, (height // 2) * 3, width, 1), - nvcv.Type.U8, - nvcv.TensorLayout.NHWC, - ) - # docs_tag: end_alloc_cvcuda_videobatchdecoder_vpf - - # docs_tag: begin_convert_videobatchencoder_vpf - # Convert RGB to NV12, in batch, before sending it over to VPF. - # Convert to CVCUDA tensor - cvcuda_tensor = cvcuda.as_tensor(batch.data, nvcv.TensorLayout.NCHW) - # Reformat - cvcuda.reformat_into(self.cvcuda_HWCtensor_batch, cvcuda_tensor) - # Color convert from RGB to YUV_NV12 - cvcuda.cvtcolor_into( - self.cvcuda_YUVtensor_batch, - self.cvcuda_HWCtensor_batch, - cvcuda.ColorConversion.RGB2YUV_NV12, - ) - - # Convert back to torch tensor - tensor = torch.as_tensor(self.cvcuda_YUVtensor_batch.cuda(), device="cuda") - - # docs_tag: end_convert_videobatchencoder_vpf - - # docs_tag: begin_encode_videobatchencoder_vpf - # Encode frames from the batch one by one using VPF. 
- for img_idx in range(tensor.shape[0]): - img = tensor[img_idx] - self.encoder.encode_from_tensor(img) - - self.cvcuda_perf.pop_range() - - # docs_tag: end_encode_videobatchencoder_vpf - - def start(self): - pass - - def join(self): - self.encoder.flush() - self.logger.info("Wrote: %s" % self.output_file_name) - - -class nvdecoder: - def __init__( - self, - enc_file, - device_id, - cuda_ctx, - ): - """ - Create instance of HW-accelerated video decoder. - :param enc_file: Full path to the MP4 file that needs to be decoded. - :param device_id: id of video card which will be used for decoding & processing. - :param cuda_ctx: A cuda context object. - """ - self.device_id = device_id - self.cuda_ctx = cuda_ctx - # Demuxer is instantiated only to collect required information about - # certain video file properties. - nvDemux = nvc.PyFFmpegDemuxer(enc_file) - self.w, self.h = nvDemux.Width(), nvDemux.Height() - self.fps = nvDemux.Framerate() - self.total_frames = nvDemux.Numframes() - - # In case sample aspect ratio isn't 1:1 we will re-scale the decoded - # frame to maintain uniform 1:1 ratio across the pipeline. - sar = 8.0 / 9.0 - self.fixed_h = self.h - self.fixed_w = int(self.w * sar) - - self.pix_fmt = nvDemux.Format() - is_yuv420 = ( - nvc.PixelFormat.YUV420 == self.pix_fmt - or nvc.PixelFormat.NV12 == self.pix_fmt - ) - is_yuv444 = nvc.PixelFormat.YUV444 == self.pix_fmt - - # Set CVCUDA color conversion code to do YUV->RGB - self.cvcuda_code = None - if is_yuv420: - self.cvcuda_code = cvcuda.ColorConversion.YUV2RGB_NV12 - elif is_yuv444: - self.cvcuda_code = cvcuda.ColorConversion.YUV2RGB - - codec = nvDemux.Codec() - is_hevc = nvc.CudaVideoCodec.HEVC == codec - - # YUV420 or YUV444 sampling formats are supported by Nvdec - self.is_hw_dec = is_yuv420 or is_yuv444 - - # But YUV444 HW decode is supported for HEVC only - if self.is_hw_dec and is_yuv444 and not is_hevc: - self.is_hw_dec = False - - if self.is_hw_dec: - # Nvdec supports NV12 (resampled YUV420) and YUV444 formats - if self.cuda_ctx: - self.nvDec = nvc.PyNvDecoder( - input=enc_file, - context=self.cuda_ctx.handle, - stream=cvcuda.Stream.current.handle, - ) - else: - self.nvDec = nvc.PyNvDecoder( - input=enc_file, - gpu_id=self.device_id, - ) - else: - raise ValueError( - "Current combination of hardware and the video file being read does not " - "hardware accelerated decoding." - ) - - # docs_tag: begin_imp_nvdecoder - def decode_hw(self, seek_ctx=None): - """ - Decode single video frame with Nvdec, convert it to planar RGB. - """ - # Decode with HW decoder - if seek_ctx is None: - dec_surface = self.nvDec.DecodeSingleSurface() - else: - dec_surface = self.nvDec.DecodeSingleSurface(seek_ctx) - if not dec_surface or dec_surface.Empty(): - raise RuntimeError("Can not decode frame.") - - return dec_surface - - def decode_to_tensor(self, *args, **kwargs): - """ - Decode single video frame, convert it to torch.cuda.FloatTensor. - Image will be planar RGB normalized to range [0.0; 1.0]. - """ - if self.is_hw_dec: - dec_surface = self.decode_hw(*args, **kwargs) - else: - raise ValueError( - "Current combination of hardware and the video file being read does not " - "hardware accelerated decoding." 
- ) - - if not dec_surface or dec_surface.Empty(): - raise RuntimeError("Can not decode surface.") - - surf_plane = dec_surface.PlanePtr() - - img_tensor = pnvc.makefromDevicePtrUint8( - surf_plane.GpuMem(), - surf_plane.Width(), - surf_plane.Height(), - surf_plane.Pitch(), - surf_plane.ElemSize(), - ) - if img_tensor is None: - raise RuntimeError("Can not export to tensor.") - - return img_tensor - - # docs_tag: end_imp_nvdecoder - - -class nvencoder: - def __init__( - self, - device_id, - width, - height, - fps, - enc_file, - cuda_ctx, - ): - """ - Create instance of HW-accelerated video encoder. - :param device_id: id of video card which will be used for encoding & processing. - :param width: encoded frame width. - :param height: encoded frame height. - :param fps: The FPS at which the encoding should happen. - :param enc_file: path to encoded video file. - :param cuda_ctx: A cuda context object - """ - self.device_id = device_id - self.fps = round(Fraction(fps), 6) - self.enc_file = enc_file - self.cuda_ctx = cuda_ctx - - opts = { - "preset": "P5", - "tuning_info": "high_quality", - "codec": "h264", - "fps": str(self.fps), - "s": str(width) + "x" + str(height), - "bitrate": "10M", - } - - self.nvEnc = nvc.PyNvEncoder( - opts, - self.cuda_ctx.handle, - cvcuda.Stream.current.handle, - ) - self.pts_time = 0 - self.delta_t = 1 # Increment the packets' timestamp by this much. - self.encoded_frame = np.ndarray(shape=(0), dtype=np.uint8) - self.container = av.open(enc_file, "w") - self.avstream = self.container.add_stream("h264", rate=fps) - self.avstream.width = width - self.avstream.height = height - # 1/fps would be our scale. - self.avstream.time_base = 1 / Fraction(fps) - self.surface = None - self.surf_plane = None - - def width(self): - """ - Gets the actual video frame width from the encoder. - """ - return self.nvEnc.Width() - - def height(self): - """ - Gets the actual video frame height from the encoder. - """ - return self.nvEnc.Height() - - # docs_tag: begin_imp_nvencoder - def tensor_to_surface(self, img_tensor): - """ - Converts torch float tensor into a planar RGB surface. - """ - if not self.surface: - if self.cuda_ctx: - self.surface = nvc.Surface.Make( - format=nvc.PixelFormat.NV12, - width=self.width(), - height=self.height(), - context=self.cuda_ctx.handle, - ) - else: - self.surface = nvc.Surface.Make( - format=nvc.PixelFormat.NV12, - width=self.width(), - height=self.height(), - gpu_id=self.device_id, - ) - self.surf_plane = self.surface.PlanePtr() - - pnvc.TensorToDptr( - img_tensor, - self.surf_plane.GpuMem(), - self.surf_plane.Width(), - self.surf_plane.Height(), - self.surf_plane.Pitch(), - self.surf_plane.ElemSize(), - ) - - return self.surface - - def encode_from_tensor(self, tensor): - """ - Encode single video frame from torch.cuda.FloatTensor. - Tensor must have planar RGB format and be normalized to range [0.0; 1.0]. - Shape of the tensor must be (3, height, width). 
- """ - assert tensor.dim() == 3 - assert tensor.device.index == self.device_id - - dst_surface = self.tensor_to_surface(tensor) - - if dst_surface.Empty(): - raise RuntimeError("Can not convert to yuv444.") - - success = self.nvEnc.EncodeSingleSurface(dst_surface, self.encoded_frame) - - if success: - self.write_frame( - self.encoded_frame, - self.pts_time, - self.fps, - self.avstream, - self.container, - ) - self.pts_time += self.delta_t - - # docs_tag: end_imp_nvencoder - - # docs_tag: begin_writeframe_nvencoder - def write_frame(self, encoded_frame, pts_time, fps, stream, container): - encoded_bytes = bytearray(encoded_frame) - pkt = av.packet.Packet(encoded_bytes) - pkt.pts = pts_time - pkt.dts = pts_time - pkt.stream = stream - pkt.time_base = 1 / Fraction(fps) - container.mux(pkt) - - def flush(self): - packets = np.ndarray(shape=(0), dtype=np.uint8) - - success = self.nvEnc.Flush(packets) - if success: - self.write_frame( - self.encoded_frame, - self.pts_time, - self.fps, - self.avstream, - self.container, - ) - self.pts_time += self.delta_t - - # docs_tag: end_writeframe_nvencoder - - """ Streaming video version of the Video Batch Decoder using VPF. """ diff --git a/samples/cropandresize/CMakeLists.txt b/samples/cropandresize/CMakeLists.txt index 938b6bb1e..3e09936f5 100644 --- a/samples/cropandresize/CMakeLists.txt +++ b/samples/cropandresize/CMakeLists.txt @@ -19,14 +19,14 @@ set(CMAKE_CXX_FLAGS "-Wno-deprecated-enum-enum-conversion") # tag: Build crop and resize sample -add_executable(nvcv_samples_cropandresize Main.cpp) -target_link_libraries(nvcv_samples_cropandresize nvcv_types cvcuda CUDA::cudart nvcv_samples_common) +add_executable(cvcuda_sample_cropandresize Main.cpp) +target_link_libraries(cvcuda_sample_cropandresize nvcv_types cvcuda CUDA::cudart cvcuda_samples_common) -target_include_directories(nvcv_samples_cropandresize +target_include_directories(cvcuda_sample_cropandresize PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) 
# tag: Install binaries -install(TARGETS nvcv_samples_cropandresize - EXPORT nvcv_samples_cropandresize +install(TARGETS cvcuda_sample_cropandresize + EXPORT cvcuda_sample_cropandresize COMPONENT samples DESTINATION samples/bin) diff --git a/samples/label/python/main.py b/samples/label/python/main.py index 575a2a2a4..aeff0f85a 100644 --- a/samples/label/python/main.py +++ b/samples/label/python/main.py @@ -38,7 +38,7 @@ parse_validate_default_args, ) -from torch_utils import ImageBatchDecoderPyTorch, ImageBatchEncoderPyTorch # noqa: E402 +from nvcodec_utils import ImageBatchDecoder, ImageBatchEncoder # noqa: E402 from interop_utils import to_cpu_numpy_buffer, to_cuda_buffer # noqa: E402 # docs_tag: end_python_imports @@ -59,7 +59,7 @@ def save_batch(images, label, encoder, batch): batch : Batch object to save the images Returns: - nvcv Tensor: RGB color, random for each label + n/a """ # Function to modify filenames in the batch def modify_filenames(suffix): @@ -70,13 +70,22 @@ def modify_filenames(suffix): modified_filenames.append(modified_filename) return modified_filenames - # convert to NCHW - imagesNCHW = cvcuda.reformat(images, "NCHW") + # Check if the format is what we expect + if encoder.input_layout != "NHWC": + raise ValueError( + "Expected input layout to be 'NHWC', but found '{}'".format( + encoder.input_layout + ) + ) + + # Convert to RGB if the input is grayscale, since the encoder expects RGB + if images.shape[3] == 1: + images = cvcuda.cvtcolor(images, cvcuda.ColorConversion.GRAY2RGB) # Modify filenames with "_labels" suffix oldFileNames = batch.fileinfo batch.fileinfo = modify_filenames(label) - batch.data = torch.as_tensor(imagesNCHW.cuda()) + batch.data = torch.as_tensor(images.cuda()) encoder(batch) batch.fileinfo = oldFileNames @@ -158,14 +167,12 @@ def run_sample( # Now define the object that will handle pre-processing if os.path.splitext(input_path)[1] == ".jpg" or os.path.isdir(input_path): # Treat this as data modality of images - decoder = ImageBatchDecoderPyTorch( + decoder = ImageBatchDecoder( input_path, batch_size, device_id, cuda_ctx, cvcuda_perf ) - encoder = ImageBatchEncoderPyTorch( + encoder = ImageBatchEncoder( output_dir, - fps=0, device_id=device_id, - cuda_ctx=cuda_ctx, cvcuda_perf=cvcuda_perf, ) else: @@ -204,7 +211,9 @@ def run_sample( # 1) CVCUDA tensor --> Nothing needs to be done.
# 2) Numpy Array --> Convert to torch tensor first and then CVCUDA tensor # 3) Torch Tensor --> Convert to CVCUDA tensor - if isinstance(batch.data, torch.Tensor): + if isinstance(batch.data, cvcuda.Tensor): + cvcudaTensorNHWC = batch.data + elif isinstance(batch.data, torch.Tensor): cvcudaTensorNHWC = cvcuda.as_tensor(batch.data, "NHWC") elif isinstance(batch.data, np.ndarray): cvcudaTensorNHWC = cvcuda.as_tensor( @@ -213,11 +222,12 @@ def run_sample( ), "NHWC", ) + else: + raise ValueError("Unknown input type: %s" % type(batch.data)) # docs_tag: end_tensor_conversion # Convert to grayscale out = cvcuda.cvtcolor(cvcudaTensorNHWC, cvcuda.ColorConversion.RGB2GRAY) - save_batch(out, "grayscale", encoder, batch) # Histogram eq the image diff --git a/samples/object_detection/python/main.py b/samples/object_detection/python/main.py index 9f37d97c4..935e121a1 100644 --- a/samples/object_detection/python/main.py +++ b/samples/object_detection/python/main.py @@ -38,11 +38,11 @@ parse_validate_default_args, ) -from torch_utils import ImageBatchDecoderPyTorch, ImageBatchEncoderPyTorch # noqa: E402 - -from vpf_utils import ( # noqa: E402 - VideoBatchDecoderVPF, - VideoBatchEncoderVPF, +from nvcodec_utils import ( # noqa: E402 + VideoBatchDecoder, + VideoBatchEncoder, + ImageBatchDecoder, + ImageBatchEncoder, ) from pipelines import ( # noqa: E402 @@ -95,24 +95,22 @@ def run_sample( if os.path.splitext(input_path)[1] == ".jpg" or os.path.isdir(input_path): # Treat this as data modality of images - decoder = ImageBatchDecoderPyTorch( + decoder = ImageBatchDecoder( input_path, batch_size, device_id, cuda_ctx, cvcuda_perf ) - encoder = ImageBatchEncoderPyTorch( + encoder = ImageBatchEncoder( output_dir, - fps=0, device_id=device_id, - cuda_ctx=cuda_ctx, cvcuda_perf=cvcuda_perf, ) else: # Treat this as data modality of videos - decoder = VideoBatchDecoderVPF( + decoder = VideoBatchDecoder( input_path, batch_size, device_id, cuda_ctx, cvcuda_perf ) - encoder = VideoBatchEncoderVPF( + encoder = VideoBatchEncoder( output_dir, decoder.fps, device_id, cuda_ctx, cvcuda_perf ) diff --git a/samples/scripts/README.md b/samples/scripts/README.md index c2b279b4a..3adf38ef5 100644 --- a/samples/scripts/README.md +++ b/samples/scripts/README.md @@ -1,3 +1,18 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." +[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + # Performance Benchmarking CV-CUDA samples ships with the following scripts that can help track and report the performance of the Python samples. @@ -31,7 +46,7 @@ This file holds the data structures and functions most commonly used during the With these tools, the benchmarking flow involves the following two steps: -1. Annotating the code of the sample using classes and functions from the `perf_utils.py` so that it can be profiled. +1. 
Annotating the code of the sample using classes and functions from the `perf_utils.py` so that it can be profiled. This is already done for you in the CV-CUDA Python samples. Here is how you can do it for any Python code: 1. Import the necessary classes and functions first ```python from perf_utils import CvCudaPerf, get_default_arg_parser, parse_validate_default_args @@ -75,7 +90,7 @@ With these tools, the benchmarking flow involves the following two steps: # Once everything is done, we must call the finalize(). cvcuda_perf.finalize() ``` -2. Use the sample with the `benchmark.py` to launch the benchmarking. `benchmark.py` can launch any script that uses `perf_utils`'s functionality and benchmark it using NSYS. It can also launch it in a multi-CPU multi-GPU fashion to compute the throughput. +2. Launch the sample with the `benchmark.py` to do the benchmarking. `benchmark.py` can launch any script that uses `perf_utils`'s functionality and benchmark it using NSYS. It can also launch it in a multi-CPU multi-GPU fashion to compute the throughput. 1. To benchmark the object detection sample, for example, we can use the following command: diff --git a/samples/scripts/benchmark.py b/samples/scripts/benchmark.py index 3bcb62ebc..fe252d263 100644 --- a/samples/scripts/benchmark.py +++ b/samples/scripts/benchmark.py @@ -127,8 +127,8 @@ def parse_nvtx_pushpop_trace_json(json_path): thread_id = row["TID"] # Process a bit. Conversion from nano to milliseconds. - start_ms = round(start_ns / 10**6, 3) - end_ms = round(end_ns / 10**6, 3) + start_ms = round(start_ns / 10**6, 4) + end_ms = round(end_ns / 10**6, 4) parent_range_id = None if parent_range_id == "None" else parent_range_id # Save it in our dictionary at the process id and thread id level. @@ -212,11 +212,11 @@ def parse_nvtx_gpu_proj_trace_json(json_path): thread_id = row["TID"] # Process a bit. Conversion from nano to milliseconds. - cpu_start_ms = round(cpu_start_ns / 10**6, 3) - cpu_end_ms = round(cpu_end_ns / 10**6, 3) + cpu_start_ms = round(cpu_start_ns / 10**6, 4) + cpu_end_ms = round(cpu_end_ns / 10**6, 4) - gpu_start_ms = round(gpu_start_ns / 10**6, 3) - gpu_end_ms = round(gpu_end_ns / 10**6, 3) + gpu_start_ms = round(gpu_start_ns / 10**6, 4) + gpu_end_ms = round(gpu_end_ns / 10**6, 4) # Save it in our dictionary at the process id and thread id level. if process_id not in range_info: @@ -385,8 +385,8 @@ def calc_mean_ranges(all_range_info): cpu_ranges_list = mean_range_info[range_name][0] gpu_ranges_list = mean_range_info[range_name][1] - avg_cpu_time = round(sum(cpu_ranges_list) / len(cpu_ranges_list), 3) - avg_gpu_time = round(sum(gpu_ranges_list) / len(gpu_ranges_list), 3) + avg_cpu_time = round(sum(cpu_ranges_list) / len(cpu_ranges_list), 4) + avg_gpu_time = round(sum(gpu_ranges_list) / len(gpu_ranges_list), 4) mean_range_info[range_name] = (avg_cpu_time, avg_gpu_time) else: @@ -481,12 +481,12 @@ def recurse_divide_dict(input_dict, divide_by=None): for i in range(len(input_dict[key].value)): input_dict[key].value[i] /= divide_by - input_dict[key].value[i] = round(input_dict[key].value[i], 3) + input_dict[key].value[i] = round(input_dict[key].value[i], 4) else: divide_by = divide_by if divide_by else input_dict[key].len input_dict[key].value /= divide_by - input_dict[key].value = round(input_dict[key].value, 3) + input_dict[key].value = round(input_dict[key].value, 4) # Remove the MeanDictInfo object and store the value directly. 
input_dict[key] = input_dict[key].value @@ -500,11 +500,11 @@ def recurse_divide_dict(input_dict, divide_by=None): for i in range(len(input_dict[key].value)): input_dict[key][i] /= divide_by - input_dict[key][i] = round(input_dict[key][i], 3) + input_dict[key][i] = round(input_dict[key][i], 4) else: input_dict[key] /= divide_by - input_dict[key] = round(input_dict[key], 3) + input_dict[key] = round(input_dict[key], 4) def unflatten_process_benchmark_dict(benchmark_dict, warmup_batches): @@ -631,11 +631,11 @@ def unflatten_process_benchmark_dict(benchmark_dict, warmup_batches): # Computer per item. if batch_size > 0: current_dict[parts[-1]]["cpu_time_per_item"] = round( - current_dict[parts[-1]]["cpu_time"] / batch_size, 3 + current_dict[parts[-1]]["cpu_time"] / batch_size, 4 ) current_dict[parts[-1]]["gpu_time_per_item"] = round( - current_dict[parts[-1]]["gpu_time"] / batch_size, 3 + current_dict[parts[-1]]["gpu_time"] / batch_size, 4 ) # Maintain global counts of various batch level stats @@ -689,11 +689,11 @@ def unflatten_process_benchmark_dict(benchmark_dict, warmup_batches): if total_items[path] > 0: current_dict[parts[-1]]["cpu_time_per_item"] = round( current_dict[parts[-1]]["cpu_time"] / total_items[path], - 3, + 4, ) current_dict[parts[-1]]["gpu_time_per_item"] = round( current_dict[parts[-1]]["gpu_time"] / total_items[path], - 3, + 4, ) current_dict[parts[-1]]["total_items"] = total_items[path] @@ -709,11 +709,11 @@ def unflatten_process_benchmark_dict(benchmark_dict, warmup_batches): if total_items_above_level > 0: current_dict[parts[-1]]["cpu_time_per_item"] = round( current_dict[parts[-1]]["cpu_time"] / total_items_above_level, - 3, + 4, ) current_dict[parts[-1]]["gpu_time_per_item"] = round( current_dict[parts[-1]]["gpu_time"] / total_items_above_level, - 3, + 4, ) current_dict[parts[-1]]["total_items"] = total_items_above_level @@ -726,10 +726,10 @@ def unflatten_process_benchmark_dict(benchmark_dict, warmup_batches): batch_dict = batch_dicts[batch_level_prefix] batch_dict["cpu_time_minus_warmup"] = round( - (batch_dict["cpu_time"] - total_warmup_cpu_time[batch_level_prefix]), 3 + (batch_dict["cpu_time"] - total_warmup_cpu_time[batch_level_prefix]), 4 ) batch_dict["gpu_time_minus_warmup"] = round( - (batch_dict["gpu_time"] - total_warmup_gpu_time[batch_level_prefix]), 3 + (batch_dict["gpu_time"] - total_warmup_gpu_time[batch_level_prefix]), 4 ) batch_dict["cpu_time_minus_warmup_per_item"] = 0 @@ -739,12 +739,12 @@ def unflatten_process_benchmark_dict(benchmark_dict, warmup_batches): batch_dict["cpu_time_minus_warmup_per_item"] = round( batch_dict["cpu_time_minus_warmup"] / total_items_minus_warmup[batch_level_prefix], - 3, + 4, ) batch_dict["gpu_time_minus_warmup_per_item"] = round( batch_dict["gpu_time_minus_warmup"] / total_items_minus_warmup[batch_level_prefix], - 3, + 4, ) batch_dict["total_items_minus_warmup"] = total_items_minus_warmup[ @@ -1117,7 +1117,7 @@ def main(): proc_args, ), ) - logger.info("Launched process: %d on gpu: %d" % (process_idx, gpu_idx)) + logger.info("Launched process: %d. gpu-idx: %d" % (process_idx, gpu_idx)) results.append(result) # Close the pool and wait everything to finish. diff --git a/samples/scripts/benchmark_samples.sh b/samples/scripts/benchmark_samples.sh index 07851442a..7b97c3f78 100755 --- a/samples/scripts/benchmark_samples.sh +++ b/samples/scripts/benchmark_samples.sh @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Usage: benchmark_samples.sh +# Usage: benchmark_samples.sh # Performs benchmarking of all Python samples. # Since some samples may involve creation of a TensorRT model on the first run and since it takes @@ -23,62 +23,106 @@ # Only the results of the second run will be used. The model artifacts from the first run will # help us run the second run easily. -mkdir -p /tmp/benchmarking/classification -mkdir -p /tmp/benchmarking/segmentation -mkdir -p /tmp/benchmarking/detection + +set -e # Stops this script if any one command fails. + +if [ "$#" -lt 1 ]; then + echo "Usage: benchmark_samples.sh {USE_TENSORRT: True}" + exit 1 +fi + +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +SAMPLES_ROOT="$(dirname "$SCRIPT_DIR")" # removes the scripts dir +OUTPUT_DIR="$1" +USE_TRT=${2:-True} +CLASSIFICATION_OUT_DIR="$OUTPUT_DIR/classification" +SEGMENTATION_OUT_DIR="$OUTPUT_DIR/segmentation" +DETECTION_OUT_DIR="$OUTPUT_DIR/detection" + +mkdir -p "$CLASSIFICATION_OUT_DIR" +mkdir -p "$SEGMENTATION_OUT_DIR" +mkdir -p "$DETECTION_OUT_DIR" + +echo "OUTPUT_DIR: $OUTPUT_DIR" +echo "CLASSIFICATION_OUT_DIR: $CLASSIFICATION_OUT_DIR" +echo "SEGMENTATION_OUT_DIR: $SEGMENTATION_OUT_DIR" +echo "DETECTION_OUT_DIR: $DETECTION_OUT_DIR" +if [ "$USE_TRT" = "True" ]; then + echo "Using TensorRT as the inference back-end in all the runs." + CLASSIFICATION_BACKEND="tensorrt" + SEGMENTATION_BACKEND="tensorrt" + DETECTION_BACKEND="tensorrt" +else + echo "Not using TensorRT as the inference back-end in all the runs." + CLASSIFICATION_BACKEND="pytorch" + SEGMENTATION_BACKEND="pytorch" + DETECTION_BACKEND="tensorflow" +fi # 1. The Classification sample # First dry run with 2 processes and 1 batch from start and end used as a warm-up batch. -python ./scripts/benchmark.py \ +echo "Running the classification sample (warm-up run)..." +python3 "$SCRIPT_DIR/benchmark.py" \ -np 2 \ -w 1 \ - -o /tmp/benchmarking/classification \ - ./classification/python/main.py \ + -o "$CLASSIFICATION_OUT_DIR" \ + "$SAMPLES_ROOT/classification/python/main.py" \ -b 4 \ - -i ./assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4 + -bk $CLASSIFICATION_BACKEND \ + -i "$SAMPLES_ROOT/assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4" # Second run - the actual run. -python ./scripts/benchmark.py \ +echo "Running the classification sample (actual run)..." +python3 "$SCRIPT_DIR/benchmark.py" \ -np 2 \ -w 1 \ - -o /tmp/benchmarking/classification \ - ./classification/python/main.py \ + -o "$CLASSIFICATION_OUT_DIR" \ + "$SAMPLES_ROOT/classification/python/main.py" \ -b 4 \ - -i ./assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4 + -bk $CLASSIFICATION_BACKEND \ + -i "$SAMPLES_ROOT/assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4" # 2. The Segmentation sample # First dry run with 2 processes and 1 batch from start and end used as a warm-up batch. -python ./scripts/benchmark.py \ +echo "Running the segmentation sample (warm-up run)..." +python3 "$SCRIPT_DIR/benchmark.py" \ -np 2 \ -w 1 \ - -o /tmp/benchmarking/segmentation \ - ./segmentation/python/main.py \ + -o "$SEGMENTATION_OUT_DIR" \ + "$SAMPLES_ROOT/segmentation/python/main.py" \ -b 4 \ - -i ./assets/videos/pexels-ilimdar-avgezer-7081456.mp4 + -bk $SEGMENTATION_BACKEND \ + -i "$SAMPLES_ROOT/assets/videos/pexels-ilimdar-avgezer-7081456.mp4" # Second run - the actual run. -python ./scripts/benchmark.py \ +echo "Running the segmentation sample (actual run)..." 
+python3 "$SCRIPT_DIR/benchmark.py" \ -np 2 \ -w 1 \ - -o /tmp/benchmarking/segmentation \ - ./segmentation/python/main.py \ + -o "$SEGMENTATION_OUT_DIR" \ + "$SAMPLES_ROOT/segmentation/python/main.py" \ -b 4 \ - -i ./assets/videos/pexels-ilimdar-avgezer-7081456.mp4 + -bk $SEGMENTATION_BACKEND \ + -i "$SAMPLES_ROOT/assets/videos/pexels-ilimdar-avgezer-7081456.mp4" # 3. The Object Detection sample # First dry run with 2 processes and 1 batch from start and end used as a warm-up batch. -python ./scripts/benchmark.py \ - -np 2 \ +echo "Running the detection sample (warm-up run)..." +python3 "$SCRIPT_DIR/benchmark.py" \ + -np 1 \ -w 1 \ - -o /tmp/benchmarking/detection \ - ./object_detection/python/main.py \ + -o "$DETECTION_OUT_DIR" \ + "$SAMPLES_ROOT/object_detection/python/main.py" \ -b 4 \ - -i ./assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4 + -bk $DETECTION_BACKEND \ + -i "$SAMPLES_ROOT/assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4" # Second run - the actual run. -python ./scripts/benchmark.py \ - -np 2 \ +echo "Running the detection sample (actual run)..." +python3 "$SCRIPT_DIR/benchmark.py" \ + -np 1 \ -w 1 \ - -o /tmp/benchmarking/detection \ - ./object_detection/python/main.py \ + -o "$DETECTION_OUT_DIR" \ + "$SAMPLES_ROOT/object_detection/python/main.py" \ -b 4 \ - -i ./assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4 + -bk $DETECTION_BACKEND \ + -i "$SAMPLES_ROOT/assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4" # Done. diff --git a/samples/scripts/install_dependencies.sh b/samples/scripts/install_dependencies.sh index bb3a4f24d..cd2e6fb72 100755 --- a/samples/scripts/install_dependencies.sh +++ b/samples/scripts/install_dependencies.sh @@ -18,6 +18,33 @@ # This script installs all the dependencies required to run the CVCUDA samples. # It uses the /tmp folder to download temporary data and libraries. +# SCRIPT_DIR is the directory where this script is located. +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" + +# Check CUDA version. Begin by checking if nvcc command exists. +if command -v nvcc >/dev/null 2>&1; then + # Get CUDA version from nvcc output + CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $5}') + + # Extract major version number + CUDA_MAJOR_VERSION=$(echo "$CUDA_VERSION" | cut -d. -f1) + + # Check major version to determine CUDA version + if [ "$CUDA_MAJOR_VERSION" -eq 11 ]; then + echo "CUDA 11 is installed." + elif [ "$CUDA_MAJOR_VERSION" -eq 12 ]; then + echo "CUDA 12 is installed." + else + echo "Unknown/Unsupported CUDA version." + exit 1 + fi +else + echo "CUDA is not installed." + exit 1 +fi + +set -e # Exit script if any command fails + # Install basic packages first. cd /tmp apt-get update && apt-get install -y --no-install-recommends \ @@ -41,7 +68,7 @@ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11 update-alternatives --set gcc /usr/bin/gcc-11 update-alternatives --set g++ /usr/bin/g++-11 -# Install python and gtest +# Install Python and gtest apt-get update && apt-get install -y --no-install-recommends \ libgtest-dev \ libgmock-dev \ @@ -50,16 +77,7 @@ apt-get update && apt-get install -y --no-install-recommends \ mlocate && updatedb \ && rm -rf /var/lib/apt/lists/* -# Install pip and all the python packages. -pip3 install --upgrade pip -pip3 install torch==1.13.0 torchvision==0.14.0 av==10.0.0 pycuda==2022.1 nvtx==0.2.5 tensorflow==2.11.1 -cd /tmp -[ ! 
-d 'torchnvjpeg' ] && git clone https://github.com/itsliupeng/torchnvjpeg.git -cd torchnvjpeg && python3 setup.py bdist_wheel && cd dist && pip3 install torchnvjpeg-0.1.0-*-linux_x86_64.whl -echo "export PATH=$PATH:/opt/tensorrt/bin" >> ~/.bashrc - -# Install VPF and its dependencies. -# 1. ffmpeg and other libraries needed for VPF. +# Install ffmpeg and other libraries needed for VPF. # Note: We are not installing either libnv-encode or decode libraries here. apt-get update && apt-get install -y --no-install-recommends \ ffmpeg \ @@ -69,10 +87,11 @@ apt-get update && apt-get install -y --no-install-recommends \ libswresample-dev \ libavutil-dev\ && rm -rf /var/lib/apt/lists/* + +# Install libssl 1.1.1 cd /tmp -[ ! -d 'VideoProcessingFramework' ] && git clone https://github.com/NVIDIA/VideoProcessingFramework.git -pip3 install /tmp/VideoProcessingFramework -pip3 install /tmp/VideoProcessingFramework/src/PytorchNvCodec +wget http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.0g-2ubuntu4_amd64.deb +dpkg -i libssl1.1_1.1.0g-2ubuntu4_amd64.deb # Install tao-converter which parses the .etlt model file, and generates an optimized TensorRT engine wget 'https://api.ngc.nvidia.com/v2/resources/nvidia/tao/tao-converter/versions/v4.0.0_trt8.5.1.7_x86/files/tao-converter' --directory-prefix=/usr/local/bin @@ -91,4 +110,30 @@ apt-get update && apt-get install -y \ /tmp/nsight-systems-2023.2.1_2023.2.1.122-1_amd64.deb \ && rm -rf /var/lib/apt/lists/* +echo "export PATH=$PATH:/opt/tensorrt/bin" >> ~/.bashrc + +# Upgrade pip and install all required Python packages. +pip3 install --upgrade pip +pip3 install -r "$SCRIPT_DIR/requirements.txt" + +# Install VPF +cd /tmp +[ ! -d 'VideoProcessingFramework' ] && git clone https://github.com/NVIDIA/VideoProcessingFramework.git +# HotFix: Must change the PyTorch version used by PytorchNvCodec to match the one we are using. +# Since we are using 2.2.0 we must use that. +sed -i 's/torch/torch==2.2.0/g' /tmp/VideoProcessingFramework/src/PytorchNvCodec/pyproject.toml +sed -i 's/"torch"/"torch==2.2.0"/g' /tmp/VideoProcessingFramework/src/PytorchNvCodec/setup.py +pip3 install /tmp/VideoProcessingFramework +pip3 install /tmp/VideoProcessingFramework/src/PytorchNvCodec + +# Install NvImageCodec +pip3 install nvidia-nvimgcodec-cu${CUDA_MAJOR_VERSION} +pip3 install nvidia-pyindex +pip3 install nvidia-nvjpeg-cu${CUDA_MAJOR_VERSION} + +# Install NvPyVideoCodec +cd /tmp +wget --content-disposition https://api.ngc.nvidia.com/v2/resources/nvidia/py_nvvideocodec/versions/0.0.9/zip -O py_nvvideocodec_0.0.9.zip +pip3 install py_nvvideocodec_0.0.9.zip + # Done diff --git a/samples/scripts/requirements.txt b/samples/scripts/requirements.txt new file mode 100644 index 000000000..f5a6af782 --- /dev/null +++ b/samples/scripts/requirements.txt @@ -0,0 +1,9 @@ +torch==2.2.0 +torchvision==0.17.0 +onnx==1.15.0 +av==11.0.0 +pycuda==2024.1 +nvtx==0.2.8 +tensorflow==2.15.0.post1 +pandas==2.0.3 +matplotlib==3.7.4 diff --git a/samples/scripts/run_samples.sh b/samples/scripts/run_samples.sh index 2679e5468..dea98a584 100755 --- a/samples/scripts/run_samples.sh +++ b/samples/scripts/run_samples.sh @@ -20,69 +20,90 @@ # NOTE: This script may take a long time to finish since some samples may need to create # TensorRT models as they run for the first time. 
+set -e + +export CUDA_MODULE_LOADING="LAZY" +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +SAMPLES_DIR="$(dirname "$SCRIPT_DIR")" +CLASSIFICATION_OUT_DIR=/tmp/classification +SEGMENTATION_OUT_DIR="/tmp/segmentation" +DETECTION_OUT_DIR="/tmp/object_detection" +DISTANCE_LABEL_OUT_DIR="/tmp/distance_label" + +echo "SAMPLES_DIR: $SAMPLES_DIR" +echo "CLASSIFICATION_OUT_DIR: $CLASSIFICATION_OUT_DIR" +echo "SEGMENTATION_OUT_DIR: $SEGMENTATION_OUT_DIR" +echo "DETECTION_OUT_DIR: $DETECTION_OUT_DIR" +echo "DISTANCE_LABEL_OUT_DIR: $DISTANCE_LABEL_OUT_DIR" + # Crop and Resize Sample # Batch size 2 -LD_LIBRARY_PATH=./lib ./bin/nvcv_samples_cropandresize -i ./assets/images/ -b 2 -export CUDA_MODULE_LOADING="LAZY" +LD_LIBRARY_PATH=$SAMPLES_DIR/lib $SAMPLES_DIR/build/cropandresize/cvcuda_sample_cropandresize -i $SAMPLES_DIR/assets/images/ -b 2 -# Run the classification Python sample first. This will save the necessary TensorRT model +# Run the classification Python sample. This will save the necessary TensorRT model # and labels in the output directory. The C++ sample can then use those directly. # Run the segmentation Python sample with default settings, without any command-line args. -find /tmp/ -maxdepth 1 -type f -delete -python3 ./classification/python/main.py +rm -rf "$CLASSIFICATION_OUT_DIR" +mkdir "$CLASSIFICATION_OUT_DIR" +python3 $SAMPLES_DIR/classification/python/main.py -o "$CLASSIFICATION_OUT_DIR" # Run it on a specific image with batch size 1 with PyTorch backend. -python3 ./classification/python/main.py -i ./assets/images/tabby_tiger_cat.jpg -b 1 -bk pytorch +python3 $SAMPLES_DIR/classification/python/main.py -i $SAMPLES_DIR/assets/images/tabby_tiger_cat.jpg -b 1 -bk pytorch -o "$CLASSIFICATION_OUT_DIR" # # Run it on a specific image with batch size 4 with PyTorch backend. Uses Same image multiple times -python3 ./classification/python/main.py -i ./assets/images/tabby_tiger_cat.jpg -b 4 -bk pytorch +python3 $SAMPLES_DIR/classification/python/main.py -i $SAMPLES_DIR/assets/images/tabby_tiger_cat.jpg -b 4 -bk pytorch -o "$CLASSIFICATION_OUT_DIR" # Run it on a folder worth of images with batch size 2 with PyTorch backend. -python3 ./classification/python/main.py -i ./assets/images/ -b 2 -bk pytorch +python3 $SAMPLES_DIR/classification/python/main.py -i $SAMPLES_DIR/assets/images/ -b 2 -bk pytorch -o "$CLASSIFICATION_OUT_DIR" # Run it on a specific image with batch size 1 with TensorRT backend with saving the output in a specific directory. -mkdir /tmp/classification -python3 ./classification/python/main.py -i ./assets/images/tabby_tiger_cat.jpg -b 1 -bk tensorrt -o /tmp/classification + +python3 $SAMPLES_DIR/classification/python/main.py -i $SAMPLES_DIR/assets/images/tabby_tiger_cat.jpg -b 1 -bk tensorrt -o "$CLASSIFICATION_OUT_DIR" # Run it on a specific image with batch size 1 with TensorRT backend with saving the output in a specific directory. -python3 ./classification/python/main.py -i ./assets/images/tabby_tiger_cat.jpg -b 2 -bk tensorrt -o /tmp/classification +python3 $SAMPLES_DIR/classification/python/main.py -i $SAMPLES_DIR/assets/images/tabby_tiger_cat.jpg -b 2 -bk tensorrt -o "$CLASSIFICATION_OUT_DIR" # Run it on a video with batch size 1 with TensorRT backend with saving the output in a specific directory. 
-python3 ./classification/python/main.py -i ./assets/videos/pexels-ilimdar-avgezer-7081456.mp4 -b 1 -bk tensorrt -o /tmp/classification +python3 $SAMPLES_DIR/classification/python/main.py -i $SAMPLES_DIR/assets/videos/pexels-ilimdar-avgezer-7081456.mp4 -b 1 -bk tensorrt -o "$CLASSIFICATION_OUT_DIR" # Run the classification C++ sample. Since the Python sample was already run, we can reuse the TensorRT model # and the labels file generated by it. # Batch size 1 -LD_LIBRARY_PATH=./lib ./bin/nvcv_samples_classification -e /tmp/classification/model.1.224.224.trtmodel -i ./assets/images/tabby_tiger_cat.jpg -l /tmp/classification/labels.txt -b 1 +LD_LIBRARY_PATH=$SAMPLES_DIR/lib $SAMPLES_DIR/build/classification/cvcuda_sample_classification -e /tmp/classification/model.1.224.224.trtmodel -i $SAMPLES_DIR/assets/images/tabby_tiger_cat.jpg -l /tmp/classification/labels.txt -b 1 # Batch size 2 -LD_LIBRARY_PATH=./lib ./bin/nvcv_samples_classification -e /tmp/classification/model.2.224.224.trtmodel -i ./assets/images/tabby_tiger_cat.jpg -l /tmp/classification/labels.txt -b 2 +LD_LIBRARY_PATH=$SAMPLES_DIR/lib $SAMPLES_DIR/build/classification/cvcuda_sample_classification -e /tmp/classification/model.2.224.224.trtmodel -i $SAMPLES_DIR/assets/images/tabby_tiger_cat.jpg -l /tmp/classification/labels.txt -b 2 # Run the segmentation Python sample with default settings, without any command-line args. -find /tmp/ -maxdepth 1 -type f -delete -python3 ./segmentation/python/main.py +rm -rf "$SEGMENTATION_OUT_DIR" +mkdir "$SEGMENTATION_OUT_DIR" +python3 $SAMPLES_DIR/segmentation/python/main.py -o "$SEGMENTATION_OUT_DIR" # Run the segmentation sample with default settings for PyTorch backend. -python3 ./segmentation/python/main.py -bk pytorch +python3 $SAMPLES_DIR/segmentation/python/main.py -bk pytorch -o "$SEGMENTATION_OUT_DIR" # Run it on a single image with high batch size for the background class writing to a specific directory with PyTorch backend -python3 ./segmentation/python/main.py -i ./assets/images/tabby_tiger_cat.jpg -o /tmp -b 5 -c __background__ -bk pytorch +python3 $SAMPLES_DIR/segmentation/python/main.py -i $SAMPLES_DIR/assets/images/tabby_tiger_cat.jpg -o "$SEGMENTATION_OUT_DIR" -b 5 -c __background__ -bk pytorch # Run it on a folder worth of images with the default tensorrt backend -python3 ./segmentation/python/main.py -i ./assets/images/ -o /tmp -b 4 -c __background__ +python3 $SAMPLES_DIR/segmentation/python/main.py -i $SAMPLES_DIR/assets/images/ -o "$SEGMENTATION_OUT_DIR" -b 4 -c __background__ # Run it on a folder worth of images with PyTorch -python3 ./segmentation/python/main.py -i ./assets/images/ -o /tmp -b 5 -c __background__ -bk pytorch +python3 $SAMPLES_DIR/segmentation/python/main.py -i $SAMPLES_DIR/assets/images/ -o "$SEGMENTATION_OUT_DIR" -b 5 -c __background__ -bk pytorch # Run on a single image with custom resized input given to the sample for the dog class -python3 ./segmentation/python/main.py -i ./assets/images/Weimaraner.jpg -o /tmp -b 1 -c dog -th 512 -tw 512 +python3 $SAMPLES_DIR/segmentation/python/main.py -i $SAMPLES_DIR/assets/images/Weimaraner.jpg -o "$SEGMENTATION_OUT_DIR" -b 1 -c dog -th 512 -tw 512 # Run it on a video for class background. 
-python ./segmentation/python/main.py -i ./assets/videos/pexels-ilimdar-avgezer-7081456.mp4 -b 4 -c __background__ +python3 $SAMPLES_DIR/segmentation/python/main.py -i $SAMPLES_DIR/assets/videos/pexels-ilimdar-avgezer-7081456.mp4 -b 4 -c __background__ -o "$SEGMENTATION_OUT_DIR" # Run it on a video for class background with the PyTorch backend. -python ./segmentation/python/main.py -i ./assets/videos/pexels-ilimdar-avgezer-7081456.mp4 -b 4 -c __background__ -bk pytorch +python3 $SAMPLES_DIR/segmentation/python/main.py -i $SAMPLES_DIR/assets/videos/pexels-ilimdar-avgezer-7081456.mp4 -b 4 -c __background__ -bk pytorch -o "$SEGMENTATION_OUT_DIR" + # Run the object detection Python sample with default settings, without any command-line args. -find /tmp/ -maxdepth 1 -type f -delete -python3 ./object_detection/python/main.py +rm -rf "$DETECTION_OUT_DIR" +mkdir "$DETECTION_OUT_DIR" +python3 $SAMPLES_DIR/object_detection/python/main.py -o "$DETECTION_OUT_DIR" # Run it with batch size 1 on a single image -python3 ./object_detection/python/main.py -i ./assets/images/peoplenet.jpg -b 1 +python3 $SAMPLES_DIR/object_detection/python/main.py -i $SAMPLES_DIR/assets/images/peoplenet.jpg -b 1 -o "$DETECTION_OUT_DIR" # Run it with batch size 4 on a video -python3 ./object_detection/python/main.py -i ./assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4 -b 4 +python3 $SAMPLES_DIR/object_detection/python/main.py -i $SAMPLES_DIR/assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4 -b 4 -o "$DETECTION_OUT_DIR" # Run it with batch size 2 on a folder of images -python3 ./object_detection/python/main.py -i ./assets/images/ -b 3 +python3 $SAMPLES_DIR/object_detection/python/main.py -i $SAMPLES_DIR/assets/images/ -b 3 -o "$DETECTION_OUT_DIR" # RUn it with the TensorFlow backend -python3 ./object_detection/python/main.py -i ./assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4 -b 4 -bk tensorflow +python3 $SAMPLES_DIR/object_detection/python/main.py -i $SAMPLES_DIR/assets/videos/pexels-chiel-slotman-4423925-1920x1080-25fps.mp4 -b 4 -bk tensorflow -o "$DETECTION_OUT_DIR" + -# Run the label Python sample with default settings, without any command-line args. -find /tmp/ -maxdepth 1 -type f -delete -python3 ./label/python/main.py +# Run the distance label Python sample with default settings, without any command-line args. +rm -rf "$DISTANCE_LABEL_OUT_DIR" +mkdir "$DISTANCE_LABEL_OUT_DIR" +python3 $SAMPLES_DIR/label/python/main.py -o "$DISTANCE_LABEL_OUT_DIR" # Run it with batch size 1 on a single image -python3 ./label/python/main.py -i ./assets/images/peoplenet.jpg -b 1 +python3 $SAMPLES_DIR/label/python/main.py -i $SAMPLES_DIR/assets/images/peoplenet.jpg -b 1 -o "$DISTANCE_LABEL_OUT_DIR" diff --git a/samples/segmentation/python/README.md b/samples/segmentation/python/README.md index d35d0dc1e..d9bd7537c 100644 --- a/samples/segmentation/python/README.md +++ b/samples/segmentation/python/README.md @@ -1,3 +1,18 @@ + +[//]: # "SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved." +[//]: # "SPDX-License-Identifier: Apache-2.0" +[//]: # "" +[//]: # "Licensed under the Apache License, Version 2.0 (the 'License');" +[//]: # "you may not use this file except in compliance with the License." 
+[//]: # "You may obtain a copy of the License at" +[//]: # "http://www.apache.org/licenses/LICENSE-2.0" +[//]: # "" +[//]: # "Unless required by applicable law or agreed to in writing, software" +[//]: # "distributed under the License is distributed on an 'AS IS' BASIS" +[//]: # "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." +[//]: # "See the License for the specific language governing permissions and" +[//]: # "limitations under the License." + # Semantic Segmentation : Locally and using Triton ## Pre-requisites @@ -66,12 +81,12 @@ Triton has different public [Docker images](https://catalog.ngc.nvidia.com/orgs/ ./samples/scripts/install_dependencies.sh pip3 install tensorrt ``` -3. Install the CV-CUDA packages. Pre-built packages `.deb`, `.tar.xz`, `.whl` are only available on Github, so need to download from there. Otherwise, please build from source. Please note that since the above container comes with Python 3.8.10, we will install nvcv-python3.8-0 package as mentioned below. If you have any other Python distributions, you would need to use the appropriate nvcv-python packages below. +3. Install the CV-CUDA packages. Pre-built packages `.deb`, `.tar.xz`, `.whl` are only available on Github, so need to download from there. Otherwise, please build from source. Please note that since the above container comes with Python 3.8.10, we will install cvcuda-python3.8-0 package as mentioned below. If you have any other Python distributions, you would need to use the appropriate cvcuda-python packages below. ```bash - wget https://github.com/CVCUDA/CV-CUDA/releases/download/v0.3.0-beta/nvcv-lib-0.3.0_beta-cuda11-x86_64-linux.deb \ - https://github.com/CVCUDA/CV-CUDA/releases/download/v0.3.0-beta/nvcv-python3.8-0.3.0_beta-cuda11-x86_64-linux.deb \ - https://github.com/CVCUDA/CV-CUDA/releases/download/v0.3.0-beta/nvcv_python-0.3.x_beta-cp38-cp38-linux_x86_64.whl \ + wget https://github.com/CVCUDA/CV-CUDA/releases/download/v0.6.0-beta/cvcuda-lib-0.6.0_beta-cuda11-x86_64-linux.deb \ + https://github.com/CVCUDA/CV-CUDA/releases/download/v0.6.0-beta/cvcuda-python3.8-0.6.0_beta-cuda11-x86_64-linux.deb \ + https://github.com/CVCUDA/CV-CUDA/releases/download/v0.6.0-beta/cvcuda_cu11-0.6.0b0-cp310-cp310-linux_x86_64.whl \ -P /tmp/cvcuda && \ apt-get install -y /tmp/cvcuda/*.deb && \ pip3 install /tmp/cvcuda/*.whl diff --git a/samples/segmentation/python/main.py b/samples/segmentation/python/main.py index 7412e444d..02c8a9820 100644 --- a/samples/segmentation/python/main.py +++ b/samples/segmentation/python/main.py @@ -22,27 +22,27 @@ import logging import cvcuda import torch -from pathlib import Path -# Bring module folders from the samples directory into our path so that +# Bring the commons folder from the samples directory into our path so that # we can import modules from it. 
-samples_dir = Path(os.path.abspath(__file__)).parents[2] # samples/ -sys.path.insert(0, os.path.join(samples_dir, "")) +common_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + "common", + "python", +) +sys.path.insert(0, common_dir) -from common.python.perf_utils import ( # noqa: E402 +from perf_utils import ( # noqa: E402 CvCudaPerf, get_default_arg_parser, parse_validate_default_args, ) -from common.python.torch_utils import ( # noqa: E402 - ImageBatchDecoderPyTorch, - ImageBatchEncoderPyTorch, -) - -from common.python.vpf_utils import ( # noqa: E402 - VideoBatchDecoderVPF, - VideoBatchEncoderVPF, +from nvcodec_utils import ( # noqa: E402 + VideoBatchDecoder, + VideoBatchEncoder, + ImageBatchDecoder, + ImageBatchEncoder, ) from pipelines import ( # noqa: E402 @@ -95,7 +95,7 @@ def run_sample( if os.path.splitext(input_path)[1] == ".jpg" or os.path.isdir(input_path): # Treat this as data modality of images - decoder = ImageBatchDecoderPyTorch( + decoder = ImageBatchDecoder( input_path, batch_size, device_id, @@ -103,16 +103,14 @@ def run_sample( cvcuda_perf, ) - encoder = ImageBatchEncoderPyTorch( + encoder = ImageBatchEncoder( output_dir, - fps=0, device_id=device_id, - cuda_ctx=cuda_ctx, cvcuda_perf=cvcuda_perf, ) else: # Treat this as data modality of videos - decoder = VideoBatchDecoderVPF( + decoder = VideoBatchDecoder( input_path, batch_size, device_id, @@ -120,7 +118,7 @@ def run_sample( cvcuda_perf, ) - encoder = VideoBatchEncoderVPF( + encoder = VideoBatchEncoder( output_dir, decoder.fps, device_id, diff --git a/samples/segmentation/python/model_inference.py b/samples/segmentation/python/model_inference.py index 8b271bb5f..84a3ee538 100644 --- a/samples/segmentation/python/model_inference.py +++ b/samples/segmentation/python/model_inference.py @@ -23,14 +23,16 @@ from torchvision.models import segmentation as segmentation_models import tensorrt as trt -from pathlib import Path - -# Bring module folders from the samples directory into our path so that +# Bring the commons folder from the samples directory into our path so that # we can import modules from it. 
-samples_dir = Path(os.path.abspath(__file__)).parents[2] # samples/ -sys.path.insert(0, os.path.join(samples_dir, "")) +common_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + "common", + "python", +) +sys.path.insert(0, common_dir) -from common.python.trt_utils import ( # noqa: E402 +from trt_utils import ( # noqa: E402 convert_onnx_to_tensorrt, setup_tensort_bindings, ) diff --git a/samples/segmentation/python/triton_client.py b/samples/segmentation/python/triton_client.py index d902a2675..7802fec2d 100644 --- a/samples/segmentation/python/triton_client.py +++ b/samples/segmentation/python/triton_client.py @@ -41,14 +41,14 @@ parse_validate_default_args, ) -from common.python.torch_utils import ( # noqa: E402 - ImageBatchDecoderPyTorch, - ImageBatchEncoderPyTorch, +from common.python.nvcodec_utils import ( # noqa: E402 + VideoBatchDecoder, + VideoBatchEncoder, + ImageBatchDecoder, + ImageBatchEncoder, ) from common.python.vpf_utils import ( # noqa: E402 - VideoBatchDecoderVPF, - VideoBatchEncoderVPF, VideoBatchStreamingDecoderVPF, VideoBatchStreamingEncoderVPF, ) @@ -123,7 +123,7 @@ def run_sample( # docs_tag: begin_init_dataloader if os.path.splitext(input_path)[1] == ".jpg" or os.path.isdir(input_path): # Treat this as data modality of images - decoder = ImageBatchDecoderPyTorch( + decoder = ImageBatchDecoder( input_path, batch_size, device_id, @@ -131,11 +131,9 @@ def run_sample( cvcuda_perf, ) - encoder = ImageBatchEncoderPyTorch( + encoder = ImageBatchEncoder( output_dir, - fps=0, device_id=device_id, - cuda_ctx=cuda_ctx, cvcuda_perf=cvcuda_perf, ) else: @@ -163,7 +161,7 @@ def run_sample( decoder.decoder.fps, ) else: - decoder = VideoBatchDecoderVPF( + decoder = VideoBatchDecoder( input_path, batch_size, device_id, @@ -171,7 +169,7 @@ def run_sample( cvcuda_perf, ) - encoder = VideoBatchEncoderVPF( + encoder = VideoBatchEncoder( output_dir, decoder.fps, device_id, cuda_ctx, cvcuda_perf ) diff --git a/src/cvcuda/CMakeLists.txt b/src/cvcuda/CMakeLists.txt index 9da865081..4a21a4c56 100644 --- a/src/cvcuda/CMakeLists.txt +++ b/src/cvcuda/CMakeLists.txt @@ -33,6 +33,7 @@ set(CV_CUDA_OP_FILES OpRemap.cpp OpColorTwist.cpp OpCropFlipNormalizeReformat.cpp + OpHQResize.cpp OpNonMaximumSuppression.cpp OpReformat.cpp OpResize.cpp diff --git a/src/cvcuda/OpHQResize.cpp b/src/cvcuda/OpHQResize.cpp new file mode 100644 index 000000000..fd9c3ec28 --- /dev/null +++ b/src/cvcuda/OpHQResize.cpp @@ -0,0 +1,139 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cvcuda/OpHQResize.h" + +#include "priv/OpHQResize.hpp" +#include "priv/SymbolVersioning.hpp" + +#include +#include +#include +#include + +namespace priv = cvcuda::priv; + +CVCUDA_DEFINE_API(0, 6, NVCVStatus, cvcudaHQResizeCreate, (NVCVOperatorHandle * handle)) +{ + return nvcv::ProtectCall( + [&] + { + if (handle == nullptr) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Pointer to NVCVOperator handle must not be NULL"); + } + + *handle = reinterpret_cast(new priv::HQResize()); + }); +} + +CVCUDA_DEFINE_API(0, 6, NVCVStatus, cvcudaHQResizeTensorGetWorkspaceRequirements, + (NVCVOperatorHandle handle, int batchSize, const HQResizeTensorShapeI inputShape, + const HQResizeTensorShapeI outputShape, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoiF *roi, + NVCVWorkspaceRequirements *reqOut)) +{ + if (!reqOut) + return NVCV_ERROR_INVALID_ARGUMENT; + + return nvcv::ProtectCall( + [&] + { + *reqOut = priv::ToDynamicRef(handle).getWorkspaceRequirements( + batchSize, inputShape, outputShape, minInterpolation, magInterpolation, antialias, roi); + }); +} + +CVCUDA_DEFINE_API(0, 6, NVCVStatus, cvcudaHQResizeTensorBatchGetWorkspaceRequirements, + (NVCVOperatorHandle handle, int batchSize, const HQResizeTensorShapesI inputShapes, + const HQResizeTensorShapesI outputShapes, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF roi, + NVCVWorkspaceRequirements *reqOut)) +{ + if (!reqOut) + return NVCV_ERROR_INVALID_ARGUMENT; + + return nvcv::ProtectCall( + [&] + { + *reqOut = priv::ToDynamicRef(handle).getWorkspaceRequirements( + batchSize, inputShapes, outputShapes, minInterpolation, magInterpolation, antialias, roi); + }); +} + +CVCUDA_DEFINE_API(0, 6, NVCVStatus, cvcudaHQResizeGetMaxWorkspaceRequirements, + (NVCVOperatorHandle handle, int maxBatchSize, const HQResizeTensorShapeI maxShape, + NVCVWorkspaceRequirements *reqOut)) +{ + if (!reqOut) + return NVCV_ERROR_INVALID_ARGUMENT; + + return nvcv::ProtectCall( + [&] { *reqOut = priv::ToDynamicRef(handle).getWorkspaceRequirements(maxBatchSize, maxShape); }); +} + +CVCUDA_DEFINE_API(0, 6, NVCVStatus, cvcudaHQResizeSubmit, + (NVCVOperatorHandle handle, cudaStream_t stream, const NVCVWorkspace *ws, NVCVTensorHandle in, + NVCVTensorHandle out, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoiF *roi)) +{ + if (!ws) + return NVCV_ERROR_INVALID_ARGUMENT; + + return nvcv::ProtectCall( + [&] + { + nvcv::TensorWrapHandle _in(in), _out(out); + priv::ToDynamicRef(handle)(stream, *ws, _in, _out, minInterpolation, magInterpolation, + antialias, roi); + }); +} + +CVCUDA_DEFINE_API(0, 6, NVCVStatus, cvcudaHQResizeImageBatchSubmit, + (NVCVOperatorHandle handle, cudaStream_t stream, const NVCVWorkspace *ws, NVCVImageBatchHandle in, + NVCVImageBatchHandle out, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF roi)) +{ + if (!ws) + return NVCV_ERROR_INVALID_ARGUMENT; + + return nvcv::ProtectCall( + [&] + { + nvcv::ImageBatchVarShapeWrapHandle _in(in), _out(out); + priv::ToDynamicRef(handle)(stream, *ws, _in, _out, minInterpolation, magInterpolation, + antialias, roi); + }); +} + +CVCUDA_DEFINE_API(0, 6, NVCVStatus, cvcudaHQResizeTensorBatchSubmit, + (NVCVOperatorHandle handle, cudaStream_t stream, const NVCVWorkspace *ws, 
NVCVTensorBatchHandle in, + NVCVTensorBatchHandle out, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF roi)) +{ + if (!ws) + return NVCV_ERROR_INVALID_ARGUMENT; + + return nvcv::ProtectCall( + [&] + { + nvcv::TensorBatchWrapHandle _in(in), _out(out); + priv::ToDynamicRef(handle)(stream, *ws, _in, _out, minInterpolation, magInterpolation, + antialias, roi); + }); +} diff --git a/src/cvcuda/include/cvcuda/OpErase.h b/src/cvcuda/include/cvcuda/OpErase.h index d64425d28..b3b9a3bb1 100644 --- a/src/cvcuda/include/cvcuda/OpErase.h +++ b/src/cvcuda/include/cvcuda/OpErase.h @@ -102,25 +102,25 @@ CVCUDA_PUBLIC NVCVStatus cvcudaEraseCreate(NVCVOperatorHandle *handle, int32_t m * * anchor Tensor * - * Must be 'N' (dim = 1) with N = number of eraing area. + * Must be 'N' (dim = 1) with N = number of erasing area. * Data Type must be 32bit Signed. * DataType must be TYPE_2S32. * * erasing Tensor * - * Must be 'N' (dim = 1) with N = number of eraing area. + * Must be 'N' (dim = 1) with N = number of erasing area. * Data Type must be 32bit Signed. * DataType must be TYPE_3S32. * * imgIdx Tensor * - * Must be 'N' (dim = 1) with N = number of eraing area. + * Must be 'N' (dim = 1) with N = number of erasing area. * Data Type must be 32bit Signed. * DataType must be TYPE_S32. * * values Tensor * - * Must be 'N' (dim = 1) with W = number of eraing area * 4. + * Must be 'N' (dim = 1) with W = number of erasing area * 4. * Data Type must be 32bit Float. * DataType must be TYPE_F32. * @@ -133,9 +133,9 @@ CVCUDA_PUBLIC NVCVStatus cvcudaEraseCreate(NVCVOperatorHandle *handle, int32_t m * * @param [out] out output tensor / image batch. * - * @param [in] anchor an array of size num_erasing_area that gives the x coordinate and y coordinate of the top left point in the eraseing areas. + * @param [in] anchor an array of size num_erasing_area that gives the x coordinate and y coordinate of the top left point in the erasing areas. * - * @param [in] eraisng an array of size num_erasing_area that gives the widths of the eraseing areas, the heights of the eraseing areas and + * @param [in] erasing an array of size num_erasing_area that gives the widths of the erasing areas, the heights of the erasing areas and * integers in range 0-15, each of whose bits indicates whether or not the corresponding channel need to be erased. * * @param [in] values an array of size num_erasing_area*4 that gives the filling value for each erase area. diff --git a/src/cvcuda/include/cvcuda/OpHQResize.h b/src/cvcuda/include/cvcuda/OpHQResize.h new file mode 100644 index 000000000..d6715e138 --- /dev/null +++ b/src/cvcuda/include/cvcuda/OpHQResize.h @@ -0,0 +1,406 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file OpHQResize.h + * + * @brief Defines types and functions to handle the HQResize operation. + * @defgroup NVCV_C_ALGORITHM_HQ_RESIZE HQ Resize + * @{ + */ + +#ifndef CVCUDA_HQ_RESIZE_H +#define CVCUDA_HQ_RESIZE_H + +#include "Operator.h" +#include "Types.h" +#include "Workspace.h" +#include "detail/Export.h" + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define NVCV_HQ_RESIZE_MAX_RESIZED_NDIM (3) + +typedef struct +{ + int32_t extent[NVCV_HQ_RESIZE_MAX_RESIZED_NDIM]; + int32_t ndim; + int32_t numChannels; +} HQResizeTensorShapeI; + +typedef struct +{ + HQResizeTensorShapeI *shape; + int32_t size; // the number of valid elements in the `shape` array + int32_t ndim; // the number of spatial extents in each `shapes` element + int32_t numChannels; // the number of innermost channels, -1 if they differ between samples +} HQResizeTensorShapesI; + +typedef struct +{ + float lo[NVCV_HQ_RESIZE_MAX_RESIZED_NDIM]; + float hi[NVCV_HQ_RESIZE_MAX_RESIZED_NDIM]; +} HQResizeRoiF; + +typedef struct +{ + int32_t size; // the number of valid elements in the `roi` array + int32_t ndim; // the number of valid extents in each `roi` element + HQResizeRoiF *roi; +} HQResizeRoisF; + +/** Constructs an instance of the HQResize operator. + * + * @param [out] handle Where the operator instance handle will be written to. + * + Must not be NULL. + * + * @retval #NVCV_ERROR_INVALID_ARGUMENT Handle is null. + * @retval #NVCV_ERROR_OUT_OF_MEMORY Not enough memory to create the operator. + * @retval #NVCV_SUCCESS Operation executed successfully. + */ +CVCUDA_PUBLIC NVCVStatus cvcudaHQResizeCreate(NVCVOperatorHandle *handle); + +/** Calculates the workspace requirements for Tensor input/output. + * + * @param [in] handle Handle to the operator. + * + Must not be NULL. + * + * @param [in] batchSize The number of samples in the tensor (the size of N extent). + * + * @param [in] inputShape The HW or DHW extents of the input tensor, the number of resized extents, + * and the number of channels. + * Supported numbers of resized extents are 2 and 3. + * For ndim = 2, a tensor of layout (N)HW(C) is expected to be processed, + * for ndim = 3, a tensor of layout (N)DHW(C) is expected to be processed. + * + * @param [in] outputShape The HW or DHW extents of the output tensor and the number of channels. + * The number of extents and channels must be the same as in inputShape. + * + * @param [in] minInterpolation The type of interpolation to be used when downsampling an extent + * (i.e. when output extent is smaller than the corresponding input extent). + * + * @param [in] magInterpolation The type of interpolation to be used when upsampling an extent + * (i.e. when output extent is bigger than the corresponding input extent). + * + * @param [in] antialias Whether to use antialiasing when downsampling. + * + * @param [in] roi Optional region of interest for the input, in (D)HW layout. + * + * @param [out] reqOut The pointer for workspace requirements struct that will be filled by the call. + * + * @retval #NVCV_ERROR_INVALID_ARGUMENT Handle is null or one of the arguments is out of range. + * @retval #NVCV_SUCCESS Operation executed successfully.
+ */ +CVCUDA_PUBLIC NVCVStatus cvcudaHQResizeTensorGetWorkspaceRequirements(NVCVOperatorHandle handle, int batchSize, + const HQResizeTensorShapeI inputShape, + const HQResizeTensorShapeI outputShape, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoiF *roi, + NVCVWorkspaceRequirements *reqOut); + +/** Calculates the workspace requirements for TensorBatch/ImageBatchVarShape input/output. + * + * @param [in] handle Handle to the operator. + * + Must not be NULL. + * + * @param [in] batchSize The number of samples in the tensor batch/image batch. + * + * @param [in] inputShapes The list of shapes (HW or DHW extents) in the input batch, + * the number of channels, and the number of extents to be resampled (2 or 3). + * The number of channels can be specified once for the whole batch or each sample + * separately. + * + * @param [in] outputShapes The list of shapes (HW or DHW extents) in the output batch, + * the number of channels, and the number of extents to be resampled (2 or 3). + * The number of channels must match the number of channels in the input. + * + * @param [in] minInterpolation The type of interpolation to be used when downsampling an extent + * (i.e. when output extent is smaller than the corresponding input extent). + * + * @param [in] magInterpolation The type of interpolation to be used when upsampling an extent + * (i.e. when output extent is bigger than the corresponding input extent). + * + * @param [in] antialias Whether to use antialiasing when downsampling. + * + * @param [in] roi Optional region of interest for the input, in (D)HW layout. The roi can be described + * as a list for each sample or contain a single element to be used for all the samples + * in the batch. + * + * @param [out] reqOut The pointer for workspace requirements struct that will be filled by the call. + * + * @retval #NVCV_ERROR_INVALID_ARGUMENT Handle is null or one of the arguments is out of range. + * @retval #NVCV_SUCCESS Operation executed successfully. + */ +CVCUDA_PUBLIC NVCVStatus cvcudaHQResizeTensorBatchGetWorkspaceRequirements(NVCVOperatorHandle handle, int batchSize, + const HQResizeTensorShapesI inputShapes, + const HQResizeTensorShapesI outputShapes, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoisF roi, + NVCVWorkspaceRequirements *reqOut); + +/** Calculates the upper bound for workspace requirements. The workspace that meets the returned + * requirements can be used with any call to the operator as long as: the input dimensionality + * (2 or 3) matches, the number of samples does not exceed the maxBatchSize, and all the input + * and output shapes do not exceed the maxShape in any extent (including number of channels). + * + * @param [in] handle Handle to the operator. + * + Must not be NULL. + * + * @param [in] maxBatchSize The maximal number of samples in the tensor/tensor batch/image batch. + * + * @param [in] maxShape The maximal shape of any input or output sample. The number of channels must + * be an upper bound for number of channels in any sample. + * + * @param [out] reqOut The pointer for workspace requirements struct that will be filled by the call. + * + * @retval #NVCV_ERROR_INVALID_ARGUMENT Handle is null or one of the arguments is out of range. + * @retval #NVCV_SUCCESS Operation executed successfully.
+ */ +CVCUDA_PUBLIC NVCVStatus cvcudaHQResizeGetMaxWorkspaceRequirements(NVCVOperatorHandle handle, int maxBatchSize, + const HQResizeTensorShapeI maxShape, + NVCVWorkspaceRequirements *reqOut); + +/** Executes the HQResize operation on the given cuda stream. This operation does not wait for completion. + * + * Limitations: + * + * Input, Output: + * Data Layout: NVCV_TENSOR_[N][D]HW[C] + * + * Number of channels: Positive integer + * + * Data Type | Allowed + * -------------- | ------------- + * 8bit Unsigned | Yes + * 8bit Signed | No + * 16bit Unsigned | Yes + * 16bit Signed | Yes + * 32bit Unsigned | No + * 32bit Signed | No + * 32bit Float | Yes + * 64bit Float | No + * + * Input/Output dependency + * Property | Input == Output + * -------------- | ------------- + * Data Layout | Yes + * Data Type | No (output can be the same or float32). + * Channels | Yes + * Width | No + * Height | No + * Samples | Yes + * + * + * @param [in] handle Handle to the operator. + * + Must not be NULL. + * @param [in] stream Handle to a valid CUDA stream. + * + * @param [in] workspace The workspace with memory for intermediate results. The requirements for a given input + * can be acquired with a call to `cvcudaHQResizeTensorGetWorkspaceRequirements` or + * `cvcudaHQResizeGetMaxWorkspaceRequirements`. + * + * @param [in] in The input tensor with (N)(D)HW(C) layout. + * + * @param [in] out The output tensor with the same layout, number of samples and channels as the in tensor. + * + * @param [in] minInterpolation The type of interpolation to be used when downsampling an extent + * (i.e. when output extent is smaller than the corresponding input extent). + * Supported interpolation formats are: `NVCV_INTERP_NEAREST`, `NVCV_INTERP_LINEAR`, + * `NVCV_INTERP_CUBIC`, `NVCV_INTERP_LANCZOS`, and `NVCV_INTERP_GAUSSIAN`. + * + * @param [in] magInterpolation The type of interpolation to be used when upsampling an extent + * (i.e. when output extent is bigger than the corresponding input extent). + * Supported interpolation formats are: `NVCV_INTERP_NEAREST`, `NVCV_INTERP_LINEAR`, + * `NVCV_INTERP_CUBIC`, `NVCV_INTERP_LANCZOS`, and `NVCV_INTERP_GAUSSIAN`. + * + * @param [in] antialias Whether to use antialiasing when downsampling. The value is ignored for + * `minInterpolation = NVCV_INTERP_NEAREST`. + * + * @param [in] roi Optional region of interest for the input, in (D)HW layout. + * If, for some axis, the low bound is bigger than the high bound, + * the image is flipped in that dimension. + * + * @retval #NVCV_ERROR_INVALID_ARGUMENT Some parameter is outside valid range. + * @retval #NVCV_ERROR_INTERNAL Internal error in the operator, invalid types passed in. + * @retval #NVCV_SUCCESS Operation executed successfully. + */ +CVCUDA_PUBLIC NVCVStatus cvcudaHQResizeSubmit(NVCVOperatorHandle handle, cudaStream_t stream, + const NVCVWorkspace *workspace, NVCVTensorHandle in, NVCVTensorHandle out, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoiF *roi); + +/** Executes the HQResize operation on the given cuda stream. This operation does not wait for completion. 
+ * + * Limitations: + * + * Input, Output: + * Data Layout: NVCV_TENSOR_HWC + * + * Number of channels: [1, 2, 3, 4] + * + * Data Type | Allowed + * -------------- | ------------- + * 8bit Unsigned | Yes + * 8bit Signed | No + * 16bit Unsigned | Yes + * 16bit Signed | Yes + * 32bit Unsigned | No + * 32bit Signed | No + * 32bit Float | Yes + * 64bit Float | No + * + * Input/Output dependency + * Property | Input == Output + * -------------- | ------------- + * Data Layout | Yes + * Data Type | No (output can be the same or float32). + * Channels | Yes + * Width | No + * Height | No + * Samples | Yes + * + * + * @param [in] handle Handle to the operator. + * + Must not be NULL. + * @param [in] stream Handle to a valid CUDA stream. + * + * @param [in] workspace The workspace with memory for intermediate results. The requirements for a given input + * can be acquired with a call to `cvcudaHQResizeTensorBatchGetWorkspaceRequirements` or + * `cvcudaHQResizeGetMaxWorkspaceRequirements`. + * + * @param [in] in The ImageBatchVarShape batch of input samples. + * + * @param [in] out The ImageBatchVarShape batch of output samples. + * + * @param [in] minInterpolation The type of interpolation to be used when downsampling an extent + * (i.e. when output extent is smaller than the corresponding input extent). + * Supported interpolation formats are: `NVCV_INTERP_NEAREST`, `NVCV_INTERP_LINEAR`, + * `NVCV_INTERP_CUBIC`, `NVCV_INTERP_LANCZOS`, and `NVCV_INTERP_GAUSSIAN`. + * + * @param [in] magInterpolation The type of interpolation to be used when upsampling an extent + * (i.e. when output extent is bigger than the corresponding input extent). + * Supported interpolation formats are: `NVCV_INTERP_NEAREST`, `NVCV_INTERP_LINEAR`, + * `NVCV_INTERP_CUBIC`, `NVCV_INTERP_LANCZOS`, and `NVCV_INTERP_GAUSSIAN`. + * + * @param [in] antialias Whether to use antialiasing when downsampling. The value is ignored for + * `minInterpolation = NVCV_INTERP_NEAREST`. + * + * @param [in] roi Optional region of interest for the input, in (D)HW layout. The roi can be described + * as a list of elements for each sample or a list containing a single element to be used + * for all the samples in the batch. If, for some axis, the low bound is bigger than + * the high bound, the image is flipped in that dimension. + * + * @retval #NVCV_ERROR_INVALID_ARGUMENT Some parameter is outside valid range. + * @retval #NVCV_ERROR_INTERNAL Internal error in the operator, invalid types passed in. + * @retval #NVCV_SUCCESS Operation executed successfully. + */ +CVCUDA_PUBLIC NVCVStatus cvcudaHQResizeImageBatchSubmit(NVCVOperatorHandle handle, cudaStream_t stream, + const NVCVWorkspace *workspace, NVCVImageBatchHandle in, + NVCVImageBatchHandle out, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoisF roi); + +/** Executes the HQResize operation on the given cuda stream. This operation does not wait for completion. + * + * Limitations: + * + * Input, Output: + * Data Layout: NVCV_TENSOR_[D]HW[C] + * + * Number of channels: Positive integer + * + * Data Type | Allowed + * -------------- | ------------- + * 8bit Unsigned | Yes + * 8bit Signed | No + * 16bit Unsigned | Yes + * 16bit Signed | Yes + * 32bit Unsigned | No + * 32bit Signed | No + * 32bit Float | Yes + * 64bit Float | No + * + * Input/Output dependency + * Property | Input == Output + * -------------- | ------------- + * Data Layout | Yes + * Data Type | No (output can be the same or float32). 
+ * Channels | Yes + * Width | No + * Height | No + * Samples | Yes + * + * + * @param [in] handle Handle to the operator. + * + Must not be NULL. + * @param [in] stream Handle to a valid CUDA stream. + * + * @param [in] workspace The workspace with memory for intermediate results. The requirements for a given input + * can be acquired with a call to `cvcudaHQResizeTensorBatchGetWorkspaceRequirements` or + * `cvcudaHQResizeGetMaxWorkspaceRequirements`. + * + * @param [in] in The TensorBatch of input samples. + * + * @param [in] out The TensorBatch batch of output samples. + * + * @param [in] minInterpolation The type of interpolation to be used when downsampling an extent + * (i.e. when output extent is smaller than the corresponding input extent). + * Supported interpolation formats are: `NVCV_INTERP_NEAREST`, `NVCV_INTERP_LINEAR`, + * `NVCV_INTERP_CUBIC`, `NVCV_INTERP_LANCZOS`, and `NVCV_INTERP_GAUSSIAN`. + * + * @param [in] magInterpolation The type of interpolation to be used when upsampling an extent + * (i.e. when output extent is bigger than the corresponding input extent). + * Supported interpolation formats are: `NVCV_INTERP_NEAREST`, `NVCV_INTERP_LINEAR`, + * `NVCV_INTERP_CUBIC`, `NVCV_INTERP_LANCZOS`, and `NVCV_INTERP_GAUSSIAN`. + * + * @param [in] antialias Whether to use antialiasing when downsampling. The value is ignored for + * `minInterpolation = NVCV_INTERP_NEAREST`. + * + * @param [in] roi Optional region of interest for the input, in (D)HW layout. The roi can be described + * as a list of elements for each sample or a list containing a single element to be used + * for all the samples in the batch. If, for some axis, the low bound is bigger than + * the high bound, the image is flipped in that dimension. + * + * @retval #NVCV_ERROR_INVALID_ARGUMENT Some parameter is outside valid range. + * @retval #NVCV_ERROR_INTERNAL Internal error in the operator, invalid types passed in. + * @retval #NVCV_SUCCESS Operation executed successfully. + */ +CVCUDA_PUBLIC NVCVStatus cvcudaHQResizeTensorBatchSubmit(NVCVOperatorHandle handle, cudaStream_t stream, + const NVCVWorkspace *workspace, NVCVTensorBatchHandle in, + NVCVTensorBatchHandle out, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoisF roi); + +#ifdef __cplusplus +} +#endif + +#endif /* CVCUDA_HQ_RESIZE_H */ diff --git a/src/cvcuda/include/cvcuda/OpHQResize.hpp b/src/cvcuda/include/cvcuda/OpHQResize.hpp new file mode 100644 index 000000000..8e929bc5f --- /dev/null +++ b/src/cvcuda/include/cvcuda/OpHQResize.hpp @@ -0,0 +1,154 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file OpHQResize.hpp + * + * @brief Defines the public C++ Class for the HQResize operation. 
+ * @defgroup NVCV_CPP_ALGORITHM_HQ_RESIZE HQ Resize + * @{ + */ + +#ifndef CVCUDA_HQ_RESIZE_HPP +#define CVCUDA_HQ_RESIZE_HPP + +#include "IOperator.hpp" +#include "OpHQResize.h" +#include "Workspace.hpp" + +#include +#include +#include +#include +#include +#include + +namespace cvcuda { + +class HQResize final : public IOperator +{ +public: + explicit HQResize(); + + ~HQResize(); + + WorkspaceRequirements getWorkspaceRequirements(int batchSize, const HQResizeTensorShapeI inputShape, + const HQResizeTensorShapeI outputShape, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoiF *roi = nullptr); + + WorkspaceRequirements getWorkspaceRequirements(int batchSize, HQResizeTensorShapesI inputShapes, + const HQResizeTensorShapesI outputShapes, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoisF roi = {}); + + WorkspaceRequirements getWorkspaceRequirements(int maxBatchSize, const HQResizeTensorShapeI maxShape); + + void operator()(cudaStream_t stream, const Workspace &ws, const nvcv::Tensor &in, const nvcv::Tensor &out, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, + bool antialias = false, const HQResizeRoiF *roi = nullptr); + + void operator()(cudaStream_t stream, const Workspace &ws, const nvcv::ImageBatch &in, const nvcv::ImageBatch &out, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, + bool antialias = false, const HQResizeRoisF roi = {}); + + void operator()(cudaStream_t stream, const Workspace &ws, const nvcv::TensorBatch &in, const nvcv::TensorBatch &out, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, + bool antialias = false, const HQResizeRoisF roi = {}); + + virtual NVCVOperatorHandle handle() const noexcept override; + +private: + NVCVOperatorHandle m_handle; +}; + +inline HQResize::HQResize() +{ + nvcv::detail::CheckThrow(cvcudaHQResizeCreate(&m_handle)); + assert(m_handle); +} + +inline HQResize::~HQResize() +{ + nvcvOperatorDestroy(m_handle); +} + +inline WorkspaceRequirements HQResize::getWorkspaceRequirements(int batchSize, const HQResizeTensorShapeI inputShape, + const HQResizeTensorShapeI outputShape, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoiF *roi) +{ + WorkspaceRequirements req{}; + nvcv::detail::CheckThrow(cvcudaHQResizeTensorGetWorkspaceRequirements( + m_handle, batchSize, inputShape, outputShape, minInterpolation, magInterpolation, antialias, roi, &req)); + return req; +} + +inline WorkspaceRequirements HQResize::getWorkspaceRequirements(int batchSize, const HQResizeTensorShapesI inputShapes, + const HQResizeTensorShapesI outputShapes, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoisF roi) +{ + WorkspaceRequirements req{}; + nvcv::detail::CheckThrow(cvcudaHQResizeTensorBatchGetWorkspaceRequirements( + m_handle, batchSize, inputShapes, outputShapes, minInterpolation, magInterpolation, antialias, roi, &req)); + return req; +} + +inline WorkspaceRequirements HQResize::getWorkspaceRequirements(int maxBatchSize, const HQResizeTensorShapeI maxShape) +{ + WorkspaceRequirements req{}; + nvcv::detail::CheckThrow(cvcudaHQResizeGetMaxWorkspaceRequirements(m_handle, maxBatchSize, maxShape, &req)); + return req; +} + 
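
Taken together, the requirement queries above and the operator() overloads defined below give the typical call sequence for the C++ wrapper. A minimal sketch, assuming a helper `AllocateWorkspace` that turns the returned requirements into a `cvcuda::Workspace` (the actual lifetime helper lives in Workspace.hpp and may differ in name); `ResizeOnce` and its parameters are local to the sketch.

```cpp
// Sketch only: typical call order for the C++ wrapper. AllocateWorkspace is an assumed
// helper that owns the host/pinned/cuda buffers described by the requirements.
#include <cvcuda/OpHQResize.hpp>
#include <cuda_runtime.h>
#include <nvcv/Tensor.hpp>

cvcuda::Workspace AllocateWorkspace(const cvcuda::WorkspaceRequirements &req); // assumed helper

void ResizeOnce(cudaStream_t stream, const nvcv::Tensor &src, const nvcv::Tensor &dst, int batchSize,
                const HQResizeTensorShapeI &srcShape, const HQResizeTensorShapeI &dstShape)
{
    cvcuda::HQResize resize;

    // Scratch memory needed for exactly this input/output pair.
    cvcuda::WorkspaceRequirements req = resize.getWorkspaceRequirements(
        batchSize, srcShape, dstShape, NVCV_INTERP_LINEAR /*when shrinking*/, NVCV_INTERP_CUBIC /*when growing*/,
        /*antialias=*/true);

    cvcuda::Workspace ws = AllocateWorkspace(req);

    // Asynchronous: the resize is enqueued on `stream`; nothing is synchronized here.
    resize(stream, ws, src, dst, NVCV_INTERP_LINEAR, NVCV_INTERP_CUBIC, /*antialias=*/true);
}
```
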
+inline void HQResize::operator()(cudaStream_t stream, const Workspace &ws, const nvcv::Tensor &in, + const nvcv::Tensor &out, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoiF *roi) +{ + nvcv::detail::CheckThrow(cvcudaHQResizeSubmit(m_handle, stream, &ws, in.handle(), out.handle(), minInterpolation, + magInterpolation, antialias, roi)); +} + +inline void HQResize::operator()(cudaStream_t stream, const Workspace &ws, const nvcv::ImageBatch &in, + const nvcv::ImageBatch &out, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF roi) +{ + nvcv::detail::CheckThrow(cvcudaHQResizeImageBatchSubmit(m_handle, stream, &ws, in.handle(), out.handle(), + minInterpolation, magInterpolation, antialias, roi)); +} + +inline void HQResize::operator()(cudaStream_t stream, const Workspace &ws, const nvcv::TensorBatch &in, + const nvcv::TensorBatch &out, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF roi) +{ + nvcv::detail::CheckThrow(cvcudaHQResizeTensorBatchSubmit(m_handle, stream, &ws, in.handle(), out.handle(), + minInterpolation, magInterpolation, antialias, roi)); +} + +inline NVCVOperatorHandle HQResize::handle() const noexcept +{ + return m_handle; +} + +} // namespace cvcuda + +#endif // CVCUDA_HQ_RESIZE_HPP diff --git a/src/cvcuda/include/cvcuda/OpLabel.h b/src/cvcuda/include/cvcuda/OpLabel.h index ad0b40aa0..77f620a52 100644 --- a/src/cvcuda/include/cvcuda/OpLabel.h +++ b/src/cvcuda/include/cvcuda/OpLabel.h @@ -58,20 +58,20 @@ CVCUDA_PUBLIC NVCVStatus cvcudaLabelCreate(NVCVOperatorHandle *handle); * inside the input tensor, yielding labels in the output tensor with same rank and shape. Labels are numbers * uniquely assigned to each connected region, for example: * - * Input 0 0 0 0 Output 0 0 0 0 - * image: 1 1 0 1 labels: 4 4 0 7 - * 0 0 0 1 0 0 0 7 - * 0 1 1 1 0 7 7 7 + * Input 0 0 0 0 Output 0 0 0 0 + * image: 1 1 0 1 labels: 4 4 0 7 + * 0 0 0 1 0 0 0 7 + * 0 1 1 1 0 7 7 7 * * In the above example, three distinct regions were identified and labeled as 0, 4 and 7. Note that the region * labeled with 0 remained with the same value as the input, and label numbers 4 and 7 were assigned in * non-consecutive ordering. Some values in the input may be ignored, i.e. not labeled, using the \ref bgLabel * tensor to define those values as background, which usually is set to the value zero. For example: * - * Input 0 0 1 0 Output 0 0 2 3 Zeros in 0 0 2 0 - * image: 0 1 0 1 labels: 0 5 6 7 bgLabel: 0 5 0 7 - * 0 0 1 1 0 0 7 7 0 0 7 7 - * 0 1 1 1 0 7 7 7 0 7 7 7 + * Input 0 0 1 0 Output 0 0 2 3 Zeros in 0 0 2 0 + * image: 0 1 0 1 labels: 0 5 6 7 bgLabel: 0 5 0 7 + * 0 0 1 1 0 0 7 7 0 0 7 7 + * 0 1 1 1 0 7 7 7 0 7 7 7 * * Limitations: * @@ -106,7 +106,6 @@ CVCUDA_PUBLIC NVCVStatus cvcudaLabelCreate(NVCVOperatorHandle *handle); * 64bit Float | No * * Input/Output dependency - * * Property | Input == Output * -------------- | ------------- * Data Layout | Yes diff --git a/src/cvcuda/include/cvcuda/OpMorphology.h b/src/cvcuda/include/cvcuda/OpMorphology.h index 35c890020..3ab9bd85d 100644 --- a/src/cvcuda/include/cvcuda/OpMorphology.h +++ b/src/cvcuda/include/cvcuda/OpMorphology.h @@ -191,11 +191,11 @@ CVCUDA_PUBLIC NVCVStatus cvcudaMorphologySubmit(NVCVOperatorHandle handle, cudaS * * @param [in] morphType Type of operation to perform (Erode/Dilate). \ref NVCVMorphologyType. 
* - * @param [in] masks 1D Tensor of NVCV_DATA_TYPE_2S32 mask W/H pairs, where the 1st pair is for image 0, second for image 1, etc. + * @param [in, out] masks 1D Tensor of NVCV_DATA_TYPE_2S32 mask W/H pairs, where the 1st pair is for image 0, second for image 1, etc. * Setting values to -1,-1 will create a default 3,3 mask. * (Note after the operation the tensor values may be modified by kernel) * - * @param [in] anchors 1D Tensor of NVCV_DATA_TYPE_2S32 X/Y pairs, where the 1st pair is for image 0, second for image 1, etc + * @param [in, out] anchors 1D Tensor of NVCV_DATA_TYPE_2S32 X/Y pairs, where the 1st pair is for image 0, second for image 1, etc * Setting values to -1,-1 will anchor the kernel at the center. * (Note after the operation the tensor values may be modified by kernel) * diff --git a/src/cvcuda/include/cvcuda/OpNormalize.h b/src/cvcuda/include/cvcuda/OpNormalize.h index 2830578e2..d20eed11e 100644 --- a/src/cvcuda/include/cvcuda/OpNormalize.h +++ b/src/cvcuda/include/cvcuda/OpNormalize.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -82,7 +82,7 @@ CVCUDA_PUBLIC NVCVStatus cvcudaNormalizeCreate(NVCVOperatorHandle *handle); * Limitations: * * Input: - * Data Layout: [kNHWC, kHWC, kNCHW, KCHW] + * Data Layout: [kNHWC, kHWC] * Channels: [1, 3, 4] * * Data Type | Allowed @@ -97,7 +97,7 @@ CVCUDA_PUBLIC NVCVStatus cvcudaNormalizeCreate(NVCVOperatorHandle *handle); * 64bit Float | No * * Output: - * Data Layout: [kNHWC, kHWC, kNCHW, KCHW] + * Data Layout: [kNHWC, kHWC] * Channels: [1, 3, 4] * * Data Type | Allowed diff --git a/src/cvcuda/include/cvcuda/Types.h b/src/cvcuda/include/cvcuda/Types.h index 17bb8f62e..37eb2e0cf 100644 --- a/src/cvcuda/include/cvcuda/Types.h +++ b/src/cvcuda/include/cvcuda/Types.h @@ -43,6 +43,7 @@ typedef enum NVCV_INTERP_CUBIC = 2, NVCV_INTERP_AREA = 3, NVCV_INTERP_LANCZOS = 4, + NVCV_INTERP_GAUSSIAN = 5, NVCV_INTERP_MAX = 7, NVCV_WARP_INVERSE_MAP = 16, NVCV_INTERP_HAMMING = 17, diff --git a/src/cvcuda/include/cvcuda/Workspace.hpp b/src/cvcuda/include/cvcuda/Workspace.hpp index 65a9ddfd7..e878ff00e 100644 --- a/src/cvcuda/include/cvcuda/Workspace.hpp +++ b/src/cvcuda/include/cvcuda/Workspace.hpp @@ -64,6 +64,13 @@ inline NVCVWorkspaceRequirements MaxWorkspaceReq(const WorkspaceRequirements &a, return ret; } +inline void AlignUp(WorkspaceRequirements &ws) +{ + ws.hostMem.size = nvcv::detail::AlignUp(ws.hostMem.size, ws.hostMem.alignment); + ws.pinnedMem.size = nvcv::detail::AlignUp(ws.pinnedMem.size, ws.pinnedMem.alignment); + ws.cudaMem.size = nvcv::detail::AlignUp(ws.cudaMem.size, ws.cudaMem.alignment); +} + /** A helper class that manages the lifetime of resources stored in a Workspace structure. * * This class works in a way similar to unique_ptr with a custom deleter. 
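
Before the private implementation below, it is worth noting how the two small helpers added to Workspace.hpp are meant to combine: requirements from several prospective calls can be merged with `MaxWorkspaceReq` and then aligned once, so a single allocation can back all of them. A short sketch, assuming `MaxWorkspaceReq` takes two requirement structs and that both helpers live in the `cvcuda` namespace, as the surrounding header suggests.

```cpp
// Sketch only: merging the requirements of two prospective HQResize calls so that one
// workspace allocation can serve both. Namespace and arity of the helpers are assumed.
#include <cvcuda/OpHQResize.hpp>
#include <cvcuda/Workspace.hpp>

cvcuda::WorkspaceRequirements CombinedRequirements(cvcuda::HQResize &resize, int batchSize,
                                                   const HQResizeTensorShapeI &inA,
                                                   const HQResizeTensorShapeI &outA,
                                                   const HQResizeTensorShapeI &inB,
                                                   const HQResizeTensorShapeI &outB)
{
    auto reqA = resize.getWorkspaceRequirements(batchSize, inA, outA, NVCV_INTERP_LINEAR, NVCV_INTERP_LINEAR,
                                                /*antialias=*/true);
    auto reqB = resize.getWorkspaceRequirements(batchSize, inB, outB, NVCV_INTERP_LINEAR, NVCV_INTERP_LINEAR,
                                                /*antialias=*/true);

    // Per-kind (host, pinned, cuda) maximum of the two requirement sets.
    cvcuda::WorkspaceRequirements req = cvcuda::MaxWorkspaceReq(reqA, reqB);

    // Round each size up to its own alignment, using the AlignUp helper added in the hunk above.
    cvcuda::AlignUp(req);
    return req;
}
```
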
diff --git a/src/cvcuda/priv/CMakeLists.txt b/src/cvcuda/priv/CMakeLists.txt index cd3904c41..6b28a39f7 100644 --- a/src/cvcuda/priv/CMakeLists.txt +++ b/src/cvcuda/priv/CMakeLists.txt @@ -32,6 +32,7 @@ set(CV_CUDA_PRIV_OP_FILES OpRemap.cu OpColorTwist.cu OpCropFlipNormalizeReformat.cu + OpHQResize.cu OpNonMaximumSuppression.cu OpReformat.cpp OpResize.cpp diff --git a/src/cvcuda/priv/OpHQResize.cu b/src/cvcuda/priv/OpHQResize.cu new file mode 100644 index 000000000..e7b924d81 --- /dev/null +++ b/src/cvcuda/priv/OpHQResize.cu @@ -0,0 +1,2788 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OpHQResize.hpp" +#include "cvcuda/Workspace.hpp" + +#include "OpHQResizeBatchWrap.cuh" +#include "OpHQResizeFilter.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace { + +namespace cuda = nvcv::cuda; +namespace filter = cvcuda::priv::hq_resize::filter; +namespace batch_wrapper = cvcuda::priv::hq_resize::batch_wrapper; + +template +using Vec = typename cuda::MakeType; + +template +using VecI = Vec; + +template +using VecF = Vec; + +namespace utils { + +template>> +inline std::enable_if_t>, int64_t> Volume(const T &v) +{ + int64_t vol = 1; + for (int i = 0; i < cuda::NumComponents; i++) + { + vol *= cuda::GetElement(v, i); + } + return vol; +} + +template>> +auto DivCeil(const T &a, const T &b) +{ + return (a + b - 1) / b; +} +} // namespace utils + +namespace resampling { + +template +struct SampleDesc +{ + static constexpr int kSpatialNDim = _kSpatialNDim; + + // input, output and the intermediate buffers + static constexpr int kNumBuffers = kSpatialNDim + 1; + + // shapes[0] - input shape, consecutive intermediate results shapes, + // shapes[kSpatialNDim] - output shape + VecI shapes[kNumBuffers]; + + // the number of channels in the sample, common for input, + // intermediate and output sample + int channels; + + // describes which axis to processes in a given resampling pass, e.g. + // if processingOrder.x = 2, then in the first pass the z axis + // will be resampled + VecI processingOrder; + + // resampling origin and scale in pass order, i.e. + // origin.x and scale.x describe origin and scale for resampling + // in the first pass + VecF origin, scale; + + // what type of filter to use (NN, Linear, Support based) + // in pass order (i.e. filterKind[0] refers to filter used in the first pass) + filter::FilterTypeKind filterKind[kSpatialNDim]; + + // filter description (support, coefficients etc.) + // in pass order (i.e. 
filter[0] refers to filter used in the first pass) + filter::ResamplingFilter filter[kSpatialNDim]; + + // spatial offset in the input sample based on the input ROI + // and filter support + VecI inRoiOffset; + + // describes the logical block shape, i.e. a size of a slice + // that a single gpu block will process in a given pass + VecI blockShape[kSpatialNDim]; +}; + +/** + * @brief Helper structure to indicate the static number of channels + * dynamic number of channels that may differ between samples. + */ +template +struct NumChannels +{ + constexpr int __forceinline__ __device__ operator()() const + { + return kStaticChannels; + } + + static constexpr bool kHasStaticChannels = true; + static constexpr int kStaticChannels = _kStaticChannels; +}; + +template<> +struct NumChannels<-1> +{ + int __forceinline__ __device__ operator()() const + { + return dynamicChannels; + } + + static constexpr bool kHasStaticChannels = false; + static constexpr int kStaticChannels = -1; + int dynamicChannels; +}; + +template +__forceinline__ __device__ void WithChannels(const int dynamicChannels, Cb &&cb) +{ + if constexpr (kNumStaticChannels == -1) + { + cb(NumChannels<-1>{dynamicChannels}); + } + else if constexpr (kNumStaticChannels != -1) + { + static_assert(kNumStaticChannels > 0); + cb(NumChannels{}); + } +} + +/** + * @brief Each threadblock will cover `lanes * volume(blockDim)` + * elements of the output sample. More lanes result in: + * 1. smaller grid launched (possibly reducing parallelism for small images), + * 2. better resuing of the filter's coefficients + * (they are computed once for all lanes). + * + * @return int - the number of lanes for a single threadblock + * to cover in the output image + */ +inline int GetResizeBlockLanesEnv() +{ + char *env = getenv("CVCUDA_HQ_RESIZE_BLOCK_LANES"); + if (env) + { + int lanes = atoi(env); + if (lanes < 1) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "The CVCUDA_HQ_RESIZE_BLOCK_LANES must be a positive integer"); + } + return lanes; + } + else + { + return 8; + } +} + +inline int GetResizeBlockLanes() +{ + static int lanes = GetResizeBlockLanesEnv(); + return lanes; +} + +template +struct GridHelperDevice +{ +}; + +/** + * @brief Maps cuda blockIdx to sample and bounds of the sample region + * to be processed be the threadblock for 2D resampling + */ +template<> +struct GridHelperDevice<2> +{ + GridHelperDevice(VecI<2> numBlocks) + : m_numBlocksX{numBlocks.x} + { + } + + int __forceinline__ __device__ CurrentSample() const + { + return blockIdx.y; + } + + void __forceinline__ __device__ CurrentBlock(VecI<2> &lo, VecI<2> &hi, const VecI<2> blockShape) const + + { + VecI<2> currentBlock; + { + int block = blockIdx.x; + currentBlock.x = block % m_numBlocksX; + currentBlock.y = block / m_numBlocksX; + } + lo = blockShape * currentBlock; + hi = lo + blockShape; + } + +private: + int m_numBlocksX; +}; + +/** + * @brief Maps cuda blockIdx to sample and bounds of the sample region + * to be processed be the threadblock for 3D resampling + */ +template<> +struct GridHelperDevice<3> +{ + GridHelperDevice(VecI<3> numBlocks) + : m_numBlocksX{numBlocks.x} + , m_numBlocksY{numBlocks.y} + { + } + + int __forceinline__ __device__ CurrentSample() const + { + return blockIdx.y; + } + + void __forceinline__ __device__ CurrentBlock(VecI<3> &lo, VecI<3> &hi, const VecI<3> blockShape) const + + { + VecI<3> currentBlock; + { + int block = blockIdx.x; + currentBlock.x = block % m_numBlocksX; + block = block / m_numBlocksX; + currentBlock.y = block % 
m_numBlocksY; + currentBlock.z = block / m_numBlocksY; + } + lo = blockShape * currentBlock; + hi = lo + blockShape; + } + +private: + int m_numBlocksX, m_numBlocksY; +}; + +/** + * @brief Maps the logical blocks and the number of samples into cuda grid and back. + */ +template +struct GridHelper +{ + GridHelper(VecI numBlocks, int numSamples) + : m_numBlocks{numBlocks} + , m_numSamples{numSamples} + { + } + + template + std::enable_if_t GetKernelGrid() const + { + static_assert(kSpatialNDim == 2); + return dim3(m_numBlocks.x * m_numBlocks.y, m_numSamples, 1); + } + + template + std::enable_if_t GetKernelGrid() const + { + static_assert(kSpatialNDim == 3); + return dim3(m_numBlocks.x * m_numBlocks.y * m_numBlocks.z, m_numSamples, 1); + } + + GridHelperDevice GetDeviceGridHelper() + { + return {m_numBlocks}; + } + +private: + VecI m_numBlocks; + int m_numSamples; +}; + +// The namespace contains implementation of different resampling +// methods in device code. +namespace interpolate { + +template +auto __forceinline__ __device__ GetWrapPtr(const Wrap wrap, const VecI<2> yx, const Idxs... idxs) +{ + return wrap.ptr(yx.y, yx.x, idxs...); +} + +template +auto __forceinline__ __device__ GetWrapPtr(const Wrap wrap, const VecI<3> zyx, const Idxs... idxs) +{ + return wrap.ptr(zyx.z, zyx.y, zyx.x, idxs...); +} + +template +std::enable_if_t __forceinline__ __device__ + LoadPixelLdg(const Wrap wrap, const NumChannelsT numChannels, const Idxs... idxs) +{ + using T = std::remove_const_t; + using BT = cuda::BaseType; + constexpr int kStaticChannels = NumChannelsT::kStaticChannels; + static_assert(kStaticChannels == cuda::NumElements); + + constexpr bool kSupportsLdg = kStaticChannels == 2 || kStaticChannels == 4; + + if constexpr (kSupportsLdg) + { + return __ldg(GetWrapPtr(wrap, idxs...)); + } + else if constexpr (!kSupportsLdg) + { + const BT *basePtr = reinterpret_cast(GetWrapPtr(wrap, idxs...)); + T value; +#pragma unroll + for (int c = 0; c < kStaticChannels; c++) + { + cuda::GetElement(value, c) = __ldg(basePtr + c); + } + return value; + } +} + +template +std::enable_if_t __forceinline__ __device__ + LoadPixelLdg(const Wrap wrap, const NumChannelsT numChannels, const Idxs... 
idxs) +{ + static_assert(!cuda::IsCompound); + return __ldg(GetWrapPtr(wrap, idxs...)); +} + +namespace nn { + +template +void __forceinline__ __device__ ForAllPixels(const VecI<2> lo, const VecI<2> hi, ProcessPixel &&processPixel) +{ + for (int y = lo.y + threadIdx.y; y < hi.y; y += blockDim.y) + { + for (int x = lo.x + threadIdx.x; x < hi.x; x += blockDim.x) + { + processPixel(VecI<2>{x, y}); + } + } +} + +template +void __forceinline__ __device__ ForAllPixels(const VecI<3> lo, const VecI<3> hi, ProcessPixel &&processPixel) +{ + for (int z = lo.z + threadIdx.z; z < hi.z; z += blockDim.z) + { + for (int y = lo.y + threadIdx.y; y < hi.y; y += blockDim.y) + { + for (int x = lo.x + threadIdx.x; x < hi.x; x += blockDim.x) + { + processPixel(VecI<3>{x, y, z}); + } + } + } +} + +/** + * @brief Nearest neighbor resampling + * + * @param outWrap - the wrapper for accessing output data + * @param inWrap - the wrapper for accessing input data + * @param lo - inclusive lower bound output coordinates of the block processed by the threadblock + * @param hi - exclusive upper bound output coordinates of the block processed by the threadblock + * @param origin - source coordinates corresponding to output's (0, 0) + * @param scale - step, in source coordinates, for one pixel in output coordinates + * @param inShape - shape of the input (x, y) order + * @param numChannels - the NumChannels specialization describing the number of interleaved + * channels in the input and output sample. + */ +template +void __forceinline__ __device__ Resample(const PassOutWrap outWrap, const PassInWrap inWrap, + const VecI lo, const VecI hi, + VecF origin, const VecF scale, + const VecI inShape, const NumChannelsT numChannels) +{ + using OutT = typename PassOutWrap::ValueType; + using InT = typename PassInWrap::ValueType; + // spatial extents and optional channels extent + constexpr int kNDim = kSpatialNDim + !NumChannelsT::kHasStaticChannels; + + static_assert(!NumChannelsT::kHasStaticChannels || NumChannelsT::kStaticChannels == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + static_assert(PassOutWrap::kNumDimensions == kNDim); + static_assert(PassInWrap::kNumDimensions == kNDim); + + origin += 0.5f * scale; + ForAllPixels(lo, hi, + [=](const VecI outIdxs) + { + VecI inIdxs = cuda::round(outIdxs * scale + origin); + inIdxs = cuda::clamp(inIdxs, cuda::SetAll>(0), inShape - 1); + + if constexpr (NumChannelsT::kHasStaticChannels) + { + const InT in = LoadPixelLdg(inWrap, numChannels, inIdxs); + OutT &out = *GetWrapPtr(outWrap, outIdxs); + out = cuda::SaturateCast(in); + } + else if constexpr (!NumChannelsT::kHasStaticChannels) + { + for (int c = 0; c < numChannels(); c++) + { + const InT in = LoadPixelLdg(inWrap, numChannels, inIdxs, c); + OutT &out = *GetWrapPtr(outWrap, outIdxs, c); + out = cuda::SaturateCast(in); + } + } + }); +} + +} // namespace nn + +namespace linear { + +template +void __forceinline__ __device__ Linear(const PassOutWrap outWrap, const PassInWrap inWrap, + const NumChannelsT numChannels, const VecI inIdx0, + const VecI inIdx1, const float q, const VecI outIdx) +{ + using OutT = typename PassOutWrap::ValueType; + using InT = std::remove_const_t; + // spatial extents and optional channels extent + constexpr int kNDim = kSpatialNDim + !NumChannelsT::kHasStaticChannels; + + static_assert(!NumChannelsT::kHasStaticChannels || NumChannelsT::kStaticChannels == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + static_assert(PassOutWrap::kNumDimensions == 
kNDim); + static_assert(PassInWrap::kNumDimensions == kNDim); + + if constexpr (NumChannelsT::kHasStaticChannels) + { + using FloatT = cuda::ConvertBaseTypeTo; + const FloatT a = cuda::StaticCast(LoadPixelLdg(inWrap, numChannels, inIdx0)); + const FloatT b = cuda::StaticCast(LoadPixelLdg(inWrap, numChannels, inIdx1)); + FloatT tmp = b - a; +#pragma unroll + for (int c = 0; c < NumChannelsT::kStaticChannels; c++) + { + cuda::GetElement(tmp, c) = fmaf(cuda::GetElement(tmp, c), q, cuda::GetElement(a, c)); + } + OutT &out = *GetWrapPtr(outWrap, outIdx); + out = cuda::SaturateCast(tmp); + } + else if constexpr (!NumChannelsT::kHasStaticChannels) + { + for (int c = 0; c < numChannels(); c++) + { + const float a = LoadPixelLdg(inWrap, numChannels, inIdx0, c); + const float b = LoadPixelLdg(inWrap, numChannels, inIdx1, c); + const float tmp = fmaf(b - a, q, a); + OutT &out = *GetWrapPtr(outWrap, outIdx, c); + out = cuda::SaturateCast(tmp); + } + } +} + +template +void __forceinline__ __device__ ForAllPixelsHorz(const VecI<2> lo, const VecI<2> hi, ProcessPixel &&processPixel) +{ + for (int x = lo.x + threadIdx.x; x < hi.x; x += blockDim.x) + { + for (int y = threadIdx.y + lo.y; y < hi.y; y += blockDim.y) + { + processPixel(VecI<2>{x, y}); + } + } +} + +template +void __forceinline__ __device__ ForAllPixelsHorz(const VecI<3> lo, const VecI<3> hi, ProcessPixel &&processPixel) +{ + for (int x = lo.x + threadIdx.x; x < hi.x; x += blockDim.x) + { + for (int z = threadIdx.z + lo.z; z < hi.z; z += blockDim.z) + { + for (int y = threadIdx.y + lo.y; y < hi.y; y += blockDim.y) + { + processPixel(VecI<3>{x, y, z}); + } + } + } +} + +template +void __forceinline__ __device__ ForAllPixelsVert(const VecI<2> lo, const VecI<2> hi, ProcessPixel &&processPixel) +{ + for (int y = threadIdx.y + lo.y; y < hi.y; y += blockDim.y) + { + for (int x = lo.x + threadIdx.x; x < hi.x; x += blockDim.x) + { + processPixel(VecI<2>{x, y}); + } + } +} + +template +void __forceinline__ __device__ ForAllPixelsVert(const VecI<3> lo, const VecI<3> hi, ProcessPixel &&processPixel) +{ + for (int z = threadIdx.z + lo.z; z < hi.z; z += blockDim.z) + { + for (int y = threadIdx.y + lo.y; y < hi.y; y += blockDim.y) + { + for (int x = lo.x + threadIdx.x; x < hi.x; x += blockDim.x) + { + processPixel(VecI<3>{x, y, z}); + } + } + } +} + +/** + * @brief Implements horizontal resampling + * + * @param outWrap - the wrapper for accessing output data + * @param inWrap - the wrapper for accessing input data + * @param lo - inclusive lower bound output coordinates of the block processed by the threadblock + * @param hi - exclusive upper bound output coordinates of the block processed by the threadblock + * @param srcX0 - X coordinate in the source image corresponding to output 0 + * @param scale - step, in source X, for one pixel in output X (may be negative) + * @param inShape - shape of the input (x, y[, z]) order + * @param numChannels - the NumChannels specialization describing the number of interleaved + * channels in the input and output sample. + * + * The input region of interest is defined in terms of origin/scale, which are relative to + * output (0, 0). + * The lo/hi parameters are not output RoI - they merely indicate the output slice processed + * by current block. 
+ */ +template +void __forceinline__ __device__ ResampleHorz(const PassOutWrap outWrap, const PassInWrap inWrap, + const VecI lo, const VecI hi, float srcX0, + const float scale, const VecI inShape, + const NumChannelsT numChannels) +{ + srcX0 += 0.5f * scale - 0.5f; + ForAllPixelsHorz(lo, hi, + [=](const VecI outIdx) + { + const float sx0f = outIdx.x * scale + srcX0; + const int sx0i = cuda::round(sx0f); + const float q = sx0f - sx0i; + const int sx0 = cuda::clamp(sx0i, 0, inShape.x - 1); + const int sx1 = cuda::clamp(sx0i + 1, 0, inShape.x - 1); + + VecI inIdx0 = outIdx; + VecI inIdx1 = outIdx; + inIdx0.x = sx0; + inIdx1.x = sx1; + + Linear(outWrap, inWrap, numChannels, inIdx0, inIdx1, q, outIdx); + }); +} + +/** + * @brief Implements vertical resampling + * + * @param outWrap - the wrapper for accessing output data + * @param inWrap - the wrapper for accessing input data + * @param lo - inclusive lower bound output coordinates of the block processed by the threadblock + * @param hi - exclusive upper bound output coordinates of the block processed by the threadblock + * @param srcY0 - Y coordinate in the source image corresponding to output 0 + * @param scale - step, in source Y, for one pixel in output Y (may be negative) + * @param inShape - shape of the input (x, y[, z]) order + * @param numChannels - the NumChannels specialization describing the number of interleaved + * channels in the input and output sample. + */ +template +void __forceinline__ __device__ ResampleVert(const PassOutWrap outWrap, const PassInWrap inWrap, + const VecI lo, const VecI hi, float srcY0, + const float scale, const VecI inShape, + const NumChannelsT numChannels) +{ + srcY0 += 0.5f * scale - 0.5f; + ForAllPixelsVert(lo, hi, + [=](const VecI outIdx) + { + const float sy0f = outIdx.y * scale + srcY0; + const int sy0i = cuda::round(sy0f); + const float q = sy0f - sy0i; + const int sy0 = cuda::clamp(sy0i, 0, inShape.y - 1); + const int sy1 = cuda::clamp(sy0i + 1, 0, inShape.y - 1); + + VecI inIdx0 = outIdx; + VecI inIdx1 = outIdx; + inIdx0.y = sy0; + inIdx1.y = sy1; + + Linear(outWrap, inWrap, numChannels, inIdx0, inIdx1, q, outIdx); + }); +} + +/** + * @brief Implements depthwise resampling + * + * @param outWrap - the wrapper for accessing output data + * @param inWrap - the wrapper for accessing input data + * @param lo - inclusive lower bound output coordinates of the block processed by the threadblock + * @param hi - exclusive upper bound output coordinates of the block processed by the threadblock + * @param srcZ0 - Z coordinate in the source image corresponding to output's 0 + * @param scale - step, in source Z, for one pixel in output Z (may be negative) + * @param inShape - shape of the input (x, y[, z]) order + * @param numChannels - the NumChannels specialization describing the number of interleaved + * channels in the input and output sample. 
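
All three linear variants (horizontal, vertical, and the depth pass documented above and defined just below) reduce to the same two-tap formula. A scalar, host-side sketch of that formula follows; names are local to the sketch, and since the exact `cuda::round` mode was lost from this diff, the conventional floor is used for the integer tap.

```cpp
// Illustrative only: the two-tap linear formula used by the linear ResampleHorz/Vert/Depth
// paths, written as plain host code for a single axis.
#include <algorithm>
#include <cmath>
#include <vector>

float LerpSample1D(const std::vector<float> &in, float srcOrigin, float scale, int outIdx)
{
    // Same pixel-center mapping as the kernels: the origin is shifted by 0.5*scale - 0.5.
    const float sx0f    = outIdx * scale + srcOrigin + 0.5f * scale - 0.5f;
    const int   sx0i    = static_cast<int>(std::floor(sx0f));
    const float q       = sx0f - sx0i; // fractional distance between the two taps
    const int   lastIdx = static_cast<int>(in.size()) - 1;
    const int   sx0     = std::clamp(sx0i, 0, lastIdx);     // clamp both taps at the borders,
    const int   sx1     = std::clamp(sx0i + 1, 0, lastIdx); // as the device code does
    return std::fma(in[sx1] - in[sx0], q, in[sx0]);         // a + q * (b - a), fmaf in the kernel
}
```
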
+ */ +template +void __forceinline__ __device__ ResampleDepth(const PassOutWrap outWrap, const PassInWrap inWrap, const VecI<3> lo, + const VecI<3> hi, float srcZ0, const float scale, const VecI<3> inShape, + const NumChannelsT numChannels) +{ + srcZ0 += 0.5f * scale - 0.5f; + // threadIdx.y is used to traverse Z axis + for (int z = lo.z + threadIdx.y; z < hi.z; z += blockDim.y) + { + const float sz0f = z * scale + srcZ0; + const int sz0i = cuda::round(sz0f); + const float q = sz0f - sz0i; + const int sz0 = cuda::clamp(sz0i, 0, inShape.z - 1); + const int sz1 = cuda::clamp(sz0i + 1, 0, inShape.z - 1); + + for (int y = lo.y + threadIdx.z; y < hi.y; y += blockDim.z) + { + for (int x = lo.x + threadIdx.x; x < hi.x; x += blockDim.x) + { + VecI<3> inIdx0{x, y, sz0}; + VecI<3> inIdx1{x, y, sz1}; + VecI<3> outIdx{x, y, z}; + Linear<3>(outWrap, inWrap, numChannels, inIdx0, inIdx1, q, outIdx); + } + } + } +} + +} // namespace linear + +namespace filter_support { + +constexpr int kMaxGPUFilterSupport = 8192; + +bool __forceinline__ __host__ __device__ CanComputeCoefPerThread(const int support, const int resamplingAxisBlockSize) +{ + return support * resamplingAxisBlockSize <= kMaxGPUFilterSupport; +} + +inline int RequiredSharedMemoryElements(const int support, const int resamplingAxisBlockSize) +{ + if (CanComputeCoefPerThread(support, resamplingAxisBlockSize)) + { + return support * resamplingAxisBlockSize; + } + else + { + return support; + } +} + +template +void __forceinline__ __device__ ForAllOrthogonalToHorz(const VecI<2> lo, const VecI<2> hi, ProcessPixel &&processPixel) +{ + for (int y = threadIdx.y + lo.y; y < hi.y; y += blockDim.y) + { + processPixel(VecI<2>{0, y}); + } +} + +template +void __forceinline__ __device__ ForAllOrthogonalToHorz(const VecI<3> lo, const VecI<3> hi, ProcessPixel &&processPixel) +{ + for (int z = threadIdx.z + lo.z; z < hi.z; z += blockDim.z) + { + for (int y = threadIdx.y + lo.y; y < hi.y; y += blockDim.y) + { + processPixel(VecI<3>{0, y, z}); + } + } +} + +template +void __forceinline__ __device__ ForAllOrthogonalToVert(const VecI<2> lo, const VecI<2> hi, ProcessPixel &&processPixel) +{ + for (int x = threadIdx.x + lo.x; x < hi.x; x += blockDim.x) + { + processPixel(VecI<2>{x, 0}); + } +} + +template +void __forceinline__ __device__ ForAllOrthogonalToVert(const VecI<3> lo, const VecI<3> hi, ProcessPixel &&processPixel) +{ + for (int z = threadIdx.z + lo.z; z < hi.z; z += blockDim.z) + { + for (int x = threadIdx.x + lo.x; x < hi.x; x += blockDim.x) + { + processPixel(VecI<3>{x, 0, z}); + } + } +} + +/** + * @brief Implements horizontal resampling + * + * @param outWrap - the wrapper for accessing output data + * @param inWrap - the wrapper for accessing input data + * @param lo - inclusive lower bound output coordinates of the block processed by the threadblock + * @param hi - exclusive upper bound output coordinates of the block processed by the threadblock + * @param srcX0 - X coordinate in the source image corresponding to output's 0 + * @param scale - step, in source X, for one pixel in output X (may be negative) + * @param support - size of the resampling kernel, in source pixels + * @param numChannels - the NumChannels specialization describing the number of interleaved + * channels in the input and output sample. + * + * The function fills the output in block-sized vertical spans. + * Block horizontal size is warp-aligned. 
+ * Filter coefficients are pre-calculated for each vertical span to avoid + * recalculating them for each row, and stored in a shared memory block. + * + * The function follows different code paths for static and dynamic number of channels. + * For the dynamic, the innermost loop goes over filter taps, which eliminates the need + * for thread-local memory to store intermediate sums. This allows processing arbitrary + * number of channels. + * For static number of channels, the run-time parameter `channels` is ignored and + * there's also a local temporary storage for a tap sum for each channel. This is faster, + * but requires extra registers for the intermediate sums. + */ +template +void __forceinline__ __device__ ResampleHorz(const PassOutWrap outWrap, const PassInWrap inWrap, + const VecI lo, const VecI hi, float srcX0, + const float scale, const VecI inShape, + const filter::ResamplingFilter filter, const NumChannelsT numChannels) +{ + extern __shared__ float coeffs[]; + + using OutT = typename PassOutWrap::ValueType; + using InT = std::remove_const_t; + // spatial extents and optional channels extent + constexpr int kNDim = kSpatialNDim + !NumChannelsT::kHasStaticChannels; + + static_assert(!NumChannelsT::kHasStaticChannels || NumChannelsT::kStaticChannels == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + static_assert(PassOutWrap::kNumDimensions == kNDim); + static_assert(PassInWrap::kNumDimensions == kNDim); + + const int support = filter.support(); + const float filterStep = filter.scale; + // If the support is small enough (for blockDim.x = 32 and kMaxGPUFilterSupport = 8192, it's 256), + // we can fit `support` x `blockDim.x` elements into shm, so that for each output_x mapped to input_x, + // we take into account the exact error that comes from rounding the input_x from float to integer. + // For larger supports, we just compute `support` elements common for all threads. + const bool hugeSupport = !CanComputeCoefPerThread(support, blockDim.x); + const int coeffBase = hugeSupport ? 0 : threadIdx.x; + const int coeffStride = hugeSupport ? 1 : blockDim.x; + + srcX0 += 0.5f * scale - 0.5f - filter.anchor; + + for (int j = lo.x; j < hi.x; j += blockDim.x) + { + const int x = j + threadIdx.x; + const float sx0f = x * scale + srcX0; + const int sx0 = hugeSupport ? 
cuda::round(sx0f) + : cuda::round(sx0f); + const float f = (sx0 - sx0f) * filterStep; + __syncthreads(); + if (hugeSupport) + { + for (int k = threadIdx.x + blockDim.x * threadIdx.y; k < support; k += blockDim.x * blockDim.y) + { + float flt = filter(f + k * filterStep); + coeffs[k] = flt; + } + } + else + { + for (int k = threadIdx.y; k < support; k += blockDim.y) + { + float flt = filter(f + k * filterStep); + coeffs[coeffBase + coeffStride * k] = flt; + } + } + __syncthreads(); + + if (x >= hi.x) + continue; + + float norm = 0; + for (int k = 0; k < support; k++) + { + norm += coeffs[coeffBase + coeffStride * k]; + } + norm = 1.0f / norm; + + ForAllOrthogonalToHorz( + lo, hi, + [=](VecI outIdx) + { + VecI inIdx = outIdx; + outIdx.x = x; + + if constexpr (NumChannelsT::kHasStaticChannels) + { + using FloatT = cuda::ConvertBaseTypeTo; + FloatT tmp{}; + + for (int k = 0, coeffIdx = coeffBase; k < support; k++, coeffIdx += coeffStride) + { + inIdx.x = cuda::clamp(sx0 + k, 0, inShape.x - 1); + const float flt = coeffs[coeffIdx]; + const InT px = LoadPixelLdg(inWrap, numChannels, inIdx); +#pragma unroll + for (int c = 0; c < NumChannelsT::kStaticChannels; c++) + { + cuda::GetElement(tmp, c) = fmaf(cuda::GetElement(px, c), flt, cuda::GetElement(tmp, c)); + } + } + + OutT &out = *GetWrapPtr(outWrap, outIdx); + out = cuda::SaturateCast(tmp * norm); + } + else if constexpr (!NumChannelsT::kHasStaticChannels) + { + for (int c = 0; c < numChannels(); c++) + { + float tmp = 0; + + for (int k = 0, coeffIdx = coeffBase; k < support; k++, coeffIdx += coeffStride) + { + inIdx.x = cuda::clamp(sx0 + k, 0, inShape.x - 1); + const float flt = coeffs[coeffIdx]; + const InT px = LoadPixelLdg(inWrap, numChannels, inIdx, c); + tmp = fmaf(px, flt, tmp); + } + + OutT &out = *GetWrapPtr(outWrap, outIdx, c); + out = cuda::SaturateCast(tmp * norm); + } + } + }); + } +} + +/** + * @brief Implements vertical resampling + * + * @param outWrap - the wrapper for accessing output data + * @param inWrap - the wrapper for accessing input data + * @param lo - inclusive lower bound output coordinates of the block processed by the threadblock + * @param hi - exclusive upper bound output coordinates of the block processed by the threadblock + * @param srcY0 - Y coordinate in the source image corresponding to output's 0 + * @param scale - step, in source Y, for one pixel in output Y (may be negative) + * @param support - size of the resampling kernel, in source pixels + * @param numChannels - the NumChannels specialization describing the number of interleaved + * channels in the input and output sample. + * + * The function fills the output in block-sized horizontal spans. + * Filter coefficients are pre-calculated for each horizontal span to avoid + * recalculating them for each column, and stored in a shared memory block. 
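
The same coefficient-precomputation-and-normalization scheme drives the horizontal pass above and the vertical and depth passes that follow. A scalar, host-side sketch of the per-output-pixel math; the shared-memory staging and the huge-support fallback are omitted, and all names are local to the sketch.

```cpp
// Illustrative only: one row of a support-based resampling pass, including the
// normalization by the coefficient sum that the kernels perform.
#include <algorithm>
#include <cmath>
#include <functional>
#include <vector>

std::vector<float> ResampleRow(const std::vector<float> &in, int outSize, float srcX0, float scale, int support,
                               float filterStep, float filterAnchor, const std::function<float(float)> &filter)
{
    std::vector<float> out(outSize);
    const float origin  = srcX0 + 0.5f * scale - 0.5f - filterAnchor; // same shift as in the kernels
    const int   lastIdx = static_cast<int>(in.size()) - 1;
    for (int x = 0; x < outSize; ++x)
    {
        const float sx0f = x * scale + origin;
        const int   sx0  = static_cast<int>(std::lround(sx0f));
        const float f    = (sx0 - sx0f) * filterStep; // sub-pixel phase of the filter window

        float acc = 0.f, norm = 0.f;
        for (int k = 0; k < support; ++k)
        {
            const float coef = filter(f + k * filterStep); // staged in shared memory on the GPU
            acc += coef * in[std::clamp(sx0 + k, 0, lastIdx)];
            norm += coef;
        }
        out[x] = acc / norm; // normalizing keeps constant regions constant for any filter scale
    }
    return out;
}
```
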
+ */ +template +void __forceinline__ __device__ ResampleVert(const PassOutWrap outWrap, const PassInWrap inWrap, + const VecI lo, const VecI hi, float srcY0, + const float scale, const VecI inShape, + const filter::ResamplingFilter filter, const NumChannelsT numChannels) +{ + extern __shared__ float coeffs[]; + + using OutT = typename PassOutWrap::ValueType; + using InT = std::remove_const_t; + // spatial extents and optional channels extent + constexpr int kNDim = kSpatialNDim + !NumChannelsT::kHasStaticChannels; + + static_assert(!NumChannelsT::kHasStaticChannels || NumChannelsT::kStaticChannels == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + static_assert(PassOutWrap::kNumDimensions == kNDim); + static_assert(PassInWrap::kNumDimensions == kNDim); + + const int support = filter.support(); + const float filterStep = filter.scale; + // If the support is small enough, we can fit `blockDim.y` x `support` elements into shm, so that + // for each output_y mapped to input_y, we take into account the exact error that comes from + // rounding the input_y from float to integer. For larger supports, we just compute `support` + // elements common for all threads. + const bool hugeSupport = !CanComputeCoefPerThread(support, blockDim.y); + const int coeffBase = hugeSupport ? 0 : support * threadIdx.y; + + srcY0 += 0.5f * scale - 0.5f - filter.anchor; + + for (int i = lo.y; i < hi.y; i += blockDim.y) + { + const int y = i + threadIdx.y; + const float sy0f = y * scale + srcY0; + const int sy0 = hugeSupport ? cuda::round(sy0f) + : cuda::round(sy0f); + float f = (sy0 - sy0f) * filterStep; + __syncthreads(); + // fills `support` + if (hugeSupport) + { + for (int k = threadIdx.x + blockDim.x * threadIdx.y; k < support; k += blockDim.x * blockDim.y) + { + float flt = filter(f + k * filterStep); + coeffs[k] = flt; + } + } + else + { + for (int k = threadIdx.x; k < support; k += blockDim.x) + { + float flt = filter(f + k * filterStep); + coeffs[coeffBase + k] = flt; + } + } + __syncthreads(); + + if (y >= hi.y) + continue; + + float norm = 0; + for (int k = 0; k < support; k++) + { + norm += coeffs[coeffBase + k]; + } + norm = 1.0f / norm; + + ForAllOrthogonalToVert(lo, hi, + [=](VecI outIdx) + { + VecI inIdx = outIdx; + outIdx.y = y; + + if constexpr (NumChannelsT::kHasStaticChannels) + { + using FloatT = cuda::ConvertBaseTypeTo; + FloatT tmp{}; + + for (int k = 0; k < support; k++) + { + inIdx.y = cuda::clamp(sy0 + k, 0, inShape.y - 1); + const float flt = coeffs[coeffBase + k]; + const InT px = LoadPixelLdg(inWrap, numChannels, inIdx); +#pragma unroll + for (int c = 0; c < NumChannelsT::kStaticChannels; c++) + { + cuda::GetElement(tmp, c) + = fmaf(cuda::GetElement(px, c), flt, cuda::GetElement(tmp, c)); + } + } + + OutT &out = *GetWrapPtr(outWrap, outIdx); + out = cuda::SaturateCast(tmp * norm); + } + else if constexpr (!NumChannelsT::kHasStaticChannels) + { + for (int c = 0; c < numChannels(); c++) + { + float tmp = 0; + + for (int k = 0; k < support; k++) + { + inIdx.y = cuda::clamp(sy0 + k, 0, inShape.y - 1); + const float flt = coeffs[coeffBase + k]; + const InT px = LoadPixelLdg(inWrap, numChannels, inIdx, c); + tmp = fmaf(px, flt, tmp); + } + + OutT &out = *GetWrapPtr(outWrap, outIdx, c); + out = cuda::SaturateCast(tmp * norm); + } + } + }); + } +} + +/** + * @brief Implements depth resampling + * + * @param outWrap - the wrapper for accessing output data + * @param inWrap - the wrapper for accessing input data + * @param lo - inclusive lower bound output coordinates 
of the block processed by the threadblock + * @param hi - exclusive upper bound output coordinates of the block processed by the threadblock + * @param srcZ0 - Y coordinate in the source image corresponding to output's 0 + * @param scale - step, in source Y, for one pixel in output Y (may be negative) + * @param support - size of the resampling kernel, in source pixels + * @param numChannels - the NumChannels specialization describing the number of interleaved + * channels in the input and output sample. + * + * The function fills the output in block-sized horizontal spans. + * Filter coefficients are pre-calculated for each horizontal span to avoid + * recalculating them for each column, and stored in a shared memory block. + */ +template +void __forceinline__ __device__ ResampleDepth(const PassOutWrap outWrap, const PassInWrap inWrap, const VecI<3> lo, + const VecI<3> hi, float srcZ0, const float scale, const VecI<3> inShape, + const filter::ResamplingFilter filter, const NumChannelsT numChannels) +{ + extern __shared__ float coeffs[]; + + using OutT = typename PassOutWrap::ValueType; + using InT = std::remove_const_t; + // spatial extents and optional channels extent + constexpr int kNDim = 3 + !NumChannelsT::kHasStaticChannels; + + static_assert(!NumChannelsT::kHasStaticChannels || NumChannelsT::kStaticChannels == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + static_assert(PassOutWrap::kNumDimensions == kNDim); + static_assert(PassInWrap::kNumDimensions == kNDim); + + const int support = filter.support(); + const float filterStep = filter.scale; + // If the support is small enough, we can fit `blockDim.y` x `support` elements into shm, + // so that for each output_z mapped to input_z, we take into account the exact error that + // comes from rounding the input_z from float to integer. For larger supports, we just + // compute `support` elements common for all threads. + const bool hugeSupport = !CanComputeCoefPerThread(support, blockDim.y); + const int coeffBase = hugeSupport ? 0 : support * threadIdx.y; + + srcZ0 += 0.5f * scale - 0.5f - filter.anchor; + + for (int i = lo.z; i < hi.z; i += blockDim.y) + { + // threadIdx.y is used to traverse Z axis + const int z = i + threadIdx.y; + const float sz0f = z * scale + srcZ0; + const int sz0 = hugeSupport ? 
cuda::round(sz0f) + : cuda::round(sz0f); + float f = (sz0 - sz0f) * filterStep; + __syncthreads(); + if (hugeSupport) + { + for (int k = threadIdx.x + blockDim.x * threadIdx.y; k < support; k += blockDim.x * blockDim.y) + { + float flt = filter(f + k * filterStep); + coeffs[k] = flt; + } + } + else + { + for (int k = threadIdx.x; k < support; k += blockDim.x) + { + float flt = filter(f + k * filterStep); + coeffs[coeffBase + k] = flt; + } + } + __syncthreads(); + + if (z >= hi.z) + continue; + + float norm = 0; + for (int k = 0; k < support; k++) + { + norm += coeffs[coeffBase + k]; + } + norm = 1.0f / norm; + + for (int y = threadIdx.z + lo.y; y < hi.y; y += blockDim.z) + { + for (int x = threadIdx.x + lo.x; x < hi.x; x += blockDim.x) + { + const VecI<3> outIdx{x, y, z}; + VecI<3> inIdx = outIdx; + + if constexpr (NumChannelsT::kHasStaticChannels) + { + using FloatT = cuda::ConvertBaseTypeTo; + FloatT tmp{}; + + for (int k = 0; k < support; k++) + { + inIdx.z = cuda::clamp(sz0 + k, 0, inShape.z - 1); + const float flt = coeffs[coeffBase + k]; + const InT px = LoadPixelLdg(inWrap, numChannels, inIdx); +#pragma unroll + for (int c = 0; c < NumChannelsT::kStaticChannels; c++) + { + cuda::GetElement(tmp, c) = fmaf(cuda::GetElement(px, c), flt, cuda::GetElement(tmp, c)); + } + } + + OutT &out = *GetWrapPtr(outWrap, outIdx); + out = cuda::SaturateCast(tmp * norm); + } + else if constexpr (!NumChannelsT::kHasStaticChannels) + { + for (int c = 0; c < numChannels(); c++) + { + float tmp = 0; + + for (int k = 0; k < support; k++) + { + inIdx.z = cuda::clamp(sz0 + k, 0, inShape.z - 1); + const float flt = coeffs[coeffBase + k]; + const InT px = LoadPixelLdg(inWrap, numChannels, inIdx, c); + tmp = fmaf(px, flt, tmp); + } + + OutT &out = *GetWrapPtr(outWrap, outIdx, c); + out = cuda::SaturateCast(tmp * norm); + } + } + } + } + } +} +} // namespace filter_support + +template +void __forceinline__ __device__ RunNN(const PassOutWrap outWrap, const PassInWrap inWrap, const VecI lo, + const VecI hi, int axis, const VecI inShape, + const float origin, const float scale, const NumChannelsT numChannels) +{ + auto originV = cuda::SetAll>(0.f); + auto scaleV = cuda::SetAll>(1.f); + cuda::GetElement(originV, axis) = origin; + cuda::GetElement(scaleV, axis) = scale; + nn::Resample(outWrap, inWrap, lo, hi, originV, scaleV, inShape, numChannels); +} + +template +void __forceinline__ __device__ RunLinear(const PassOutWrap outWrap, const PassInWrap inWrap, + const VecI lo, const VecI hi, int axis, + const VecI inShape, const float origin, const float scale, + const NumChannelsT numChannels) +{ + if (axis == 0) + { + linear::ResampleHorz(outWrap, inWrap, lo, hi, origin, scale, inShape, numChannels); + } + else if (axis == 1) + { + linear::ResampleVert(outWrap, inWrap, lo, hi, origin, scale, inShape, numChannels); + } + else if (axis == 2) + { + if constexpr (kSpatialNDim == 3) + { + linear::ResampleDepth(outWrap, inWrap, lo, hi, origin, scale, inShape, numChannels); + } + } +} + +template +void __forceinline__ __device__ RunFilter(const PassOutWrap outWrap, const PassInWrap inWrap, + const VecI lo, const VecI hi, int axis, + const VecI inShape, const float origin, const float scale, + const filter::ResamplingFilter filter, const NumChannelsT numChannels) +{ + if (axis == 0) + { + filter_support::ResampleHorz(outWrap, inWrap, lo, hi, origin, scale, inShape, filter, + numChannels); + } + else if (axis == 1) + { + filter_support::ResampleVert(outWrap, inWrap, lo, hi, origin, scale, inShape, filter, + numChannels); + } + 
else if (axis == 2) + { + if constexpr (kSpatialNDim == 3) + { + filter_support::ResampleDepth(outWrap, inWrap, lo, hi, origin, scale, inShape, filter, numChannels); + } + } +} +} // namespace interpolate + +template +void __forceinline__ __device__ RunResamplingPass(const SampleDesc sampleDesc, const PassOutWrap outWrap, + const PassInWrap inWrap, const VecI lo, + const VecI hi, const NumChannelsT numChannels) +{ + VecI inShape = sampleDesc.shapes[kWhichPass]; + int axis = cuda::GetElement(sampleDesc.processingOrder, kWhichPass); // vec-order: 0 = X, 1 = Y, 2 = Z + const float origin = cuda::GetElement(sampleDesc.origin, kWhichPass); + const float scale = cuda::GetElement(sampleDesc.scale, kWhichPass); + + switch (sampleDesc.filterKind[kWhichPass]) + { + case filter::FilterTypeKind::Nearest: + interpolate::RunNN(outWrap, inWrap, lo, hi, axis, inShape, origin, scale, numChannels); + break; + case filter::FilterTypeKind::Linear: + interpolate::RunLinear(outWrap, inWrap, lo, hi, axis, inShape, origin, scale, numChannels); + break; + default: + interpolate::RunFilter(outWrap, inWrap, lo, hi, axis, inShape, origin, scale, + sampleDesc.filter[kWhichPass], numChannels); + break; + } +} + +// Tensor variant (unfirom batch) +template +__global__ void SeparableResamplingKernel(const SampleDesc sampleDesc, const PassOutWrap outWrap, + const PassInWrap inWrap, const GridHelperDevice gridHelper) + +{ + constexpr bool kHasDynamicChannels = kNumStaticChannels == -1; + static_assert(PassInWrap::kNumDimensions == 1 + kSpatialNDim + kHasDynamicChannels); + static_assert(PassOutWrap::kNumDimensions == 1 + kSpatialNDim + kHasDynamicChannels); + // Get sample idx and the region of the output image that + // the current threadblock has to process + int sampleIdx = gridHelper.CurrentSample(); + VecI lo, hi; + gridHelper.CurrentBlock(lo, hi, sampleDesc.blockShape[kWhichPass]); + hi = cuda::min(hi, sampleDesc.shapes[kWhichPass + 1]); + + const auto outSampleView = batch_wrapper::tensor::GetSampleView(outWrap, sampleIdx); + const auto inSampleView = batch_wrapper::tensor::GetSampleView(inWrap, sampleIdx); + WithChannels( + sampleDesc.channels, [=](const NumChannels numChannels) + { RunResamplingPass(sampleDesc, outSampleView, inSampleView, lo, hi, numChannels); }); +} + +// Batch variant (ImageBatchVarShape, TensorBatch) +template +__global__ void SeparableResamplingKernel(const SampleDesc *__restrict__ samples, + const PassOutWrap outWrap, const PassInWrap inWrap, + const GridHelperDevice gridHelper) +{ + constexpr bool kHasDynamicChannels = kNumStaticChannels == -1; + static_assert(PassInWrap::kNumDimensions == 1 + kSpatialNDim + kHasDynamicChannels); + static_assert(PassOutWrap::kNumDimensions == 1 + kSpatialNDim + kHasDynamicChannels); + // Get sample idx and the region of the output image that + // the current threadblock has to process + const int sampleIdx = gridHelper.CurrentSample(); + const auto sampleDesc = samples[sampleIdx]; + const VecI outShape = sampleDesc.shapes[kWhichPass + 1]; + VecI lo, hi; + gridHelper.CurrentBlock(lo, hi, sampleDesc.blockShape[kWhichPass]); + + // exit early for smaller samples + if (lo.x >= outShape.x || lo.y >= outShape.y) + { + return; + } + if constexpr (kSpatialNDim == 3) + { + if (lo.z >= outShape.z) + { + return; + } + } + hi = cuda::min(hi, outShape); + + const auto outSampleView = outWrap.GetSampleView(sampleIdx); + WithChannels( + sampleDesc.channels, + [=](const NumChannels numChannels) + { + if constexpr (kWhichPass == 0) + { + const auto inSampleView = 
inWrap.GetSampleView(sampleIdx, sampleDesc.inRoiOffset); + RunResamplingPass(sampleDesc, outSampleView, inSampleView, lo, hi, numChannels); + } + else if constexpr (kWhichPass != 0) + { + const auto inSampleView = inWrap.GetSampleView(sampleIdx); + RunResamplingPass(sampleDesc, outSampleView, inSampleView, lo, hi, numChannels); + } + }); +} + +} // namespace resampling + +namespace validate { +inline auto srcDst(const nvcv::Tensor &src, const nvcv::Tensor &dst) +{ + auto srcData = src.exportData(); + auto dstData = dst.exportData(); + + if (!srcData) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Input must be cuda-accessible tensor"); + } + + if (!dstData) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Output must be cuda-accessible tensor"); + } + + using maybeTensorAccess = nvcv::Optional; + std::tuple ret; + + auto &[srcAccess, dstAccess, numSamples, numChannels, srcDtype, dstDtype] = ret; + + srcDtype = srcData->dtype(); + dstDtype = dstData->dtype(); + + srcAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(*srcData); + dstAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(*dstData); + NVCV_ASSERT(srcAccess && dstAccess); + + numSamples = srcAccess->numSamples(); + if (numSamples != dstAccess->numSamples()) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of samples"); + } + + if (srcDtype.numChannels() > 1 || dstDtype.numChannels() > 1) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "The tensor channels should be an explicit part of the shape, not of the tensor type"); + } + + numChannels = srcAccess->numChannels(); + if (numChannels != dstAccess->numChannels()) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of channels"); + } + + if (numChannels <= 0) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Number of channels must be positive"); + } + + auto numPlanes = srcAccess->numPlanes(); + if (numPlanes != dstAccess->numPlanes()) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of planes"); + } + + if (numPlanes > 1) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Planar images are not supported"); + } + + if (srcData->layout() != dstData->layout()) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Input and output must have the same layout"); + } + + return ret; +} + +inline void srcDst(int &numSamples, int &uniqueNumChannels, nvcv::DataType &srcDtype, nvcv::DataType &dstDtype, + const nvcv::ImageBatchVarShape &src, const nvcv::ImageBatchVarShape &dst) +{ + numSamples = src.numImages(); + if (numSamples != dst.numImages()) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of samples"); + } + + const auto &srcFormat = src.uniqueFormat(); + const auto &dstFormat = dst.uniqueFormat(); + + if (!srcFormat || !dstFormat) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "All images in a batch must have the same format (including number of channels)"); + } + + auto numPlanes = srcFormat.numPlanes(); + if (numPlanes != dstFormat.numPlanes()) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of planes"); + } + + if (numPlanes > 1) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Planar images are not supported"); + } + + srcDtype = srcFormat.planeDataType(0); + dstDtype =
dstFormat.planeDataType(0); + + uniqueNumChannels = srcFormat.numChannels(); + if (uniqueNumChannels != dstFormat.numChannels()) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of channels"); + } +} + +inline void srcDst(int &numSamples, int &uniqueNumChannels, nvcv::DataType &srcDtype, nvcv::DataType &dstDtype, + const nvcv::TensorBatch &src, const nvcv::TensorBatch &dst) +{ + numSamples = src.numTensors(); + if (numSamples != dst.numTensors()) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of samples"); + } + + uniqueNumChannels = -1; + srcDtype = src.dtype(); + dstDtype = dst.dtype(); + + if (srcDtype.numChannels() > 1 || dstDtype.numChannels() > 1) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "The tensor channels should be an explicit part of the shape, not of the tensor type"); + } + + if (src.layout() != dst.layout()) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output layouts"); + } + + if (src.layout() != nvcv::TENSOR_HW && src.layout() != nvcv::TENSOR_HWC && src.layout() != nvcv::TENSOR_DHW + && src.layout() != nvcv::TENSOR_DHWC) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "The tensor batch must contain [D]HW[C] samples"); + } +} + +inline void inOutNumberOfChannels(const HQResizeTensorShapeI &inShape, const HQResizeTensorShapeI &outShape) +{ + if (inShape.numChannels != outShape.numChannels) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Incompatible input/output number of channels in one of the samples"); + } + if (inShape.numChannels <= 0) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "The number of channels must be positive"); + } +} + +inline void sameInOutNdim(const HQResizeTensorShapeI &inShape, const HQResizeTensorShapeI &outShape) +{ + if (inShape.ndim != outShape.ndim) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Incompatible input/output number of extents to resize"); + } +} + +inline void inOutShapes(int numSamples, const HQResizeTensorShapesI &inShapes, const HQResizeTensorShapesI &outShapes) +{ + if (inShapes.ndim != outShapes.ndim) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "The dimensionality of input and output shapes does not match"); + } + + if (numSamples != inShapes.size || numSamples != outShapes.size) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of samples"); + } + + if (inShapes.ndim != outShapes.ndim) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of extents"); + } + + if (inShapes.numChannels != outShapes.numChannels) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of channels"); + } + + if (inShapes.numChannels < 0) + { + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + inOutNumberOfChannels(inShapes.shape[sampleIdx], outShapes.shape[sampleIdx]); + } + } + else if (inShapes.numChannels == 0) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "The number of channels cannot be 0"); + } +} + +inline void roiBatch(int numSamples, int ndim, const HQResizeRoisF &rois) +{ + auto numRois = rois.size; + if (numRois != 0 && numRois != 1 && numRois != numSamples) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "The resize ROI list, if specified, must contain a single element to be used across
all " + "samples in a batch or its length must match the batch size."); + } + if (numRois != 0) + { + if (rois.ndim != ndim) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "The number of ROI extents does not match the numebr of extents in the input"); + } + } +} +} // namespace validate + +namespace shape { + +template +struct Roi +{ + Vec Size() const + { + return hi - lo; + } + + Vec lo, hi; +}; + +inline HQResizeRoiF *SampleRoi(const HQResizeRoisF &rois, int sampleIdx) +{ + if (rois.size == 0) + { + return nullptr; + } + else if (rois.size == 1) + { + return rois.roi; + } + else + { + return rois.roi + sampleIdx; + } +} + +template +inline VecI TensorShape(const HQResizeTensorShapeI &shape) +{ + VecI shapeVec; + for (int d = 0; d < kSpatialNDim; d++) + { + cuda::GetElement(shapeVec, d) = shape.extent[kSpatialNDim - d - 1]; + } + return shapeVec; +} + +template +inline VecI SampleShape(const HQResizeTensorShapesI &shapes, int sampleIdx) +{ + return TensorShape(shapes.shape[shapes.size == 1 ? 0 : sampleIdx]); +} + +template +inline VecI TensorShape(const nvcv::Tensor &tensor) +{ + static_assert(kSpatialNDim == 2 || kSpatialNDim == 3); + const auto &shape = tensor.shape(); + const auto &layout = tensor.layout(); + char shapeArgLayout[4] = "WHD"; + VecI tensorShape; + for (int d = 0; d < kSpatialNDim; d++) + { + int axis = layout.find(shapeArgLayout[d]); + if (axis < 0) + { + throw std::runtime_error( + "The layout of an input tensor to the resize operator must contain HW extents in the layout (for " + "images) or DHW extents (for 3D resampling). Some extents are missing in the input tensor."); + } + cuda::GetElement(tensorShape, d) = shape[axis]; + } + return tensorShape; +} + +template +inline VecI SampleShape(const nvcv::ImageBatchVarShape &batch, int sampleIdx) +{ + static_assert(kSpatialNDim == 2); + VecI sampleShape; + const nvcv::Image &image = batch[sampleIdx]; + const auto &imageSize = image.size(); + sampleShape.x = imageSize.w; + sampleShape.y = imageSize.h; + return sampleShape; +} + +template +inline VecI SampleShape(const nvcv::TensorBatch &batch, int sampleIdx) +{ + return TensorShape(batch[sampleIdx]); +} + +inline int TensorNumChannels(const nvcv::Tensor &tensor) +{ + const auto &shape = tensor.shape(); + const auto &layout = tensor.layout(); + int channelAxis = layout.find('C'); + if (channelAxis < 0) + { + return 1; + } + return shape[channelAxis]; +} + +inline int SampleNumChannels(const nvcv::TensorBatch &src, const nvcv::TensorBatch &dst, int sampleIdx) +{ + const auto &srcSample = src[sampleIdx]; + const auto &dstSample = dst[sampleIdx]; + int numChannels = TensorNumChannels(srcSample); + if (numChannels != TensorNumChannels(dstSample)) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Incompatible input/output number of channels"); + } + if (numChannels <= 0) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Number of channels must be positive"); + } + return numChannels; +} +} // namespace shape + +/** + * @brief Calculates optimum processing order based on input/output sizes and filter support. + * + * The sizes of intermediate storage and time taken to compute the intermediate images + * may depend on the order - i.e. if downscaling only one axis, it's beneficial to resample that + * axis first, so that intermediate image is smaller. 
+ */ +template +class ProcessingOrderCalculator + +{ +public: + static constexpr float size_bias = 3; + + ProcessingOrderCalculator(const VecI inSize, const VecI outSize, const VecI filterSupport) + : m_inSize(inSize) + , m_outSize(outSize) + , m_filterSupport(filterSupport) + { + } + + VecI operator()() + { + for (int i = 0; i < ndim; i++) cuda::GetElement(m_bestOrder, i) = i; + m_axisVisited = {}; + m_currSize = m_inSize; + m_minCost = 1e+30f; + Run(0); + return m_bestOrder; + } + +private: + // recursively check every possible order in DFS fashion + void Run(int pass, float totalCost = 0) + { + if (totalCost >= m_minCost) + return; // this branch of recursion will not yield a better result - abandon it + + if (pass == ndim) + { + m_minCost = totalCost; + m_bestOrder = m_currOrder; + } + else + { + for (int a = 0; a < ndim; a++) + { + if (cuda::GetElement(m_axisVisited, a)) + continue; + cuda::GetElement(m_axisVisited, a) = true; + cuda::GetElement(m_currOrder, pass) = a; + auto prevSize = cuda::GetElement(m_currSize, a); + cuda::GetElement(m_currSize, a) = cuda::GetElement(m_outSize, a); + + float passCost = PassCost(pass, a); + Run(pass + 1, totalCost + passCost); + + cuda::GetElement(m_currSize, a) = prevSize; + cuda::GetElement(m_axisVisited, a) = false; + } + } + } + + float PassCost(int pass, int axis) + { + // y-axis is likely to be the cheapest + float axisCost = axis == 0 ? 1.4f : axis > 1 ? 1.2f : 1.0f; + auto vol = utils::Volume(m_currSize); + float baseComputeCost = cuda::GetElement(m_filterSupport, axis) * vol; + return axisCost * baseComputeCost + vol * size_bias; + } + + const VecI m_inSize, m_outSize, m_filterSupport; + float m_minCost; + VecI m_currSize, m_bestOrder, m_currOrder, m_axisVisited; +}; + +template +inline void RunTypedSwitch(nvcv::DataType srcDtype, nvcv::DataType dstDtype, int numChannels, const Cb &cb) +{ + using uchar = unsigned char; + +#define NVCV_RUN_DYNAMIC_CHANNELS_HQ_RESIZE(SRC_TYPE_NAME, DST_TYPE_NAME, SRC_VEC, DST_VEC) \ + ((srcDtype == nvcv::TYPE_##SRC_TYPE_NAME) && (dstDtype == nvcv::TYPE_##DST_TYPE_NAME)) \ + cb(SRC_VEC{}, IntermediateBaseT{}, DST_VEC{}, std::integral_constant{}) + +#define NVCV_RUN_SINGLE_CHANNEL_HQ_RESIZE(NUM_STATIC_CHANNELS, SRC_TYPE_NAME, DST_TYPE_NAME, SRC_VEC, DST_VEC) \ + ((numChannels == NUM_STATIC_CHANNELS) && (srcDtype == nvcv::TYPE_##SRC_TYPE_NAME) \ + && (dstDtype == nvcv::TYPE_##DST_TYPE_NAME)) \ + cb(SRC_VEC{}, IntermediateBaseT{}, DST_VEC{}, std::integral_constant{}) + +#define NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(NUM_STATIC_CHANNELS, SRC_TYPE_NAME, DST_TYPE_NAME, SRC_VEC, DST_VEC) \ + ((numChannels == NUM_STATIC_CHANNELS) \ + && (srcDtype == nvcv::TYPE_##SRC_TYPE_NAME || srcDtype == nvcv::TYPE_##NUM_STATIC_CHANNELS##SRC_TYPE_NAME) \ + && (dstDtype == nvcv::TYPE_##DST_TYPE_NAME || dstDtype == nvcv::TYPE_##NUM_STATIC_CHANNELS##DST_TYPE_NAME)) \ + cb(SRC_VEC##NUM_STATIC_CHANNELS{}, Vec{}, \ + DST_VEC##NUM_STATIC_CHANNELS{}, std::integral_constant{}) + + // clang-format off + if NVCV_RUN_SINGLE_CHANNEL_HQ_RESIZE(1, U8, U8, uchar, uchar); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(2, U8, U8, uchar, uchar); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(3, U8, U8, uchar, uchar); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(4, U8, U8, uchar, uchar); + else if NVCV_RUN_DYNAMIC_CHANNELS_HQ_RESIZE(U8, U8, uchar, uchar); + + else if NVCV_RUN_SINGLE_CHANNEL_HQ_RESIZE(1, U8, F32, uchar, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(2, U8, F32, uchar, float); + else if 
NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(3, U8, F32, uchar, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(4, U8, F32, uchar, float); + else if NVCV_RUN_DYNAMIC_CHANNELS_HQ_RESIZE(U8, F32, uchar, float); + + else if NVCV_RUN_SINGLE_CHANNEL_HQ_RESIZE(1, S16, S16, short, short); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(2, S16, S16, short, short); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(3, S16, S16, short, short); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(4, S16, S16, short, short); + else if NVCV_RUN_DYNAMIC_CHANNELS_HQ_RESIZE(S16, S16, short, short); + + else if NVCV_RUN_SINGLE_CHANNEL_HQ_RESIZE(1, S16, F32, short, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(2, S16, F32, short, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(3, S16, F32, short, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(4, S16, F32, short, float); + else if NVCV_RUN_DYNAMIC_CHANNELS_HQ_RESIZE(S16, F32, short, float); + + else if NVCV_RUN_SINGLE_CHANNEL_HQ_RESIZE(1, U16, U16, ushort, ushort); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(2, U16, U16, ushort, ushort); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(3, U16, U16, ushort, ushort); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(4, U16, U16, ushort, ushort); + else if NVCV_RUN_DYNAMIC_CHANNELS_HQ_RESIZE(U16, U16, ushort, ushort); + + else if NVCV_RUN_SINGLE_CHANNEL_HQ_RESIZE(1, U16, F32, ushort, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(2, U16, F32, ushort, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(3, U16, F32, ushort, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(4, U16, F32, ushort, float); + else if NVCV_RUN_DYNAMIC_CHANNELS_HQ_RESIZE(U16, F32, ushort, float); + + else if NVCV_RUN_SINGLE_CHANNEL_HQ_RESIZE(1, F32, F32, float, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(2, F32, F32, float, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(3, F32, F32, float, float); + else if NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE(4, F32, F32, float, float); + else if NVCV_RUN_DYNAMIC_CHANNELS_HQ_RESIZE(F32, F32, float, float); + else + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Unsupported input/output types. The resize operator supports the " + "following types: uint8, int16, uint16, and float32. 
" + "The output type must be same as the input type or float."); + } +// clang-format on +#undef NVCV_RUN_DYNAMIC_CHANNELS_HQ_RESIZE +#undef NVCV_RUN_SINGLE_CHANNEL_HQ_RESIZE +#undef NVCV_RUN_MULTI_STATIC_CHANNEL_HQ_RESIZE +} + +template +class HQResizeRun +{ +public: + static_assert(_kSpatialNDim == 2 || _kSpatialNDim == 3, + "Currently, the resampling operator supports only 2 or 3 spatial dimensions"); + + HQResizeRun(const filter::ResamplingFiltersFactory &filtersFactory) + : m_filtersFactory{filtersFactory} + { + } + + using SampleDescT = resampling::SampleDesc<_kSpatialNDim>; + static_assert(std::is_trivially_copyable_v); + using DynamicBatchWrapMeta = batch_wrapper::dynamic::DynamicBatchWrapMeta; + + static constexpr VecI<3> kBlockDim = {32, 8, 1}; + static constexpr int kSpatialNDim = _kSpatialNDim; + // the number of buffers for intermediate results + static constexpr int kNumTmpBuffers = kSpatialNDim - 1; + // use alignment suitable for maximal supported number of static channels + static constexpr int kIntermediateAlignment = alignof(Vec); + + // Computes workspace requierements for calling the operator with tensor (uniform batch) input/output + cvcuda::WorkspaceRequirements getWorkspaceRequirements(int numSamples, const HQResizeTensorShapeI inputShape, + const HQResizeTensorShapeI outputShape, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, + const bool antialias, const HQResizeRoiF *roi) const + { + validate::inOutNumberOfChannels(inputShape, outputShape); + validate::sameInOutNdim(inputShape, outputShape); + + SampleDescT sampleDesc; + VecI srcShape = shape::TensorShape(inputShape); + VecI dstShape = shape::TensorShape(outputShape); + int numChannels = inputShape.numChannels; + auto [minFilter, magFilter] = filter::GetFilterModes(minInterpolation, magInterpolation, antialias); + SetupSampleDesc(sampleDesc, srcShape, dstShape, numChannels, roi, minFilter, magFilter); + + cvcuda::WorkspaceEstimator est; + for (int t = 0; t < kNumTmpBuffers; t++) + { + // the vectorized alignment may or may not be needed, depending on the number of channels + est.addCuda(GetPassOutputVolume(sampleDesc, t) * numSamples, kIntermediateAlignment); + } + + cvcuda::WorkspaceRequirements req{}; + req.hostMem = est.hostMem.req; + req.pinnedMem = est.pinnedMem.req; + req.cudaMem = est.cudaMem.req; + + // The allocator requries the total size of the allocation to be aligned + cvcuda::AlignUp(req); + return req; + } + + // Computes workspace requirements for calling the operator with TensorBatch/ImageBatchVarShape input/output + cvcuda::WorkspaceRequirements getWorkspaceRequirements(int numSamples, const HQResizeTensorShapesI inputShapes, + const HQResizeTensorShapesI outputShapes, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, + const bool antialias, const HQResizeRoisF rois) const + { + validate::roiBatch(numSamples, kSpatialNDim, rois); + validate::inOutShapes(numSamples, inputShapes, outputShapes); + auto [minFilter, magFilter] = filter::GetFilterModes(minInterpolation, magInterpolation, antialias); + + size_t intermediateSizes[kNumTmpBuffers]{}; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + const VecI srcShape = shape::SampleShape(inputShapes, sampleIdx); + const VecI dstShape = shape::SampleShape(outputShapes, sampleIdx); + const HQResizeRoiF *sampleRoi = shape::SampleRoi(rois, sampleIdx); + int numChannels + = inputShapes.numChannels < 0 ? 
inputShapes.shape[sampleIdx].numChannels : inputShapes.numChannels; + + SampleDescT sampleDesc; + SetupSampleDesc(sampleDesc, srcShape, dstShape, numChannels, sampleRoi, minFilter, magFilter); + for (int t = 0; t < kNumTmpBuffers; t++) + { + intermediateSizes[t] += GetPassOutputVolume(sampleDesc, t); + } + } + + cvcuda::WorkspaceEstimator est; + est.addPinned(numSamples); + est.addCuda(numSamples); + + // reserve space for pointers and strides for intermediate wrappers + for (int t = 0; t < kNumTmpBuffers; t++) + { + batch_wrapper::dynamic::AddDynamicBatchWrapMeta(est, numSamples); + } + for (int t = 0; t < kNumTmpBuffers; t++) + { + // the vectorized alignment may or may not be needed, depending on the number of channels + est.addCuda(intermediateSizes[t], kIntermediateAlignment); + } + + cvcuda::WorkspaceRequirements req{}; + req.hostMem = est.hostMem.req; + req.pinnedMem = est.pinnedMem.req; + req.cudaMem = est.cudaMem.req; + // The allocator requries the total size of the allocation to be aligned + cvcuda::AlignUp(req); + + return req; + } + + // Computes upper bound for workspace requirements, i.e. the workspace that meets the computed requirements + // can be passed to the call with any type of input/output as long as there are no more than maxBatchSize + // samples that do not exceed the maxShape (in the input nor in the output). + cvcuda::WorkspaceRequirements getWorkspaceRequirements(int maxNumSamples, const HQResizeTensorShapeI maxShape) const + { + validate::inOutNumberOfChannels(maxShape, maxShape); + + cvcuda::WorkspaceEstimator est; + est.addPinned(maxNumSamples); + est.addCuda(maxNumSamples); + + // reserve space for pointers and strides for intermediate wrappers + for (int t = 0; t < kNumTmpBuffers; t++) + { + batch_wrapper::dynamic::AddDynamicBatchWrapMeta(est, maxNumSamples); + } + VecI shape = shape::TensorShape(maxShape); + for (int t = 0; t < kNumTmpBuffers; t++) + { + size_t numElements = utils::Volume(shape) * maxNumSamples * maxShape.numChannels; + est.addCuda(numElements, kIntermediateAlignment); + } + + cvcuda::WorkspaceRequirements req{}; + req.hostMem = est.hostMem.req; + req.pinnedMem = est.pinnedMem.req; + req.cudaMem = est.cudaMem.req; + // The allocator requries the total size of the allocation to be aligned + cvcuda::AlignUp(req); + + return req; + } + + void operator()(cudaStream_t stream, const cvcuda::Workspace &ws, const nvcv::Tensor &src, const nvcv::Tensor &dst, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, + const bool antialias, const HQResizeRoiF *roi) const + { + auto tensorAccess = validate::srcDst(src, dst); + auto &[srcAccess, dstAccess, numSamples, numChannels, srcDtype, dstDtype] = tensorAccess; + + SampleDescT sampleDesc; + VecI srcShape = shape::TensorShape(src); + VecI dstShape = shape::TensorShape(dst); + const auto [minFilter, magFilter] = filter::GetFilterModes(minInterpolation, magInterpolation, antialias); + SetupSampleDesc(sampleDesc, srcShape, dstShape, numChannels, roi, minFilter, magFilter); + + cvcuda::WorkspaceAllocator allocator(ws); + if (ws.cudaMem.ready != nullptr) + { + NVCV_CHECK_THROW(cudaStreamWaitEvent(stream, ws.cudaMem.ready)); + } + IntermediateBaseT *intermediate[kNumTmpBuffers]; + // Get intermediate buffers + for (int t = 0; t < kNumTmpBuffers; t++) + { + intermediate[t] = allocator.getCuda(GetPassOutputVolume(sampleDesc, t) * numSamples, + kIntermediateAlignment); + } + + RunTypedSwitch( + srcDtype, dstDtype, numChannels, + [&](auto dummySrcVal, auto 
intermediateVal, auto dummyDstVal, auto numChannelsVal) + { + using InT = decltype(dummySrcVal); + using IntermediateT = decltype(intermediateVal); + using OutT = decltype(dummyDstVal); + constexpr int numStaticChannels = decltype(numChannelsVal)::value; + static_assert(numStaticChannels == -1 || numStaticChannels == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + + auto &[srcAccess, dstAccess, numSamples, numChannels, srcDtype, dstDtype] = tensorAccess; + RunPasses(sampleDesc, *dstAccess, *srcAccess, intermediate, + numSamples, ws, stream); + }); + } + + template + void operator()(cudaStream_t stream, const cvcuda::Workspace &ws, const BatchContainer &src, + const BatchContainer &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, const bool antialias, const HQResizeRoisF rois) const + { + int numSamples; + int uniqueNumChannels; // numChannels for ImageBatchVarShape, -1 for TensorBatch + nvcv::DataType srcDtype, dstDtype; + validate::srcDst(numSamples, uniqueNumChannels, srcDtype, dstDtype, src, dst); + validate::roiBatch(numSamples, kSpatialNDim, rois); + + const auto [minFilter, magFilter] = filter::GetFilterModes(minInterpolation, magInterpolation, antialias); + cvcuda::WorkspaceAllocator allocator(ws); + if (ws.pinnedMem.ready != nullptr) + { + NVCV_CHECK_THROW(cudaEventSynchronize(ws.pinnedMem.ready)); + } + if (ws.cudaMem.ready != nullptr) + { + NVCV_CHECK_THROW(cudaStreamWaitEvent(stream, ws.cudaMem.ready)); + } + SampleDescT *sampleDescsCpu = allocator.getPinned(numSamples); + SampleDescT *sampleDescsGpu = allocator.getCuda(numSamples); + size_t intermediateSizes[kNumTmpBuffers]{}; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + const VecI srcShape = shape::SampleShape(src, sampleIdx); + const VecI dstShape = shape::SampleShape(dst, sampleIdx); + const HQResizeRoiF *sampleRoi = shape::SampleRoi(rois, sampleIdx); + int numChannels; + if constexpr (std::is_same_v) + { + numChannels = uniqueNumChannels; + } + else if constexpr (!std::is_same_v) + { + static_assert(std::is_same_v); + numChannels = shape::SampleNumChannels(src, dst, sampleIdx); + } + SampleDescT &sampleDesc = sampleDescsCpu[sampleIdx]; + SetupSampleDesc(sampleDesc, srcShape, dstShape, numChannels, sampleRoi, minFilter, magFilter); + for (int t = 0; t < kNumTmpBuffers; t++) + { + intermediateSizes[t] += GetPassOutputVolume(sampleDesc, t); + } + } + NVCV_CHECK_THROW(cudaMemcpyAsync(sampleDescsGpu, sampleDescsCpu, numSamples * sizeof(SampleDescT), + cudaMemcpyHostToDevice, stream)); + + // allocate space for pointers and strides for intermediate wrappers + DynamicBatchWrapMeta intermediateMeta[kNumTmpBuffers]; + IntermediateBaseT *intermediate[kNumTmpBuffers]; + for (int t = 0; t < kNumTmpBuffers; t++) + { + intermediateMeta[t] = batch_wrapper::dynamic::AllocateDynamicBatchWrapMeta(allocator, numSamples); + } + // allocate space for intermediate data + for (int t = 0; t < kNumTmpBuffers; t++) + { + intermediate[t] = allocator.getCuda(intermediateSizes[t], kIntermediateAlignment); + } + + RunTyped(sampleDescsCpu, sampleDescsGpu, src, dst, intermediate, intermediateMeta, numSamples, srcDtype, + dstDtype, uniqueNumChannels, ws, stream); + } + +private: + void RunTyped(const SampleDescT *sampleDescsCpu, const SampleDescT *sampleDescsGpu, + const nvcv::ImageBatchVarShape &src, const nvcv::ImageBatchVarShape &dst, + IntermediateBaseT *intermediate[kNumTmpBuffers], + const 
DynamicBatchWrapMeta intermediateMeta[kNumTmpBuffers], int numSamples, + const nvcv::DataType srcDtype, const nvcv::DataType dstDtype, int uniqueNumChannels, + const cvcuda::Workspace &ws, cudaStream_t stream) const + { + static_assert(kSpatialNDim == 2, "ImageBatchVarShape does not support 3D spatial resampling"); + + auto srcData = src.exportData(stream); + auto dstData = dst.exportData(stream); + if (!srcData) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Input must be cuda-accessible, varshape pitch-linear image batch"); + } + + if (!dstData) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Output must be cuda-accessible, varshape pitch-linear image batch"); + } + + RunTypedSwitch( + srcDtype, dstDtype, uniqueNumChannels, + [&](auto dummySrcVal, auto intermediateVal, auto dummyDstVal, auto numChannelsVal) + { + using InT = decltype(dummySrcVal); + using IntermediateT = decltype(intermediateVal); + using OutT = decltype(dummyDstVal); + constexpr int numStaticChannels = decltype(numChannelsVal)::value; + if constexpr (numStaticChannels == -1) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Unsupported number of channels for ImageBatchVarShape input."); + } + else if constexpr (numStaticChannels != -1) + { + static_assert(numStaticChannels == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + RunPasses(sampleDescsCpu, sampleDescsGpu, *dstData, + *srcData, intermediate, intermediateMeta, + numSamples, ws, stream); + } + }); + } + + void RunTyped(const SampleDescT *sampleDescsCpu, const SampleDescT *sampleDescsGpu, const nvcv::TensorBatch &src, + const nvcv::TensorBatch &dst, IntermediateBaseT *intermediate[kNumTmpBuffers], + const DynamicBatchWrapMeta intermediateMeta[kNumTmpBuffers], int numSamples, + const nvcv::DataType srcDtype, const nvcv::DataType dstDtype, int uniqueNumChannels, + const cvcuda::Workspace &ws, cudaStream_t stream) const + { + // Other cointainer allow exporting data with const qualifiers + const auto srcData + = const_cast(src).exportData(stream).cast(); + const auto dstData + = const_cast(dst).exportData(stream).cast(); + + if (!srcData) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Input must be cuda-accessible, varshape pitch-linear image batch"); + } + + if (!dstData) + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Output must be cuda-accessible, varshape pitch-linear image batch"); + } + + uniqueNumChannels = -1; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + if (sampleIdx == 0) + { + uniqueNumChannels = sampleDescsCpu[sampleIdx].channels; + } + else if (uniqueNumChannels != sampleDescsCpu[sampleIdx].channels) + { + uniqueNumChannels = -1; + break; + } + } + + RunTypedSwitch( + srcDtype, dstDtype, uniqueNumChannels, + [&](auto dummySrcVal, auto intermediateVal, auto dummyDstVal, auto numChannelsVal) + { + using InT = decltype(dummySrcVal); + using IntermediateT = decltype(intermediateVal); + using OutT = decltype(dummyDstVal); + constexpr int numStaticChannels = decltype(numChannelsVal)::value; + static_assert(numStaticChannels == -1 || numStaticChannels == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + static_assert(cuda::NumElements == cuda::NumElements); + + RunPasses(sampleDescsCpu, sampleDescsGpu, *dstData, + *srcData, intermediate, intermediateMeta, + numSamples, ws, stream); + }); + } + + template + 
std::enable_if_t RunPasses(const SampleDescT &sampleDesc, + const nvcv::TensorDataAccessStridedImagePlanar &dstAccess, + const nvcv::TensorDataAccessStridedImagePlanar &srcAccess, + IntermediateBaseT *intermediate[kNumTmpBuffers], int numSamples, + const cvcuda::Workspace &ws, cudaStream_t stream) const + { + static_assert(kSpatialNDim == 2); + constexpr bool kHasDynamicChannels = kNumStaticChannels == -1; + // sample extent, spatial extents, optional dynamic channel extent + constexpr int kWrapNDim = 1 + kSpatialNDim + kHasDynamicChannels; + using OutWrap = cuda::TensorNDWrap; + using InWrap = cuda::TensorNDWrap; + using InterWrap = cuda::TensorNDWrap; + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + const OutWrap outWrap = batch_wrapper::tensor::WrapTensor(dstAccess); + const InWrap inWrap = batch_wrapper::tensor::WrapTensor( + srcAccess, sampleDesc.inRoiOffset); + const InterWrap interWrap = batch_wrapper::tensor::CreateDenseWrap( + intermediate[0], sampleDesc.channels, sampleDesc.shapes[1]); + RunPass(sampleDesc, interWrap, inWrap, numSamples, stream); + RunPass(sampleDesc, outWrap, interWrap, numSamples, stream); + if (ws.cudaMem.ready != nullptr) + { + NVCV_CHECK_THROW(cudaEventRecord(ws.cudaMem.ready, stream)); + } + } + + template + std::enable_if_t RunPasses(const SampleDescT &sampleDesc, + const nvcv::TensorDataAccessStridedImagePlanar &dstAccess, + const nvcv::TensorDataAccessStridedImagePlanar &srcAccess, + IntermediateBaseT *intermediate[kNumTmpBuffers], int numSamples, + const cvcuda::Workspace &ws, cudaStream_t stream) const + { + static_assert(kSpatialNDim == 3); + constexpr bool kHasDynamicChannels = kNumStaticChannels == -1; + // sample extent, spatial extents, optional dynamic channel extent + constexpr int kWrapNDim = 1 + kSpatialNDim + kHasDynamicChannels; + using OutWrap = cuda::TensorNDWrap; + using InWrap = cuda::TensorNDWrap; + using InterWrap = cuda::TensorNDWrap; + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + const OutWrap outWrap = batch_wrapper::tensor::WrapTensor(dstAccess); + const InWrap inWrap = batch_wrapper::tensor::WrapTensor( + srcAccess, sampleDesc.inRoiOffset); + const InterWrap interWrap0 = batch_wrapper::tensor::CreateDenseWrap( + intermediate[0], sampleDesc.channels, sampleDesc.shapes[1]); + const InterWrap interWrap1 = batch_wrapper::tensor::CreateDenseWrap( + intermediate[1], sampleDesc.channels, sampleDesc.shapes[2]); + RunPass(sampleDesc, interWrap0, inWrap, numSamples, stream); + RunPass(sampleDesc, interWrap1, interWrap0, numSamples, stream); + RunPass(sampleDesc, outWrap, interWrap1, numSamples, stream); + if (ws.cudaMem.ready != nullptr) + { + NVCV_CHECK_THROW(cudaEventRecord(ws.cudaMem.ready, stream)); + } + } + + template + std::enable_if_t RunPasses(const SampleDescT *sampleDescsCpu, const SampleDescT *sampleDescsGpu, + const BatchDataStridedCuda &dstData, const BatchDataStridedCuda &srcData, + IntermediateBaseT *intermediate[kNumTmpBuffers], + const DynamicBatchWrapMeta intermediateMeta[kNumTmpBuffers], int numSamples, + const cvcuda::Workspace &ws, cudaStream_t stream) const + { + static_assert(kSpatialNDim == 2); + constexpr bool kHasDynamicChannels = kNumStaticChannels == -1; + // sample extent, spatial extents, optional dynamic channel extent + constexpr int kWrapNDim = 1 + kSpatialNDim + kHasDynamicChannels; + using BatchWrapOutT + = 
std::conditional_t, + batch_wrapper::ImageBatchVarShapeWrapAdapter, + batch_wrapper::TensorBatchWrapAdapter>; + using BatchWrapInT + = std::conditional_t, + batch_wrapper::ImageBatchVarShapeWrapAdapter, + batch_wrapper::TensorBatchWrapAdapter>; + using DynamicBatchWrap = batch_wrapper::dynamic::DynamicBatchWrap; + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + const BatchWrapOutT outWrap(dstData); + const BatchWrapInT inWrap(srcData); + const DynamicBatchWrap intermediateWrap + = batch_wrapper::dynamic::CreateDynamicBatchWrap( + 0, intermediate[0], intermediateMeta[0], sampleDescsCpu, numSamples, stream); + if (ws.pinnedMem.ready != nullptr) + { + NVCV_CHECK_THROW(cudaEventRecord(ws.pinnedMem.ready, stream)); + } + RunPass(sampleDescsCpu, sampleDescsGpu, intermediateWrap, inWrap, numSamples, stream); + RunPass(sampleDescsCpu, sampleDescsGpu, outWrap, intermediateWrap, numSamples, stream); + if (ws.cudaMem.ready != nullptr) + { + NVCV_CHECK_THROW(cudaEventRecord(ws.cudaMem.ready, stream)); + } + } + + template + std::enable_if_t RunPasses(const SampleDescT *sampleDescsCpu, const SampleDescT *sampleDescsGpu, + const nvcv::TensorBatchDataStridedCuda &dstData, + const nvcv::TensorBatchDataStridedCuda &srcData, + IntermediateBaseT *intermediate[kNumTmpBuffers], + const DynamicBatchWrapMeta intermediateMeta[kNumTmpBuffers], int numSamples, + const cvcuda::Workspace &ws, cudaStream_t stream) const + { + static_assert(kSpatialNDim == 3); + constexpr bool kHasDynamicChannels = kNumStaticChannels == -1; + // sample extent, spatial extents, optional dynamic channel extent + constexpr int kWrapNDim = 1 + kSpatialNDim + kHasDynamicChannels; + using TensorBatchWrapOutT = batch_wrapper::TensorBatchWrapAdapter; + using TensorBatchWrapInT = batch_wrapper::TensorBatchWrapAdapter; + using DynamicBatchWrap = batch_wrapper::dynamic::DynamicBatchWrap; + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + const TensorBatchWrapOutT outWrap(dstData); + const TensorBatchWrapInT inWrap(srcData); + const DynamicBatchWrap intermediateWrap0 + = batch_wrapper::dynamic::CreateDynamicBatchWrap( + 0, intermediate[0], intermediateMeta[0], sampleDescsCpu, numSamples, stream); + const DynamicBatchWrap intermediateWrap1 + = batch_wrapper::dynamic::CreateDynamicBatchWrap( + 1, intermediate[1], intermediateMeta[1], sampleDescsCpu, numSamples, stream); + if (ws.pinnedMem.ready != nullptr) + { + NVCV_CHECK_THROW(cudaEventRecord(ws.pinnedMem.ready, stream)); + } + RunPass(sampleDescsCpu, sampleDescsGpu, intermediateWrap0, inWrap, numSamples, stream); + RunPass(sampleDescsCpu, sampleDescsGpu, intermediateWrap1, intermediateWrap0, numSamples, + stream); + RunPass(sampleDescsCpu, sampleDescsGpu, outWrap, intermediateWrap1, numSamples, stream); + if (ws.cudaMem.ready != nullptr) + { + NVCV_CHECK_THROW(cudaEventRecord(ws.cudaMem.ready, stream)); + } + } + + template + void RunPass(const SampleDescT &sampleDesc, const PassOutWrap &outWrap, const PassInWrap &inWrap, int numSamples, + cudaStream_t stream) const + { + using GridHelperT = resampling::GridHelper; + + VecI numBlocks; + { + VecI outputShape = sampleDesc.shapes[kWhichPass + 1]; + VecI blockShape = sampleDesc.blockShape[kWhichPass]; + numBlocks = utils::DivCeil(outputShape, blockShape); + if (utils::Volume(numBlocks) == 0) + { + return; + } + } + + GridHelperT gridHelper{numBlocks, numSamples}; + 
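// The grid helper ties the CUDA launch grid to pairs of (sample index, output block region); + // inside SeparableResamplingKernel they are recovered via CurrentSample() and CurrentBlock(). +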
dim3 block(kBlockDim.x, kBlockDim.y, kBlockDim.z); + dim3 grid = gridHelper.GetKernelGrid(); + const auto devGridHelper = gridHelper.GetDeviceGridHelper(); + + int sharedMemSize = RequiredSharedMemorySize(sampleDesc, kWhichPass); + resampling::SeparableResamplingKernel + <<>>(sampleDesc, outWrap, inWrap, devGridHelper); + NVCV_CHECK_THROW(cudaGetLastError()); + } + + template + void RunPass(const SampleDescT *sampleDescsCpu, const SampleDescT *sampleDescsGpu, const PassOutWrap &outWrap, + const PassInWrap &inWrap, int numSamples, cudaStream_t stream) const + { + using GridHelperT = resampling::GridHelper; + + int maxSharedMemSize = 0; + VecI maxNumBlocks{}; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + const SampleDescT &sampleDesc = sampleDescsCpu[sampleIdx]; + int sharedMemSize = RequiredSharedMemorySize(sampleDesc, kWhichPass); + maxSharedMemSize = std::max(maxSharedMemSize, sharedMemSize); + + VecI outputShape = sampleDesc.shapes[kWhichPass + 1]; + VecI blockShape = sampleDesc.blockShape[kWhichPass]; + VecI numBlocks = utils::DivCeil(outputShape, blockShape); + maxNumBlocks = cuda::max(maxNumBlocks, numBlocks); + } + + if (utils::Volume(maxNumBlocks) == 0) + { + return; + } + + GridHelperT gridHelper{maxNumBlocks, numSamples}; + dim3 block(kBlockDim.x, kBlockDim.y, kBlockDim.z); + dim3 grid = gridHelper.GetKernelGrid(); + const auto devGridHelper = gridHelper.GetDeviceGridHelper(); + + resampling::SeparableResamplingKernel + <<>>(sampleDescsGpu, outWrap, inWrap, devGridHelper); + NVCV_CHECK_THROW(cudaGetLastError()); + } + + int RequiredSharedMemorySize(const SampleDescT &sampleDesc, int whichPass) const + { + using resampling::interpolate::filter_support::RequiredSharedMemoryElements; + if (sampleDesc.filterKind[whichPass] != filter::FilterTypeKind::ShmFilter) + { + return 0; + } + int support = sampleDesc.filter[whichPass].support(); + int axis = cuda::GetElement(sampleDesc.processingOrder, whichPass); + // for depth resampling y is used as well + int resamplingAxisBlockSize = axis == 0 ? 
kBlockDim.x : kBlockDim.y; + return sizeof(IntermediateBaseT) * RequiredSharedMemoryElements(support, resamplingAxisBlockSize); + } + + void SetupSampleDesc(SampleDescT &sampleDesc, const VecI &srcShape, + const VecI &dstShape, int numChannels, const HQResizeRoiF *roi, + const filter::FilterMode &minFilter, const filter::FilterMode &magFilter) const + { + SetupSampleDescFilterShapeScale(sampleDesc, srcShape, dstShape, numChannels, minFilter, magFilter, roi); + SetupBlockLayout(sampleDesc); + } + + void SetupSampleDescFilterShapeScale(SampleDescT &sampleDesc, const VecI &inShape, + const VecI &outShape, int numChannels, + const filter::FilterMode &minFilter, const filter::FilterMode &magFilter, + const HQResizeRoiF *roi) const + { + // get user provided roi + const shape::Roi parsedRoi = ParseROI(roi, inShape); + // setup filter based on user provided filter types and the input/output size + filter::FilterTypeKind filterKinds[kSpatialNDim]; + filter::ResamplingFilter filters[kSpatialNDim]; + SetupFilters(filterKinds, filters, parsedRoi.Size(), outShape, minFilter, magFilter); + // get the ROI that is normalized (so that roiLo <= roiHi), adjusted for filter's "halo", + // and clamped to the input shape + const shape::Roi adjustedRoi = AdjustRoiForFilter(parsedRoi, inShape, filters); + VecI adjustedRoiSize = adjustedRoi.Size(); + // the processing order is a permutation that maps pass number to the axis resampled during the given pass + sampleDesc.processingOrder = SetupProcessingOrder(adjustedRoiSize, outShape, filters); + // now, use filters, roi and processingOrder to populate sample descriptor + sampleDesc.channels = numChannels; + sampleDesc.shapes[0] = inShape; + // set output shapes, scaling, roi, and relevant filters for each pass + // according to the best processingOrder of axes + { + VecI intermediateShape = adjustedRoiSize; + for (int pass = 0; pass < kSpatialNDim; pass++) + { + const int axis = cuda::GetElement(sampleDesc.processingOrder, pass); + const int axisOutShape = cuda::GetElement(outShape, axis); + const float roiStart = cuda::GetElement(parsedRoi.lo, axis); + const float roiEnd = cuda::GetElement(parsedRoi.hi, axis); + + cuda::GetElement(intermediateShape, axis) = axisOutShape; + sampleDesc.filterKind[pass] = filterKinds[axis]; + sampleDesc.filter[pass] = filters[axis]; + sampleDesc.shapes[pass + 1] = intermediateShape; + + cuda::GetElement(sampleDesc.origin, pass) = roiStart; + cuda::GetElement(sampleDesc.scale, pass) = (roiEnd - roiStart) / axisOutShape; + + // "Clamp" the axes processed in later passes to the input ROI + if (pass == 0) + { + // the first processed axis roi is handled simply with the `origin` + cuda::GetElement(sampleDesc.inRoiOffset, axis) = 0; + } + else + { + // for the axes not resampled in the first pass, we can just use an offset when accessing data + // (adjustedRoi.lo) and pretend the input shape is the adjustedRoi.Size() + cuda::GetElement(sampleDesc.shapes[0], axis) = cuda::GetElement(adjustedRoiSize, axis); + cuda::GetElement(sampleDesc.inRoiOffset, axis) = cuda::GetElement(adjustedRoi.lo, axis); + cuda::GetElement(sampleDesc.origin, pass) + -= cuda::GetElement(adjustedRoi.lo, axis); // parsedRoi.lo - adjustedRoi.lo + } + } + } + } + + /** + * @brief If the user specified the roi, it's returned with reversed dims order ((d)hw -> wh(d)), + * otherwise the input shape is used to create a whole-plane roi. + * Note that in the first case, some lo and hi may be flipped (i.e. lo[d] > hi[d]).
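+ * For example, for a 2D HW image, a user roi with lo = {10, 20} given in (h, w) order becomes + * the internal vector-order lo with lo.x = 20 and lo.y = 10.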
+ */ + shape::Roi ParseROI(const HQResizeRoiF *roi, VecI inShape) const + { + shape::Roi retRoi; + for (int dim = 0; dim < kSpatialNDim; dim++) + { + int axis = kSpatialNDim - 1 - dim; + auto axisSize = cuda::GetElement(inShape, axis); + float roiStart, roiEnd; + if (roi != nullptr) + { + roiStart = roi->lo[dim]; + roiEnd = roi->hi[dim]; + } + else + { + roiStart = 0; + roiEnd = axisSize; + } + cuda::GetElement(retRoi.lo, axis) = roiStart; + cuda::GetElement(retRoi.hi, axis) = roiEnd; + } + return retRoi; + } + + void SetupFilters(filter::FilterTypeKind filterKind[kSpatialNDim], filter::ResamplingFilter filters[kSpatialNDim], + VecF roiShape, const VecI &outShape, + const filter::FilterMode &minFilter, const filter::FilterMode &magFilter) const + { + using resampling::interpolate::filter_support::kMaxGPUFilterSupport; + static_assert(kSpatialNDim == 2 || kSpatialNDim == 3, + "Currently, the resampling operator supports only 2 or 3 spatial dimensions"); + + for (int axis = 0; axis < kSpatialNDim; axis++) + { + float inSize = std::abs(cuda::GetElement(roiShape, axis)); + float outSize = cuda::GetElement(outShape, axis); + const auto filterMode = outSize < inSize ? minFilter : magFilter; + filterKind[axis] = filter::GetFilterTypeKind(filterMode.filterType); + auto &filter = filters[axis]; + filter = filter::GetResamplingFilter(m_filtersFactory, filterMode, inSize, outSize); + + // for very small outputs, the required support may be too big for available shm + if (filter.support() > kMaxGPUFilterSupport) + { + filter.rescale(kMaxGPUFilterSupport); + } + } + } + + /** + * @brief Computes normalized ROI (i.e. so that roiLo <= roiHi), which is adjusted for filter's halo, + * converted to int and clamped to the input shape + */ + shape::Roi AdjustRoiForFilter(const shape::Roi &roi, + const VecI &inShape, + const filter::ResamplingFilter filters[kSpatialNDim]) const + { + shape::Roi ajustedRoi; + for (int axis = 0; axis < kSpatialNDim; axis++) + { + const float &axisLo = cuda::GetElement(roi.lo, axis); + const float &axisHi = cuda::GetElement(roi.hi, axis); + const auto &filter = filters[axis]; + int support = filter.numCoeffs ?
filter.support() : 1; + float adjustedAxisLo, adjustedAxisHi; + if (axisLo <= axisHi) + { + adjustedAxisLo = axisLo - filter.anchor; + adjustedAxisHi = axisHi - filter.anchor + support; + } + else + { // flipped + adjustedAxisLo = axisHi - filter.anchor; + adjustedAxisHi = axisLo - filter.anchor + support; + } + const int axisSize = cuda::GetElement(inShape, axis); + cuda::GetElement(ajustedRoi.lo, axis) + = std::max(0, std::min(axisSize, std::floor(adjustedAxisLo))); + cuda::GetElement(ajustedRoi.hi, axis) + = std::max(0, std::min(axisSize, std::ceil(adjustedAxisHi))); + } + return ajustedRoi; + } + + VecI SetupProcessingOrder(const VecI &inRoiSize, const VecI &outSize, + const filter::ResamplingFilter filters[kSpatialNDim]) const + { + VecI filterSupport; + for (int i = 0; i < kSpatialNDim; i++) + { + int support = filters[i].support(); + // NN filter has support -1, so we need the max() below + cuda::GetElement(filterSupport, i) = std::max(1, support); + } + + return ProcessingOrderCalculator(inRoiSize, outSize, filterSupport)(); + } + + int64_t GetPassOutputVolume(SampleDescT sampleDesc, int pass) const + { + return utils::Volume(sampleDesc.shapes[pass + 1]) * sampleDesc.channels; + } + + /** + * @brief Calculates block layout for a 2D sample + * + */ + template + std::enable_if_t SetupBlockLayout(SampleDescT &sampleDesc) const + { + static_assert(kSpatialNDim == 2); + int lanes = resampling::GetResizeBlockLanes(); + for (int pass = 0; pass < kSpatialNDim; pass++) + { + int resamplingAxis = cuda::GetElement(sampleDesc.processingOrder, pass); + // The threadblock is (kBlockDim.x, kBlockDim.y) for all passes. + // In horizontal pass (resamplingAxis == 0), a single block will + // process output slice of (kBlockDim.x, lanes * kBlockDim.y). + // In vertical pass (resamplingAxis == 1), each block will handle + // output slice of (kBlockDim.x * lanes, kBlockDim.y). + VecI<2> blockShape{kBlockDim.x, kBlockDim.y}; + cuda::GetElement(blockShape, 1 - resamplingAxis) *= lanes; + auto outputShape = sampleDesc.shapes[pass + 1]; + sampleDesc.blockShape[pass] = cuda::clamp(blockShape, VecI<2>{1, 1}, outputShape); + } + } + + /** + * @brief Calculates block layout for a 3D sample + */ + template + std::enable_if_t SetupBlockLayout(SampleDescT &sampleDesc) const + { + static_assert(kSpatialNDim == 3); + int lanes = resampling::GetResizeBlockLanes(); + for (int pass = 0; pass < kSpatialNDim; pass++) + { + auto outputShape = sampleDesc.shapes[pass + 1]; + int resamplingAxis = cuda::GetElement(sampleDesc.processingOrder, pass); + if (resamplingAxis < 2) + { + VecI<3> blockShape{kBlockDim.x, kBlockDim.y, kBlockDim.z * lanes}; + sampleDesc.blockShape[pass] = cuda::clamp(blockShape, VecI<3>{1, 1, 1}, outputShape); + } + else + { + assert(resamplingAxis == 2); + VecI<3> blockShape{kBlockDim.x, kBlockDim.z * lanes, kBlockDim.y}; + sampleDesc.blockShape[pass] = cuda::clamp(blockShape, VecI<3>{1, 1, 1}, outputShape); + } + } + } + + const filter::ResamplingFiltersFactory &m_filtersFactory; +}; +} // namespace + +namespace cvcuda::priv { +namespace hq_resize { + +// Implements the IHQResizeImpl interface and keeps the filters factory with initialized +// supports. The actual implementation is in a stateless HQResizeRun that is parametrized +// with the number of resampled dimensions.
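+// Typical call sequence (illustrative sketch; `impl`, `minInterp`, `magInterp` and `AllocateWorkspace` are +// placeholder names, the workspace can come from any allocation that satisfies the returned requirements): +// auto req = impl.getWorkspaceRequirements(batchSize, inShape, outShape, minInterp, magInterp, antialias, roi); +// cvcuda::Workspace ws = AllocateWorkspace(req); +// impl(stream, ws, srcTensor, dstTensor, minInterp, magInterp, antialias, roi);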
+class HQResizeImpl final : public IHQResizeImpl +{ +public: + cvcuda::WorkspaceRequirements getWorkspaceRequirements(int numSamples, const HQResizeTensorShapeI inputShape, + const HQResizeTensorShapeI outputShape, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoiF *roi) const override + { + if (inputShape.ndim == 2) + { + HQResizeRun<2> resize(m_filtersFactory); + return resize.getWorkspaceRequirements(numSamples, inputShape, outputShape, minInterpolation, + magInterpolation, antialias, roi); + } + else if (inputShape.ndim == 3) + { + HQResizeRun<3> resize(m_filtersFactory); + return resize.getWorkspaceRequirements(numSamples, inputShape, outputShape, minInterpolation, + magInterpolation, antialias, roi); + } + else + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Only 2D or 3D resize is supported. Got unexpected number of extents to resize."); + } + } + + cvcuda::WorkspaceRequirements getWorkspaceRequirements(int numSamples, const HQResizeTensorShapesI inputShapes, + const HQResizeTensorShapesI outputShapes, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoisF rois) const override + { + if (inputShapes.ndim == 2) + { + HQResizeRun<2> resize(m_filtersFactory); + return resize.getWorkspaceRequirements(numSamples, inputShapes, outputShapes, minInterpolation, + magInterpolation, antialias, rois); + } + else if (inputShapes.ndim == 3) + { + HQResizeRun<3> resize(m_filtersFactory); + return resize.getWorkspaceRequirements(numSamples, inputShapes, outputShapes, minInterpolation, + magInterpolation, antialias, rois); + } + else + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Only 2D or 3D resize is supported. Got unexpected number of extents to resize."); + } + } + + cvcuda::WorkspaceRequirements getWorkspaceRequirements(int maxBatchSize, + const HQResizeTensorShapeI maxShape) const override + { + if (maxShape.ndim == 2) + { + HQResizeRun<2> resize(m_filtersFactory); + return resize.getWorkspaceRequirements(maxBatchSize, maxShape); + } + else if (maxShape.ndim == 3) + { + HQResizeRun<3> resize(m_filtersFactory); + return resize.getWorkspaceRequirements(maxBatchSize, maxShape); + } + else + { + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, + "Only 2D or 3D resize is supported. 
Got unexpected number of extents to resize."); + } + } + + void operator()(cudaStream_t stream, const cvcuda::Workspace &ws, const nvcv::Tensor &src, const nvcv::Tensor &dst, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoiF *roi) override + { + if (src.layout().find('D') < 0) + { + HQResizeRun<2> resize(m_filtersFactory); + resize(stream, ws, src, dst, minInterpolation, magInterpolation, antialias, roi); + } + else + { + HQResizeRun<3> resize(m_filtersFactory); + resize(stream, ws, src, dst, minInterpolation, magInterpolation, antialias, roi); + } + } + + void operator()(cudaStream_t stream, const cvcuda::Workspace &ws, const nvcv::ImageBatchVarShape &src, + const nvcv::ImageBatchVarShape &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF rois) override + { + HQResizeRun<2> resize(m_filtersFactory); + resize(stream, ws, src, dst, minInterpolation, magInterpolation, antialias, rois); + } + + void operator()(cudaStream_t stream, const cvcuda::Workspace &ws, const nvcv::TensorBatch &src, + const nvcv::TensorBatch &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF rois) override + { + if (src.layout().find('D') < 0) + { + HQResizeRun<2> resize(m_filtersFactory); + resize(stream, ws, src, dst, minInterpolation, magInterpolation, antialias, rois); + } + else + { + HQResizeRun<3> resize(m_filtersFactory); + resize(stream, ws, src, dst, minInterpolation, magInterpolation, antialias, rois); + } + } + +private: + filter::ResamplingFiltersFactory m_filtersFactory; +}; + +} // namespace hq_resize + +// Constructor ----------------------------------------------------------------- + +HQResize::HQResize() + +{ + m_impl = std::make_unique(); +} + +// Operator -------------------------------------------------------------------- + +// Workspace esitmation for Tensor input +cvcuda::WorkspaceRequirements HQResize::getWorkspaceRequirements(int batchSize, const HQResizeTensorShapeI inputShape, + const HQResizeTensorShapeI outputShape, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoiF *roi) const +{ + return m_impl->getWorkspaceRequirements(batchSize, inputShape, outputShape, minInterpolation, magInterpolation, + antialias, roi); +} + +// Workspace esitmation for ImageBatch and TensorBatch input +cvcuda::WorkspaceRequirements HQResize::getWorkspaceRequirements(int batchSize, const HQResizeTensorShapesI inputShapes, + const HQResizeTensorShapesI outputShapes, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoisF rois) const +{ + return m_impl->getWorkspaceRequirements(batchSize, inputShapes, outputShapes, minInterpolation, magInterpolation, + antialias, rois); +} + +cvcuda::WorkspaceRequirements HQResize::getWorkspaceRequirements(int maxBatchSize, + const HQResizeTensorShapeI maxShape) const +{ + return m_impl->getWorkspaceRequirements(maxBatchSize, maxShape); +} + +// Tensor variant +void HQResize::operator()(cudaStream_t stream, const cvcuda::Workspace &ws, const nvcv::Tensor &src, + const nvcv::Tensor &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoiF *roi) const +{ + assert(m_impl); + m_impl->operator()(stream, ws, src, 
dst, minInterpolation, magInterpolation, antialias, roi); +} + +// ImageBatchVarShape variant +void HQResize::operator()(cudaStream_t stream, const cvcuda::Workspace &ws, const nvcv::ImageBatchVarShape &src, + const nvcv::ImageBatchVarShape &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF rois) const +{ + assert(m_impl); + m_impl->operator()(stream, ws, src, dst, minInterpolation, magInterpolation, antialias, rois); +} + +// TensorBatch variant +void HQResize::operator()(cudaStream_t stream, const cvcuda::Workspace &ws, const nvcv::TensorBatch &src, + const nvcv::TensorBatch &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF rois) const +{ + assert(m_impl); + m_impl->operator()(stream, ws, src, dst, minInterpolation, magInterpolation, antialias, rois); +} + +} // namespace cvcuda::priv diff --git a/src/cvcuda/priv/OpHQResize.hpp b/src/cvcuda/priv/OpHQResize.hpp new file mode 100644 index 000000000..85a89ee2c --- /dev/null +++ b/src/cvcuda/priv/OpHQResize.hpp @@ -0,0 +1,115 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file HQResize.hpp + * + * @brief Defines the private C++ Class for the HQResize operation. 
+ */ + +#ifndef CVCUDA_PRIV_HQ_RESIZE_HPP +#define CVCUDA_PRIV_HQ_RESIZE_HPP +#include "IOperator.hpp" +#include "cvcuda/Workspace.hpp" + +#include +#include +#include +#include + +#include + +namespace cvcuda::priv { + +namespace hq_resize { + +class IHQResizeImpl +{ +public: + virtual WorkspaceRequirements getWorkspaceRequirements(int batchSize, const HQResizeTensorShapeI inputShape, + const HQResizeTensorShapeI outputShape, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoiF *roi) const = 0; + + virtual WorkspaceRequirements getWorkspaceRequirements(int batchSize, const HQResizeTensorShapesI inputShapes, + const HQResizeTensorShapesI outputShapes, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoisF rois) const = 0; + + virtual WorkspaceRequirements getWorkspaceRequirements(int maxBatchSize, + const HQResizeTensorShapeI maxShape) const = 0; + + virtual void operator()(cudaStream_t stream, const Workspace &ws, const nvcv::Tensor &src, const nvcv::Tensor &dst, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoiF *roi) + = 0; + + virtual void operator()(cudaStream_t stream, const Workspace &ws, const nvcv::ImageBatchVarShape &src, + const nvcv::ImageBatchVarShape &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF roi) + = 0; + + virtual void operator()(cudaStream_t stream, const Workspace &ws, const nvcv::TensorBatch &src, + const nvcv::TensorBatch &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF roi) + = 0; + + virtual ~IHQResizeImpl() = default; +}; + +} // namespace hq_resize + +class HQResize final : public IOperator +{ +public: + explicit HQResize(); + + WorkspaceRequirements getWorkspaceRequirements(int batchSize, const HQResizeTensorShapeI inputShape, + const HQResizeTensorShapeI outputShape, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoiF *roi) const; + + WorkspaceRequirements getWorkspaceRequirements(int batchSize, const HQResizeTensorShapesI inputShapes, + const HQResizeTensorShapesI outputShapes, + const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, + const HQResizeRoisF rois) const; + + WorkspaceRequirements getWorkspaceRequirements(int maxBatchSize, const HQResizeTensorShapeI maxShape) const; + + void operator()(cudaStream_t stream, const Workspace &ws, const nvcv::Tensor &src, const nvcv::Tensor &dst, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, + bool antialias, const HQResizeRoiF *roi) const; + + void operator()(cudaStream_t stream, const Workspace &ws, const nvcv::ImageBatchVarShape &src, + const nvcv::ImageBatchVarShape &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF roi) const; + + void operator()(cudaStream_t stream, const Workspace &ws, const nvcv::TensorBatch &src, + const nvcv::TensorBatch &dst, const NVCVInterpolationType minInterpolation, + const NVCVInterpolationType magInterpolation, bool antialias, const HQResizeRoisF roi) const; + +private: + std::unique_ptr m_impl; +}; + +} // 
namespace cvcuda::priv + +#endif // CVCUDA_PRIV_HQ_RESIZE_HPP diff --git a/src/cvcuda/priv/OpHQResizeBatchWrap.cuh b/src/cvcuda/priv/OpHQResizeBatchWrap.cuh new file mode 100644 index 000000000..8f7b69411 --- /dev/null +++ b/src/cvcuda/priv/OpHQResizeBatchWrap.cuh @@ -0,0 +1,408 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef CVCUDA_PRIV_HQ_RESIZE_BATCH_WRAP_CUH +#define CVCUDA_PRIV_HQ_RESIZE_BATCH_WRAP_CUH + +#include "cvcuda/Workspace.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// This file contains three kinds of helpers +// 1. Helpers to wrap a contiguous batch with uniform sample stride into TensorWrap +// 2. DynamicBatchWrap class & helpers to wrap a dynamically created batch (of intermediate samples) +// with non-uniform sample strides +// 3. ImageBatchVarShapeWrap/TensorBatchWrap adapters that handle ROI on top of the usual wrappers + +namespace cvcuda::priv::hq_resize::batch_wrapper { + +namespace cuda = nvcv::cuda; + +template +using Vec = typename cuda::MakeType; + +template +using VecI = Vec; + +template +auto ComputeDenseStrides(ExtentParams... extentParams) +{ + constexpr int N = sizeof...(extentParams); + static_assert(N >= 1); + static_assert(std::conjunction_v...>); + std::array extents = {extentParams...}; + std::array strides; + strides[0] = extents[0] * sizeof(T); + for (int d = 1; d < N; d++) + { + strides[d] = strides[d - 1] * extents[d]; + } + return strides; +} + +template +auto ComputeDenseStrides(VecI<2> shape, Channels... channels) +{ + static_assert(sizeof...(channels) <= 1); + return ComputeDenseStrides(channels..., shape.x, shape.y); +} + +template +auto ComputeDenseStrides(VecI<3> shape, Channels...
channels) +{ + static_assert(sizeof...(channels) <= 1); + return ComputeDenseStrides(channels..., shape.x, shape.y, shape.z); +} + +namespace tensor { +template +auto CreateDenseWrap(cuda::BaseType *base, const std::array strides) +{ + constexpr int N = kNStrides + 1; + for (auto stride : strides) + { + NVCV_ASSERT(stride <= cuda::TypeTraits::max); + } + static_assert(2 <= N && N <= 5); + if constexpr (N == 5) + { + return cuda::TensorNDWrap(base, static_cast(strides[3]), static_cast(strides[2]), + static_cast(strides[1]), static_cast(strides[0])); + } + else if constexpr (N == 4) + { + return cuda::TensorNDWrap(base, static_cast(strides[2]), static_cast(strides[1]), + static_cast(strides[0])); + } + else if constexpr (N == 3) + { + return cuda::TensorNDWrap(base, static_cast(strides[1]), static_cast(strides[0])); + } + else if constexpr (N == 2) + { + return cuda::TensorNDWrap(base, static_cast(strides[0])); + } +} + +template +auto CreateDenseWrap(cuda::BaseType *base, int numChannels, ShapeT shape) +{ + static constexpr int kNStrides = cuda::NumElements + kHasDynamicChannels; + if constexpr (kHasDynamicChannels) + { + auto strides = ComputeDenseStrides(shape, numChannels); + return CreateDenseWrap(base, strides); + } + else if constexpr (!kHasDynamicChannels) + { + auto strides = ComputeDenseStrides(shape); + return CreateDenseWrap(base, strides); + } +} + +template +std::enable_if_t> WrapTensor( + const nvcv::TensorDataAccessStridedImagePlanar &tensorAccess, const ptrdiff_t roiOffset = 0) +{ + NVCV_ASSERT(tensorAccess.sampleStride() <= cuda::TypeTraits::max); + NVCV_ASSERT(tensorAccess.rowStride() <= cuda::TypeTraits::max); + NVCV_ASSERT(tensorAccess.colStride() <= cuda::TypeTraits::max); + + if constexpr (kHasDynamicChannels) + { + return cuda::TensorNDWrap( + tensorAccess.sampleData(0) + roiOffset, static_cast(tensorAccess.sampleStride()), + static_cast(tensorAccess.rowStride()), static_cast(tensorAccess.colStride())); + } + else + { + return cuda::TensorNDWrap(tensorAccess.sampleData(0) + roiOffset, + static_cast(tensorAccess.sampleStride()), + static_cast(tensorAccess.rowStride())); + } +} + +template +std::enable_if_t> WrapTensor( + const nvcv::TensorDataAccessStridedImagePlanar &tensorAccess, const ptrdiff_t roiOffset = 0) +{ + NVCV_ASSERT(tensorAccess.sampleStride() <= cuda::TypeTraits::max); + NVCV_ASSERT(tensorAccess.depthStride() <= cuda::TypeTraits::max); + NVCV_ASSERT(tensorAccess.rowStride() <= cuda::TypeTraits::max); + NVCV_ASSERT(tensorAccess.colStride() <= cuda::TypeTraits::max); + + if constexpr (kHasDynamicChannels) + { + return cuda::TensorNDWrap( + tensorAccess.sampleData(0) + roiOffset, static_cast(tensorAccess.sampleStride()), + static_cast(tensorAccess.depthStride()), static_cast(tensorAccess.rowStride()), + static_cast(tensorAccess.colStride())); + } + else + { + return cuda::TensorNDWrap( + tensorAccess.sampleData(0) + roiOffset, static_cast(tensorAccess.sampleStride()), + static_cast(tensorAccess.depthStride()), static_cast(tensorAccess.rowStride())); + } +} + +template +std::enable_if_t> WrapTensor( + const nvcv::TensorDataAccessStridedImagePlanar &tensorAccess, const VecI<2> &roiOffset) +{ + ptrdiff_t offset = tensorAccess.rowStride() * roiOffset.y + tensorAccess.colStride() * roiOffset.x; + return WrapTensor(tensorAccess, offset); +} + +template +std::enable_if_t> WrapTensor( + const nvcv::TensorDataAccessStridedImagePlanar &tensorAccess, const VecI<3> &roiOffset) +{ + ptrdiff_t offset = tensorAccess.depthStride() * roiOffset.z + tensorAccess.rowStride() * 
roiOffset.y + + tensorAccess.colStride() * roiOffset.x; + return WrapTensor(tensorAccess, offset); +} + +template +auto __device__ GetSampleView(const TensorWrap &batchTensorWrap, const int sampleIdx) +{ + using T = typename TensorWrap::ValueType; + static constexpr int kNumDimensions = TensorWrap::kNumDimensions; + static constexpr int kNumSampleDim = kNumDimensions - 1; // not including sample (N) dim + static constexpr int kVariableStrides = kNumSampleDim - 1; // the innermost stride is static - sizeof type + using TensorWrapT = cuda::TensorNDWrap; + static_assert(kVariableStrides == TensorWrapT::kVariableStrides); + static_assert(kVariableStrides + 1 == TensorWrap::kVariableStrides); + static_assert(1 <= kVariableStrides && kVariableStrides <= 3); + auto *basePtr = batchTensorWrap.ptr(sampleIdx); + const int *strides = batchTensorWrap.strides(); + if constexpr (kVariableStrides == 1) + { + return TensorWrapT{basePtr, strides[1]}; + } + else if constexpr (kVariableStrides == 2) + { + return TensorWrapT{basePtr, strides[1], strides[2]}; + } + else if constexpr (kVariableStrides == 3) + { + return TensorWrapT{basePtr, strides[1], strides[2], strides[3]}; + } +} + +} // namespace tensor + +namespace dynamic { +struct TensorAccessDesc +{ + static constexpr int kMaxNStrides = 3; + + unsigned char *basePtr; + int strides[kMaxNStrides]; +}; + +template +void SetupTensorAccessStrides(TensorAccessDesc &tensorAccessDesc, const std::array strides) +{ + // we ignore the last stride (sample stride), it's not needed for a single sample + // as the samples are not assumed to be uniform + static constexpr int kNSampleStrides = kNStrides - 1; + static_assert(kNSampleStrides <= TensorAccessDesc::kMaxNStrides); + for (int d = 0; d < kNSampleStrides; d++) + { + NVCV_ASSERT(strides[d] <= cuda::TypeTraits::max); + tensorAccessDesc.strides[kNSampleStrides - 1 - d] = strides[d]; + } +} + +/** + * @brief Wrapper for batch of dynamically created samples + * (here, batch of intermediate samples between resampling passes) + */ +template +struct DynamicBatchWrap +{ + using ValueType = T; + static constexpr int kNumDimensions = N; + static constexpr int kNumSampleDim = kNumDimensions - 1; // not including sample (N) dim + static constexpr int kVariableStrides = kNumSampleDim - 1; // the innermost stride is static - sizeof type + using TensorWrapT = cuda::TensorNDWrap; + static_assert(kVariableStrides == TensorWrapT::kVariableStrides); + static_assert(kVariableStrides >= 1 && kVariableStrides <= TensorAccessDesc::kMaxNStrides); + + DynamicBatchWrap(TensorAccessDesc *samples) + : m_samples{samples} + { + } + + inline __device__ TensorWrapT GetSampleView(const int sampleIdx) const + { + static_assert(1 <= kVariableStrides && kVariableStrides <= 3); + + auto sample = m_samples[sampleIdx]; + const unsigned char *basePtr = sample.basePtr; + + if constexpr (kVariableStrides == 1) + { + return TensorWrapT{basePtr, sample.strides[0]}; + } + else if constexpr (kVariableStrides == 2) + { + return TensorWrapT{basePtr, sample.strides[0], sample.strides[1]}; + } + else if constexpr (kVariableStrides == 3) + { + return TensorWrapT{basePtr, sample.strides[0], sample.strides[1], sample.strides[2]}; + } + } + +private: + TensorAccessDesc *m_samples; +}; + +struct DynamicBatchWrapMeta +{ + TensorAccessDesc *cpu; + TensorAccessDesc *gpu; +}; + +inline void AddDynamicBatchWrapMeta(WorkspaceEstimator &est, int numSamples) +{ + est.addPinned(numSamples); + est.addCuda(numSamples); +} + +inline DynamicBatchWrapMeta 
AllocateDynamicBatchWrapMeta(WorkspaceAllocator &allocator, int numSamples) +{ + DynamicBatchWrapMeta meta; + meta.cpu = allocator.getPinned(numSamples); + meta.gpu = allocator.getCuda(numSamples); + return meta; +} + +template +DynamicBatchWrap CreateDynamicBatchWrap(int pass, cuda::BaseType *intermediate, + const DynamicBatchWrapMeta tensorBatchMeta, + const SampleDescT *sampleDescsCpu, int numSamples, cudaStream_t stream) +{ + static constexpr int kSpatialNDim = SampleDescT::kSpatialNDim; + static_assert(N == 1 + kSpatialNDim + kHasDynamicChannels); + + ptrdiff_t sampleOffset = 0; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + const SampleDescT &sampleDesc = sampleDescsCpu[sampleIdx]; + VecI outputShape = sampleDesc.shapes[pass + 1]; + TensorAccessDesc &tensorAccess = tensorBatchMeta.cpu[sampleIdx]; + tensorAccess.basePtr = reinterpret_cast(intermediate) + sampleOffset; + if constexpr (kHasDynamicChannels) + { + constexpr int kNStrides = kSpatialNDim + 1; + auto strides = ComputeDenseStrides(outputShape, sampleDesc.channels); + SetupTensorAccessStrides(tensorAccess, strides); + sampleOffset += strides[kNStrides - 1]; + } + else if constexpr (!kHasDynamicChannels) + { + constexpr int kNStrides = kSpatialNDim; + auto strides = ComputeDenseStrides(outputShape); + SetupTensorAccessStrides(tensorAccess, strides); + sampleOffset += strides[kNStrides - 1]; + } + } + NVCV_CHECK_THROW(cudaMemcpyAsync(tensorBatchMeta.gpu, tensorBatchMeta.cpu, numSamples * sizeof(TensorAccessDesc), + cudaMemcpyHostToDevice, stream)); + + return {tensorBatchMeta.gpu}; +} +} // namespace dynamic + +template +struct ImageBatchVarShapeWrapAdapter +{ + using ValueType = T; + static constexpr int kNumDimensions = 3; // NHW + static constexpr int kNumSampleDim = 2; // HW + static constexpr int kVariableStrides = 1; // the innermost stride is static - sizeof type + using TensorWrapT = cuda::TensorNDWrap; + static_assert(kVariableStrides == TensorWrapT::kVariableStrides); + + ImageBatchVarShapeWrapAdapter(const nvcv::ImageBatchVarShapeDataStridedCuda &batchData) + : m_batch{cuda::ImageBatchVarShapeWrap{batchData}} + { + } + + inline __device__ TensorWrapT GetSampleView(const int sampleIdx, const VecI<2> roi) const + { + return TensorWrapT{m_batch.ptr(sampleIdx, 0, roi.y, roi.x), m_batch.rowStride(sampleIdx)}; + } + + inline __device__ TensorWrapT GetSampleView(const int sampleIdx) const + { + return TensorWrapT{m_batch.ptr(sampleIdx, 0, 0, 0), m_batch.rowStride(sampleIdx)}; + } + +private: + cuda::ImageBatchVarShapeWrap m_batch; +}; + +template +struct TensorBatchWrapAdapter +{ + using ValueType = T; + static constexpr int kNumDimensions = N; + static constexpr int kNumSampleDim = kNumDimensions - 1; // not including sample (N) dim + static constexpr int kVariableStrides = kNumSampleDim - 1; + using TensorWrapT = cuda::TensorNDWrap; + using TensorBatchWrapT = cuda::TensorBatchNDWrap; + static_assert(kVariableStrides == TensorWrapT::kVariableStrides); + static_assert(kVariableStrides == TensorBatchWrapT::kVariableStrides); + + TensorBatchWrapAdapter(const nvcv::TensorBatchDataStridedCuda &batchData) + : m_batch{TensorBatchWrapT{batchData}} + { + } + + inline __device__ TensorWrapT GetSampleView(const int sampleIdx, const VecI<2> roi) const + { + return TensorWrapT{m_batch.ptr(sampleIdx, roi.y, roi.x), m_batch.strides(sampleIdx)}; + } + + inline __device__ TensorWrapT GetSampleView(const int sampleIdx, const VecI<3> roi) const + { + return TensorWrapT{m_batch.ptr(sampleIdx, roi.z, roi.y, roi.x), 
m_batch.strides(sampleIdx)}; + } + + inline __device__ TensorWrapT GetSampleView(const int sampleIdx) const + { + return m_batch.tensor(sampleIdx); + } + +private: + TensorBatchWrapT m_batch; +}; +} // namespace cvcuda::priv::hq_resize::batch_wrapper +#endif // CVCUDA_PRIV_HQ_RESIZE_BATCH_WRAP_CUH diff --git a/src/cvcuda/priv/OpHQResizeFilter.cuh b/src/cvcuda/priv/OpHQResizeFilter.cuh new file mode 100644 index 000000000..e32f5d270 --- /dev/null +++ b/src/cvcuda/priv/OpHQResizeFilter.cuh @@ -0,0 +1,402 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef CVCUDA_PRIV_HQ_RESIZE_FILTER_CUH +#define CVCUDA_PRIV_HQ_RESIZE_FILTER_CUH + +#include +#include // for NVCVInterpolationType, etc. +#include +#include +#include +#include + +#include +#include +#include +#include + +/* This file implements ResamplingFiltersFactory. + The class precomputes the coefficients for the supported filter kinds once + and stores them in the memory of the current device. + Then, the factory can be used to create ResamplingFilters + that interpolate the coefficients for a given support size. */ +namespace cvcuda::priv::hq_resize::filter { + +/** + * @brief Internally supported filters. + * + * Triangular is short for Linear + antialias (which requires precomputing an explicit filter's support). + */ +enum class FilterType : uint8_t +{ + Nearest, + Linear, + Triangular, + Gaussian, + Cubic, + Lanczos3, +}; + +/** + * @brief Internally supported kinds of filters - all the FilterTypes that + * require coefficients kept in shared memory are mapped to the same kind: + * `ShmFilter`.
+ * + */ +enum class FilterTypeKind : uint8_t +{ + Nearest, + Linear, + ShmFilter, +}; + +inline FilterTypeKind GetFilterTypeKind(FilterType filterType) +{ + FilterTypeKind filterKind; + switch (filterType) + { + case FilterType::Nearest: + filterKind = FilterTypeKind::Nearest; + break; + case FilterType::Linear: + filterKind = FilterTypeKind::Linear; + break; + default: + filterKind = FilterTypeKind::ShmFilter; + break; + } + return filterKind; +} + +struct FilterMode +{ + FilterType filterType; + bool antialias; +}; + +inline FilterMode GetFilterMode(NVCVInterpolationType interpolation, bool antialias) + +{ + FilterType filterType; + switch (interpolation) + { + case NVCV_INTERP_NEAREST: + filterType = FilterType::Nearest; + break; + case NVCV_INTERP_LINEAR: + filterType = FilterType::Linear; + break; + case NVCV_INTERP_CUBIC: + filterType = FilterType::Cubic; + break; + case NVCV_INTERP_LANCZOS: + filterType = FilterType::Lanczos3; + break; + case NVCV_INTERP_GAUSSIAN: + filterType = FilterType::Gaussian; + break; + default: + throw nvcv::Exception(nvcv::Status::ERROR_NOT_IMPLEMENTED, + "The resize operator does not support the selected interpolation method"); + } + if (antialias && filterType == FilterType::Linear) + { + filterType = FilterType::Triangular; + } + return {filterType, antialias}; +} + +inline std::tuple GetFilterModes(NVCVInterpolationType minInterpolation, + NVCVInterpolationType magInterpolation, bool antialias) +{ + std::tuple modes; + auto &[minFilter, magFilter] = modes; + minFilter = GetFilterMode(minInterpolation, antialias); + magFilter = GetFilterMode(magInterpolation, false); + return modes; +} + +struct ResamplingFilter +{ + float *coeffs; + int numCoeffs; + float anchor; // support / 2 + float scale; // (numCoeffs - 1) / support + + void rescale(float support) + { + float old_scale = scale; + scale = (numCoeffs - 1) / support; + anchor = anchor * old_scale / scale; + } + + __host__ __device__ int support() const + { + return ceilf((numCoeffs - 1) / scale); + } + + __device__ float operator()(float x) const + { + if (!(x > -1)) // negative and NaN arguments + return 0; + if (x >= numCoeffs) + return 0; + int x0 = floorf(x); + int x1 = x0 + 1; + float d = x - x0; + float f0 = x0 < 0 ? 0.0f : __ldg(coeffs + x0); + float f1 = x1 >= numCoeffs ? 
0.0f : __ldg(coeffs + x1); + return f0 + d * (f1 - f0); + } +}; + +static_assert(std::is_pod_v); + +inline float LanczosWindow(float x, float a) +{ + if (fabsf(x) >= a) + return 0.0f; + return nvcv::util::sinc(x) * nvcv::util::sinc(x / a); +} + +inline float CubicWindow(float x) +{ + x = fabsf(x); + if (x >= 2) + return 0; + + float x2 = x * x; + float x3 = x2 * x; + if (x > 1) + return -0.5f * x3 + 2.5f * x2 - 4.0f * x + 2.0f; + else + return 1.5f * x3 - 2.5f * x2 + 1.0f; +} + +template +inline void InitFilter(ResamplingFilter &filter, Function F) +{ + for (int i = 0; i < filter.numCoeffs; i++) filter.coeffs[i] = F(i); +} + +inline void InitTriangularFilter(ResamplingFilter filter) +{ + filter.coeffs[0] = 0; + filter.coeffs[1] = 1; + filter.coeffs[2] = 0; +} + +inline void InitGaussianFilter(ResamplingFilter filter) +{ + InitFilter(filter, + [&](int i) + { + float x = 4 * (i - (filter.numCoeffs - 1) * 0.5f) / (filter.numCoeffs - 1); + return expf(-x * x); + }); +} + +inline void InitLanczosFilter(ResamplingFilter filter, float a) +{ + InitFilter(filter, + [&](int i) + { + float x = 2 * a * (i - (filter.numCoeffs - 1) * 0.5f) / (filter.numCoeffs - 1); + return LanczosWindow(x, a); + }); + filter.rescale(6); // rescaling to the minimal allowed support +} + +inline void InitCubicFilter(ResamplingFilter filter) +{ + InitFilter(filter, + [&](int i) + { + float x = 4 * (i - (filter.numCoeffs - 1) * 0.5f) / (filter.numCoeffs - 1); + return CubicWindow(x); + }); + filter.rescale(4); // rescaling to the minimal allowed support +} + +class ResamplingFiltersFactory +{ +public: + enum FilterIdx + { + Idx_Triangular = 0, + Idx_Gaussian, + Idx_Lanczos3, + Idx_Cubic, + kNumFilters + }; + + static constexpr int kLanczosResolution = 32; + static constexpr int kLanczosA = 3; + + static constexpr int kTriangularSize = 3; + static constexpr int kGaussianSize = 65; + static constexpr int kCubicSize = 129; + static constexpr int kLanczosSize = (2 * kLanczosA * kLanczosResolution + 1); + + static constexpr int kTotalSize = kTriangularSize + kGaussianSize + kCubicSize + kLanczosSize; + + ResamplingFiltersFactory() + : m_deviceId{[]() + { + int deviceId; + NVCV_CHECK_THROW(cudaGetDevice(&deviceId)); + return deviceId; + }()} + + { + // Pinned memory is needed for proper synchronization of the synchronous copy + std::unique_ptr> filterDataPinned; + { + float *ptr = nullptr; + NVCV_CHECK_THROW(cudaMallocHost(&ptr, kTotalSize * sizeof(float))); + filterDataPinned = {ptr, [](void *ptr) + { + NVCV_CHECK_THROW(cudaFreeHost(ptr)); + }}; + } + { + float *ptr = nullptr; + NVCV_CHECK_THROW(cudaMalloc(&ptr, kTotalSize * sizeof(float))); + m_filterDataGpu = {ptr, [](void *ptr) + { + NVCV_CHECK_THROW(cudaFree(ptr)); + }}; + } + auto addFilter = [&](FilterIdx filterIdx, int size) + { + float *base = filterIdx == 0 ? filterDataPinned.get() + : m_filters[filterIdx - 1].coeffs + m_filters[filterIdx - 1].numCoeffs; + m_filters[filterIdx] = {base, size, 1, (size - 1) * 0.5f}; + }; + addFilter(Idx_Triangular, kTriangularSize); + InitTriangularFilter(m_filters[Idx_Triangular]); + addFilter(Idx_Gaussian, kGaussianSize); + InitGaussianFilter(m_filters[Idx_Gaussian]); + addFilter(Idx_Lanczos3, kLanczosSize); + InitLanczosFilter(m_filters[Idx_Lanczos3], kLanczosA); + addFilter(Idx_Cubic, kCubicSize); + InitCubicFilter(m_filters[Idx_Cubic]); + + // According to cuda-driver-api: For transfers from pinned host memory to device memory, + // the cudaMemcpy is synchronous with respect to the host. 
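        // Editorial note: after this copy and the pointer rebasing that follows, each
        // ResamplingFilter::coeffs points into m_filterDataGpu, i.e. it is a device pointer
        // and is only dereferenced in device code (see ResamplingFilter::operator(), which
        // reads the coefficients through __ldg).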
+ NVCV_CHECK_THROW(cudaMemcpy(m_filterDataGpu.get(), filterDataPinned.get(), kTotalSize * sizeof(float), + cudaMemcpyHostToDevice)); + // Set the pointers to the corresponding offsets in m_filterDataGpu + ptrdiff_t diff = m_filterDataGpu.get() - filterDataPinned.get(); + for (auto &f : m_filters) + { + f.coeffs += diff; + } + } + + ResamplingFilter CreateCubic(float radius = 2.0f) const noexcept + { + validateDeviceId(); + auto flt = m_filters[Idx_Cubic]; + flt.rescale(2.0f * std::max(2.0f, radius)); + return flt; + } + + ResamplingFilter CreateGaussian(float sigma) const noexcept + { + validateDeviceId(); + auto flt = m_filters[Idx_Gaussian]; + flt.rescale(std::max(1.0f, static_cast(4 * M_SQRT2) * sigma)); + return flt; + } + + ResamplingFilter CreateLanczos3(float radius = 3.0f) const noexcept + { + validateDeviceId(); + auto flt = m_filters[Idx_Lanczos3]; + flt.rescale(2.0f * std::max(3.0f, radius)); + return flt; + } + + ResamplingFilter CreateTriangular(float radius) const noexcept + { + validateDeviceId(); + auto flt = m_filters[Idx_Triangular]; + flt.rescale(std::max(1.0f, 2 * radius)); + return flt; + } + +private: + void validateDeviceId() const + { + int deviceId; + NVCV_CHECK_THROW(cudaGetDevice(&deviceId)); + if (deviceId != m_deviceId) + { + throw nvcv::Exception(nvcv::Status::ERROR_DEVICE, + "The HQ resize operator was initialized and called with different current device."); + } + } + + int m_deviceId; + std::unique_ptr> m_filterDataGpu; + ResamplingFilter m_filters[kNumFilters]; +}; + +inline ResamplingFilter GetResamplingFilter(const ResamplingFiltersFactory &filtersFactory, + const FilterMode &filterMode, const float inSize, const float outSize) +{ + bool antialias = filterMode.antialias && (outSize < inSize); + switch (filterMode.filterType) + { + case FilterType::Linear: + { + return filtersFactory.CreateTriangular(1); + } + break; + case FilterType::Triangular: + { + const float radius = antialias ? inSize / outSize : 1; + return filtersFactory.CreateTriangular(radius); + } + break; + case FilterType::Gaussian: + { + const float radius = antialias ? inSize / outSize : 1; + return filtersFactory.CreateGaussian(radius * 0.5f / M_SQRT2); + } + break; + case FilterType::Cubic: + { + const float radius = antialias ? (2 * inSize / outSize) : 2; + return filtersFactory.CreateCubic(radius); + } + break; + case FilterType::Lanczos3: + { + const float radius = antialias ? 
(3 * inSize / outSize) : 3; + return filtersFactory.CreateLanczos3(radius); + } + default: // Nearest neighbour + { + return {nullptr, 0, 0, 1}; + } + } +} + +} // namespace cvcuda::priv::hq_resize::filter +#endif // CVCUDA_PRIV_HQ_RESIZE_FILTER_CUH diff --git a/src/cvcuda/priv/legacy/channel_reorder_var_shape.cu b/src/cvcuda/priv/legacy/channel_reorder_var_shape.cu index 93688bd23..af21bd1a9 100644 --- a/src/cvcuda/priv/legacy/channel_reorder_var_shape.cu +++ b/src/cvcuda/priv/legacy/channel_reorder_var_shape.cu @@ -92,6 +92,12 @@ ErrorCode ChannelReorderVarShape::infer(const ImageBatchVarShapeDataStridedCuda return ErrorCode::INVALID_DATA_SHAPE; } + if (inData.numImages() == 0) + { + // nothing to do, move above the calling of GetLegacyDataType to avoid error: "All planes must have the same data type" + return ErrorCode::SUCCESS; + } + DataType data_type; int channels; { @@ -113,6 +119,12 @@ ErrorCode ChannelReorderVarShape::infer(const ImageBatchVarShapeDataStridedCuda return ErrorCode::INVALID_DATA_SHAPE; } + if (helpers::GetLegacyDataType(orderData.dtype()) != kCV_32S) + { + LOG_ERROR("Invalid Order tensor DataType " << helpers::GetLegacyDataType(orderData.dtype())); + return ErrorCode::INVALID_DATA_SHAPE; + } + if (orderData.layout()[0] != nvcv::LABEL_BATCH) { LOG_ERROR("Label of the first dimension of order tensor must be " << nvcv::LABEL_BATCH); @@ -144,14 +156,14 @@ ErrorCode ChannelReorderVarShape::infer(const ImageBatchVarShapeDataStridedCuda if (outFmt.numPlanes() != 1) { - LOG_ERROR("Format of input image #" << i << " must have only 1 plane"); + LOG_ERROR("Format of output image #" << i << " must have only 1 plane"); return ErrorCode::INVALID_DATA_FORMAT; } // Legacy code has this check, let's stick to it. if (inFmt.numChannels() != channels) { - LOG_ERROR("Invalid input"); + LOG_ERROR("Input channel " << inFmt.numChannels() << " differs from " << channels); return ErrorCode::INVALID_DATA_SHAPE; } @@ -185,12 +197,6 @@ ErrorCode ChannelReorderVarShape::infer(const ImageBatchVarShapeDataStridedCuda } } - if (inData.numImages() == 0) - { - // nothing to do - return ErrorCode::SUCCESS; - } - typedef void (*func_t)(const ImageBatchVarShapeDataStridedCuda &inData, const ImageBatchVarShapeDataStridedCuda &outData, const TensorDataStridedCuda &orderData, int numChannels, cudaStream_t stream); diff --git a/src/cvcuda/priv/legacy/cvt_color_var_shape.cu b/src/cvcuda/priv/legacy/cvt_color_var_shape.cu index 2dc01bbd2..0d469ca71 100644 --- a/src/cvcuda/priv/legacy/cvt_color_var_shape.cu +++ b/src/cvcuda/priv/legacy/cvt_color_var_shape.cu @@ -746,8 +746,9 @@ inline ErrorCode BGR_to_RGB(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_FORMAT; } - int channels = inData.uniqueFormat().numChannels(); - DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + int channels = inData.uniqueFormat().numChannels(); + DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); if (channels != sch) { @@ -755,6 +756,12 @@ inline ErrorCode BGR_to_RGB(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_SHAPE; } + if (data_type != out_data_type) + { + LOG_ERROR("Unsupported input/output DataType " << data_type << "/" << out_data_type); + return ErrorCode::INVALID_DATA_TYPE; + } + if (!outData.uniqueFormat()) { LOG_ERROR("Images in the output batch must all have the same format"); @@ -837,8 +844,9 @@ inline ErrorCode GRAY_to_BGR(const 
ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_FORMAT; } - int channels = inData.uniqueFormat().numChannels(); - DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + int channels = inData.uniqueFormat().numChannels(); + DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); if (channels != 1) { @@ -846,6 +854,12 @@ inline ErrorCode GRAY_to_BGR(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_SHAPE; } + if (data_type != out_data_type) + { + LOG_ERROR("Unsupported input/output DataType " << data_type << "/" << out_data_type); + return ErrorCode::INVALID_DATA_TYPE; + } + if (!outData.uniqueFormat()) { LOG_ERROR("Images in the output batch must all have the same format"); @@ -929,8 +943,9 @@ inline ErrorCode BGR_to_GRAY(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_FORMAT; } - int channels = inData.uniqueFormat().numChannels(); - DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + int channels = inData.uniqueFormat().numChannels(); + DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); if (channels != sch) { @@ -938,6 +953,12 @@ inline ErrorCode BGR_to_GRAY(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_SHAPE; } + if (data_type != out_data_type) + { + LOG_ERROR("Unsupported input/output DataType " << data_type << "/" << out_data_type); + return ErrorCode::INVALID_DATA_TYPE; + } + if (!outData.uniqueFormat()) { LOG_ERROR("Images in the output batch must all have the same format"); @@ -1004,8 +1025,9 @@ inline ErrorCode BGR_to_YUV(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_FORMAT; } - int channels = inData.uniqueFormat().numChannels(); - DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + int channels = inData.uniqueFormat().numChannels(); + DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); if (channels != 3) { @@ -1013,6 +1035,12 @@ inline ErrorCode BGR_to_YUV(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_SHAPE; } + if (data_type != out_data_type) + { + LOG_ERROR("Unsupported input/output DataType " << data_type << "/" << out_data_type); + return ErrorCode::INVALID_DATA_TYPE; + } + if (!outData.uniqueFormat()) { LOG_ERROR("Images in the output batch must all have the same format"); @@ -1079,8 +1107,9 @@ inline ErrorCode YUV_to_BGR(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_FORMAT; } - int channels = inData.uniqueFormat().numChannels(); - DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + int channels = inData.uniqueFormat().numChannels(); + DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); if (channels != 3) { @@ -1088,6 +1117,12 @@ inline ErrorCode YUV_to_BGR(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_SHAPE; } + if (data_type != out_data_type) + { + LOG_ERROR("Unsupported input/output DataType " << data_type << "/" << out_data_type); + return ErrorCode::INVALID_DATA_TYPE; + } + if (!outData.uniqueFormat()) { LOG_ERROR("Images in the output batch must all 
have the same format"); @@ -1155,8 +1190,9 @@ inline ErrorCode BGR_to_HSV(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_FORMAT; } - int channels = inData.uniqueFormat().numChannels(); - DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + int channels = inData.uniqueFormat().numChannels(); + DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); if (channels != 3) { @@ -1164,6 +1200,12 @@ inline ErrorCode BGR_to_HSV(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_SHAPE; } + if (data_type != out_data_type) + { + LOG_ERROR("Unsupported input/output DataType " << data_type << "/" << out_data_type); + return ErrorCode::INVALID_DATA_TYPE; + } + if (!outData.uniqueFormat()) { LOG_ERROR("Images in the output batch must all have the same format"); @@ -1223,8 +1265,9 @@ inline ErrorCode HSV_to_BGR(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_FORMAT; } - int channels = inData.uniqueFormat().numChannels(); - DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + int channels = inData.uniqueFormat().numChannels(); + DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); if (channels != 3) { @@ -1232,6 +1275,12 @@ inline ErrorCode HSV_to_BGR(const ImageBatchVarShapeDataStridedCuda &inData, return ErrorCode::INVALID_DATA_SHAPE; } + if (data_type != out_data_type) + { + LOG_ERROR("Unsupported input/output DataType " << data_type << "/" << out_data_type); + return ErrorCode::INVALID_DATA_TYPE; + } + if (!outData.uniqueFormat()) { LOG_ERROR("Images in the output batch must all have the same format"); diff --git a/src/cvcuda/priv/legacy/gaussian_noise.cu b/src/cvcuda/priv/legacy/gaussian_noise.cu index e9da8c573..77d09fef1 100644 --- a/src/cvcuda/priv/legacy/gaussian_noise.cu +++ b/src/cvcuda/priv/legacy/gaussian_noise.cu @@ -199,7 +199,7 @@ GaussianNoise::GaussianNoise(DataShape max_input_shape, DataShape max_output_sha if (maxBatchSize < 0) { LOG_ERROR("Invalid num of max batch size " << maxBatchSize); - throw std::runtime_error("Parameter error!"); + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Parameter error!"); } cudaError_t err = cudaMalloc((void **)&m_states, sizeof(curandState) * BLOCK * maxBatchSize); if (err != cudaSuccess) @@ -253,7 +253,7 @@ ErrorCode GaussianNoise::infer(const TensorDataStridedCuda &inData, const Tensor DataType out_data_type = GetLegacyDataType(outData.dtype()); if (in_data_type != out_data_type) { - LOG_ERROR("Invalid DataType " << out_data_type); + LOG_ERROR("DataType of input and output must be equal, but got " << in_data_type << " and " << out_data_type); return ErrorCode::INVALID_DATA_TYPE; } diff --git a/src/cvcuda/priv/legacy/gaussian_noise_var_shape.cu b/src/cvcuda/priv/legacy/gaussian_noise_var_shape.cu index 70b4f8ab2..c97515cfa 100644 --- a/src/cvcuda/priv/legacy/gaussian_noise_var_shape.cu +++ b/src/cvcuda/priv/legacy/gaussian_noise_var_shape.cu @@ -258,7 +258,7 @@ ErrorCode GaussianNoiseVarShape::infer(const ImageBatchVarShapeDataStridedCuda & DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); if (in_data_type != out_data_type) { - LOG_ERROR("Invalid DataType " << out_data_type); + LOG_ERROR("DataType of input and output must be equal, but got " << in_data_type << " and " << out_data_type); 
return ErrorCode::INVALID_DATA_TYPE; } diff --git a/src/cvcuda/priv/legacy/inpaint.cu b/src/cvcuda/priv/legacy/inpaint.cu index 077fc6643..909c2c987 100644 --- a/src/cvcuda/priv/legacy/inpaint.cu +++ b/src/cvcuda/priv/legacy/inpaint.cu @@ -634,7 +634,7 @@ ErrorCode Inpaint::infer(const TensorDataStridedCuda &inData, const TensorDataSt if (in_data_type != out_data_type) { - LOG_ERROR("Invalid DataType " << out_data_type); + LOG_ERROR("DataType of input and output must be equal, but got " << in_data_type << " and " << out_data_type); return ErrorCode::INVALID_DATA_TYPE; } diff --git a/src/cvcuda/priv/legacy/median_blur_var_shape.cu b/src/cvcuda/priv/legacy/median_blur_var_shape.cu index 0c65efbab..3abf39adb 100644 --- a/src/cvcuda/priv/legacy/median_blur_var_shape.cu +++ b/src/cvcuda/priv/legacy/median_blur_var_shape.cu @@ -91,7 +91,7 @@ __device__ T fetch(T *shared, const cuda::ImageBatchVarShapeWrapNHWC src, int */ template __global__ void median(const cuda::ImageBatchVarShapeWrapNHWC src, cuda::ImageBatchVarShapeWrapNHWC dst, - const cuda::Tensor2DWrap ksize) + cuda::Tensor1DWrap ksize) { #define fetch_(gx, gy, block_size) \ fetch(tails, src, batchIdx, h, w, channel, blockX, blockY, (gx), (gy), (block_size)) @@ -105,8 +105,10 @@ __global__ void median(const cuda::ImageBatchVarShapeWrapNHWC src, cuda::Imag int channel = blockIdx.z % dst.numChannels(); int batchIdx = blockIdx.z / dst.numChannels(); int h = src.height(batchIdx), w = src.width(batchIdx); - int kWidth = *ksize.ptr(batchIdx, 0); //kWidths[batchIdx]; - int kHeight = *ksize.ptr(batchIdx, 1); //kHeights[batchIdx]; + + int2 kernelSize = ksize[batchIdx]; + int kWidth = kernelSize.x; + int kHeight = kernelSize.y; __shared__ T tails[GENERAL_KERNEL_BLOCK * GENERAL_KERNEL_BLOCK]; if (x < w && y < h) @@ -277,7 +279,7 @@ __inline__ __device__ T placePivot(T *arr, int length) template __global__ void medianForSmallKernel(const cuda::ImageBatchVarShapeWrapNHWC src, - cuda::ImageBatchVarShapeWrapNHWC dst, const cuda::Tensor2DWrap ksize) + cuda::ImageBatchVarShapeWrapNHWC dst, cuda::Tensor1DWrap ksize) { int tx = threadIdx.x, ty = threadIdx.y; int blockX = blockIdx.x * blockDim.x; @@ -287,8 +289,10 @@ __global__ void medianForSmallKernel(const cuda::ImageBatchVarShapeWrapNHWC s int channel = blockIdx.z % dst.numChannels(); int batchIdx = blockIdx.z / dst.numChannels(); int h = src.height(batchIdx), w = src.width(batchIdx); - int kWidth = *ksize.ptr(batchIdx, 0); //kWidths[batchIdx]; - int kHeight = *ksize.ptr(batchIdx, 1); //kHeights[batchIdx]; + + int2 kernelSize = ksize[batchIdx]; + int kWidth = kernelSize.x; + int kHeight = kernelSize.y; __shared__ T tails[SMALL_KERNEL_BLOCK * SMALL_KERNEL_BLOCK]; if (x < w && y < h) @@ -350,8 +354,6 @@ void median(const ImageBatchVarShapeDataStridedCuda &in, const ImageBatchVarShap cuda::ImageBatchVarShapeWrapNHWC src(in, channels); cuda::ImageBatchVarShapeWrapNHWC dst(out, channels); - cuda::Tensor2DWrap ksizePtr(ksize); - #ifdef CUDA_DEBUG_LOG checkCudaErrors(cudaStreamSynchronize(stream)); checkCudaErrors(cudaGetLastError()); diff --git a/src/cvcuda/priv/legacy/min_area_rect.cu b/src/cvcuda/priv/legacy/min_area_rect.cu index 384c011cb..cdf463249 100644 --- a/src/cvcuda/priv/legacy/min_area_rect.cu +++ b/src/cvcuda/priv/legacy/min_area_rect.cu @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES * SPDX-License-Identifier: Apache-2.0 @@ -42,7 +42,6 @@ void calculateRotateCoefCUDA(cuda::Tensor2DWrap rotateCoefBuf, const int template __global__ void resetRotatedPointsBuf(cuda::Tensor3DWrap rotatedPointsTensor, const int numOfDegrees) { - // int pointIdx = blockIdx.x * blockDim.x + threadIdx.x; int contourIdx = blockIdx.x; int angleIdx = threadIdx.x; if (angleIdx < numOfDegrees) @@ -124,56 +123,83 @@ template __global__ void findMinAreaAndAngle(TensorWrapper rotatedPointsTensor, cuda::Tensor2DWrap outMinAreaRectBox, const int numOfDegrees) { + // Determine the angle index from the thread's X-dimension index. int angleIdx = threadIdx.x; + + // If the angle index exceeds the number of degrees, exit the thread to avoid out-of-bounds access. if (angleIdx > numOfDegrees) { return; } + // Determine the rectangle index from the block's X-dimension index. int rectIdx = blockIdx.x; extern __shared__ int areaAngleBuf_sm[]; - areaAngleBuf_sm[2 * angleIdx] = *rotatedPointsTensor.ptr(rectIdx, angleIdx, 4); - areaAngleBuf_sm[2 * angleIdx + 1] = *rotatedPointsTensor.ptr(rectIdx, angleIdx, 5); + // Load area and angle data from the input tensor into shared memory for efficient access. + // rotatedPointsTensor is a 3D tensor with dimensions (rectIdx (N), angleIdx (0-90), 6). + areaAngleBuf_sm[2 * angleIdx] = *rotatedPointsTensor.ptr(rectIdx, angleIdx, 4); + areaAngleBuf_sm[(2 * angleIdx) + 1] = *rotatedPointsTensor.ptr(rectIdx, angleIdx, 5); + + // Synchronize threads within a block to ensure shared memory is fully populated. __syncthreads(); + // Iterate over strides, halving the stride each time, for a parallel reduction algorithm. + // Each thread in the block will compare the area of the rectangle at its current angleInxed (threadIdx.x) for (int stride = numOfDegrees / 2; stride > 0; stride >>= 1) { + // Only process elements within the current stride length. if (angleIdx < stride) { + // Pointers to the current and next elements in the shared memory for comparison. int *curAreaIdx = &areaAngleBuf_sm[2 * angleIdx]; int *nextAreaIdx = &areaAngleBuf_sm[2 * (angleIdx + stride)]; int *curAngleIdx = &areaAngleBuf_sm[2 * angleIdx + 1]; int *nextAngleIdx = &areaAngleBuf_sm[2 * (angleIdx + stride) + 1]; + + // Compare and store the minimum area and corresponding angle. if (*curAreaIdx > *nextAreaIdx) { *curAreaIdx = *nextAreaIdx; *curAngleIdx = *nextAngleIdx; } } + + // Synchronize threads within a block after each iteration. __syncthreads(); + // Handle the case when stride is odd, ensuring the first element is the minimum. if (stride % 2 == 1 && areaAngleBuf_sm[0] > areaAngleBuf_sm[2 * (stride - 1)]) { areaAngleBuf_sm[0] = areaAngleBuf_sm[2 * (stride - 1)]; areaAngleBuf_sm[1] = areaAngleBuf_sm[2 * (stride - 1) + 1]; } + + // Synchronize threads within a block after handling the odd stride case. __syncthreads(); } + + // Handle the case for odd number of degrees. if (numOfDegrees % 2 == 1 && areaAngleBuf_sm[0] > areaAngleBuf_sm[2 * (numOfDegrees - 1)]) { areaAngleBuf_sm[0] = areaAngleBuf_sm[2 * (numOfDegrees - 1)]; areaAngleBuf_sm[1] = areaAngleBuf_sm[2 * (numOfDegrees - 1) + 1]; } + + // The following calculations are performed only by the first thread in each block. 
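        // Editorial note: the corner computation below applies the standard 2D rotation
        //   x' = x * cos(theta) - y * sin(theta)
        //   y' = x * sin(theta) + y * cos(theta)
        // with theta = -minRotateAngle converted to radians, mapping the axis-aligned extents
        // found at the best angle back into the original, unrotated coordinate frame.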
if (threadIdx.x == 0) { - int minRotateAngle = areaAngleBuf_sm[1]; - float cos_coeff = cos(-minRotateAngle * PI / 180); - float sin_coeff = sin(-minRotateAngle * PI / 180); - float xmin = *rotatedPointsTensor.ptr(rectIdx, areaAngleBuf_sm[1], 0); - float ymin = *rotatedPointsTensor.ptr(rectIdx, areaAngleBuf_sm[1], 1); - float xmax = *rotatedPointsTensor.ptr(rectIdx, areaAngleBuf_sm[1], 2); - float ymax = *rotatedPointsTensor.ptr(rectIdx, areaAngleBuf_sm[1], 3); - + // Retrieve the minimum rotation angle from shared memory. + int minRotateAngle = areaAngleBuf_sm[1]; + + // Calculate the cosine and sine coefficients for the rotation and read the axis-aligned extents found for that angle. + float cos_coeff = cos(-minRotateAngle * PI / 180); + float sin_coeff = sin(-minRotateAngle * PI / 180); + float xmin = *rotatedPointsTensor.ptr(rectIdx, areaAngleBuf_sm[1], 0); + float ymin = *rotatedPointsTensor.ptr(rectIdx, areaAngleBuf_sm[1], 1); + float xmax = *rotatedPointsTensor.ptr(rectIdx, areaAngleBuf_sm[1], 2); + float ymax = *rotatedPointsTensor.ptr(rectIdx, areaAngleBuf_sm[1], 3); + + // Rotate the extents back into the original frame to obtain the rectangle corner coordinates. float tl_x = (xmin * cos_coeff) - (ymin * sin_coeff); float tl_y = (xmin * sin_coeff) + (ymin * cos_coeff); float br_x = (xmax * cos_coeff) - (ymax * sin_coeff); @@ -183,6 +209,7 @@ __global__ void findMinAreaAndAngle(TensorWrapper rotatedPointsTensor, cuda::Ten float bl_x = (xmin * cos_coeff) - (ymax * sin_coeff); float bl_y = (xmin * sin_coeff) + (ymax * cos_coeff); + // Store the transformed coordinates back into the output tensor. *outMinAreaRectBox.ptr(rectIdx, 0) = bl_x; *outMinAreaRectBox.ptr(rectIdx, 1) = bl_y; *outMinAreaRectBox.ptr(rectIdx, 2) = tl_x; @@ -205,7 +232,7 @@ void minAreaRect(const TensorDataStridedCuda &inData, void *rotatedPointsDev, cuda::Tensor3DWrap inContourPointsData(inData); int kernelPitch2 = static_cast(_MIN_AREA_EACH_ANGLE_STRID * sizeof(int)); - int kernelPitch1 = _MAX_ROTATE_DEGREES * kernelPitch2; + int kernelPitch1 = (_MAX_ROTATE_DEGREES + 1) * kernelPitch2; cuda::Tensor3DWrap rotatedPointsTensor(rotatedPointsDev, kernelPitch1, kernelPitch2); cuda::Tensor2DWrap outMinAreaRectData(outData); cuda::Tensor2DWrap pointsInContourData(numPointsInContour); @@ -217,13 +244,15 @@ void minAreaRect(const TensorDataStridedCuda &inData, void *rotatedPointsDev, dim3 block2(256); dim3 grid2(divUp(maxNumPointsInContour, block2.x), contourBatch, _MAX_ROTATE_DEGREES); - size_t smem_size = 2 * _MAX_ROTATE_DEGREES * sizeof(float); + // Shared mem should be (2 * (_MAX_ROTATE_DEGREES + 1)) * sizeof(int) since there are 2 entries per angle and the range is inclusive (0-90). + size_t smem_size = (2 * (_MAX_ROTATE_DEGREES + 1)) * sizeof(int); calculateRotateArea<<>>(inContourPointsData, rotatedPointsTensor, rotateCoeffsData, pointsInContourData); checkKernelErrors(); cudaStreamSynchronize(stream); dim3 grid3(contourBatch); + findMinAreaAndAngle<<>>(rotatedPointsTensor, outMinAreaRectData, _MAX_ROTATE_DEGREES); checkKernelErrors(); @@ -232,9 +261,10 @@ void minAreaRect(const TensorDataStridedCuda &inData, void *rotatedPointsDev, MinAreaRect::MinAreaRect(DataShape max_input_shape, DataShape max_output_shape, int maxContourNum) : mMaxContourNum(maxContourNum) { + // This needs to be _MAX_ROTATE_DEGREES + 1 since we look at 0-90 degrees inclusive.
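    // Editorial sketch of the allocation size below (assuming _MAX_ROTATE_DEGREES == 90 and
    // _MIN_AREA_EACH_ANGLE_STRID == 6, values this patch does not change): each contour then
    // needs (90 + 1) * 6 = 546 ints, i.e. 2184 bytes of rotated-point scratch.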
+ int rotatedPtsAllocSize = ((maxContourNum * (_MAX_ROTATE_DEGREES + 1) * _MIN_AREA_EACH_ANGLE_STRID) * sizeof(int)); NVCV_CHECK_THROW(cudaMalloc(&mRotateCoeffsBufDev, _MAX_ROTATE_DEGREES * 2 * sizeof(float))); - NVCV_CHECK_THROW( - cudaMalloc(&mRotatedPointsDev, maxContourNum * _MAX_ROTATE_DEGREES * _MIN_AREA_EACH_ANGLE_STRID * sizeof(int))); + NVCV_CHECK_THROW(cudaMalloc(&mRotatedPointsDev, rotatedPtsAllocSize)); } MinAreaRect::~MinAreaRect() @@ -245,7 +275,7 @@ MinAreaRect::~MinAreaRect() size_t MinAreaRect::calBufferSize(DataShape max_input_shape, DataShape max_output_shape, int maxContourNum) { - return maxContourNum * _MAX_ROTATE_DEGREES * _MIN_AREA_EACH_ANGLE_STRID * sizeof(int); + return maxContourNum * (_MAX_ROTATE_DEGREES + 1) * _MIN_AREA_EACH_ANGLE_STRID * sizeof(int); } ErrorCode MinAreaRect::infer(const TensorDataStridedCuda &inData, const TensorDataStridedCuda &outData, diff --git a/src/cvcuda/priv/legacy/pillow_resize_var_shape.cu b/src/cvcuda/priv/legacy/pillow_resize_var_shape.cu index e95cc20ec..7539e30f5 100644 --- a/src/cvcuda/priv/legacy/pillow_resize_var_shape.cu +++ b/src/cvcuda/priv/legacy/pillow_resize_var_shape.cu @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES * SPDX-License-Identifier: Apache-2.0 @@ -536,7 +536,7 @@ WorkspaceRequirements PillowResizeVarShape::getWorkspaceRequirements(DataShape m WorkspaceRequirements req{}; - int max_support = 1; //3 + int max_support = 3; // Needed for various filtes Cubic needs 2 and Lanczos needs 3. Just use worst case. size_t size = std::ceil( max_output_shape.H * (((1.0 * max_input_shape.H / max_output_shape.H + 1) * max_support * 2 + 1) * sizeof(work_type) diff --git a/src/cvcuda/priv/legacy/random_resized_crop.cu b/src/cvcuda/priv/legacy/random_resized_crop.cu index bd038f48a..7f0959624 100644 --- a/src/cvcuda/priv/legacy/random_resized_crop.cu +++ b/src/cvcuda/priv/legacy/random_resized_crop.cu @@ -375,7 +375,7 @@ RandomResizedCrop::RandomResizedCrop(DataShape max_input_shape, DataShape max_ou if (min_scale_ > max_scale_ || min_ratio_ > max_ratio_) { LOG_ERROR("Invalid Parameter: scale and ratio should be of kind (min, max)"); - throw std::runtime_error("Memory allocation error!"); + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Parameter error!"); } if (maxBatchSize > 0) { @@ -488,8 +488,9 @@ ErrorCode RandomResizedCrop::infer(const TensorDataStridedCuda &inData, const Te auto inAccess = TensorDataAccessStridedImagePlanar::Create(inData); NVCV_ASSERT(inAccess); - DataType data_type = helpers::GetLegacyDataType(inData.dtype()); - DataShape input_shape = helpers::GetLegacyDataShape(inAccess->infoShape()); + DataType in_data_type = helpers::GetLegacyDataType(inData.dtype()); + DataType out_data_type = helpers::GetLegacyDataType(outData.dtype()); + DataShape input_shape = helpers::GetLegacyDataShape(inAccess->infoShape()); int channels = input_shape.C; @@ -499,9 +500,15 @@ ErrorCode RandomResizedCrop::infer(const TensorDataStridedCuda &inData, const Te return ErrorCode::INVALID_DATA_SHAPE; } - if (!(data_type == kCV_8U || data_type == kCV_16U || data_type == kCV_16S || data_type == kCV_32F)) + if (!(in_data_type == kCV_8U || in_data_type == kCV_16U || in_data_type == kCV_16S || in_data_type == kCV_32F)) { - LOG_ERROR("Invalid DataType " << data_type); + LOG_ERROR("Invalid DataType " << in_data_type); + return 
ErrorCode::INVALID_DATA_TYPE; + } + + if (in_data_type != out_data_type) + { + LOG_ERROR("DataType of input and output must be equal, but got " << in_data_type << " and " << out_data_type); return ErrorCode::INVALID_DATA_TYPE; } @@ -559,7 +566,7 @@ ErrorCode RandomResizedCrop::infer(const TensorDataStridedCuda &inData, const Te { resize, 0 /*resize*/, resize, resize} }; - const func_t func = funcs[data_type][channels - 1]; + const func_t func = funcs[in_data_type][channels - 1]; func(inData, outData, interpolation, stream, tops_gpu, lefts_gpu, scale_x_gpu, scale_y_gpu); return SUCCESS; } diff --git a/src/cvcuda/priv/legacy/random_resized_crop_var_shape.cu b/src/cvcuda/priv/legacy/random_resized_crop_var_shape.cu index 26453a0fa..279b2c875 100644 --- a/src/cvcuda/priv/legacy/random_resized_crop_var_shape.cu +++ b/src/cvcuda/priv/legacy/random_resized_crop_var_shape.cu @@ -397,7 +397,7 @@ ErrorCode RandomResizedCropVarShape::infer(const ImageBatchVarShape &in, const I if (m_maxBatchSize <= 0 || inData.numImages() > m_maxBatchSize) { - LOG_ERROR("Invalid maximum batch size"); + LOG_ERROR("Invalid maximum batch size" << m_maxBatchSize); return ErrorCode::INVALID_PARAMETER; } @@ -432,11 +432,18 @@ ErrorCode RandomResizedCropVarShape::infer(const ImageBatchVarShape &in, const I return ErrorCode::INVALID_DATA_SHAPE; } - DataType data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + DataType in_data_type = helpers::GetLegacyDataType(inData.uniqueFormat()); + DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); - if (!(data_type == kCV_8U || data_type == kCV_16U || data_type == kCV_16S || data_type == kCV_32F)) + if (!(in_data_type == kCV_8U || in_data_type == kCV_16U || in_data_type == kCV_16S || in_data_type == kCV_32F)) { - LOG_ERROR("Invalid DataType " << data_type); + LOG_ERROR("Invalid DataType " << in_data_type); + return ErrorCode::INVALID_DATA_TYPE; + } + + if (in_data_type != out_data_type) + { + LOG_ERROR("DataType of input and output must be equal, but got " << in_data_type << " and " << out_data_type); return ErrorCode::INVALID_DATA_TYPE; } @@ -491,7 +498,7 @@ ErrorCode RandomResizedCropVarShape::infer(const ImageBatchVarShape &in, const I { resize, 0 /*resize*/, resize, resize} }; - const func_t func = funcs[data_type][channels - 1]; + const func_t func = funcs[in_data_type][channels - 1]; func(inData, outData, interpolation, stream, scale_y_gpu, scale_x_gpu, tops_gpu, lefts_gpu); return SUCCESS; } diff --git a/src/cvcuda/priv/legacy/threshold.cu b/src/cvcuda/priv/legacy/threshold.cu index 8c5b620df..fd6c85791 100644 --- a/src/cvcuda/priv/legacy/threshold.cu +++ b/src/cvcuda/priv/legacy/threshold.cu @@ -793,7 +793,7 @@ Threshold::Threshold(DataShape max_input_shape, DataShape max_output_shape, uint if (maxBatchSize < 0) { LOG_ERROR("Invalid num of max batch size " << maxBatchSize); - throw std::runtime_error("Parameter error!"); + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Parameter error!"); } m_automatic_thresh = (m_type & ~NVCV_THRESH_MASK); if (m_automatic_thresh != 0) @@ -833,7 +833,7 @@ ErrorCode Threshold::infer(const TensorDataStridedCuda &inData, const TensorData DataType out_data_type = GetLegacyDataType(outData.dtype()); if (in_data_type != out_data_type) { - LOG_ERROR("Invalid Data Type " << out_data_type); + LOG_ERROR("DataType of input and output must be equal, but got " << in_data_type << " and " << out_data_type); return ErrorCode::INVALID_DATA_TYPE; } diff --git a/src/cvcuda/priv/legacy/threshold_var_shape.cu 
b/src/cvcuda/priv/legacy/threshold_var_shape.cu index dbbebb767..deaf70db3 100644 --- a/src/cvcuda/priv/legacy/threshold_var_shape.cu +++ b/src/cvcuda/priv/legacy/threshold_var_shape.cu @@ -1003,7 +1003,7 @@ ThresholdVarShape::ThresholdVarShape(DataShape max_input_shape, DataShape max_ou if (maxBatchSize < 0) { LOG_ERROR("Invalid num of max batch size " << maxBatchSize); - throw std::runtime_error("Parameter error!"); + throw nvcv::Exception(nvcv::Status::ERROR_INVALID_ARGUMENT, "Parameter error!"); } m_automatic_thresh = (m_type & ~NVCV_THRESH_MASK); if (m_automatic_thresh != 0) @@ -1044,7 +1044,7 @@ ErrorCode ThresholdVarShape::infer(const ImageBatchVarShapeDataStridedCuda &inDa DataType out_data_type = helpers::GetLegacyDataType(outData.uniqueFormat()); if (in_data_type != out_data_type) { - LOG_ERROR("Invalid Data Type " << out_data_type); + LOG_ERROR("DataType of input and output must be equal, but got " << in_data_type << " and " << out_data_type); return ErrorCode::INVALID_DATA_TYPE; } diff --git a/src/nvcv_types/Tensor.cpp b/src/nvcv_types/Tensor.cpp index 89b352398..146b4d3cd 100644 --- a/src/nvcv_types/Tensor.cpp +++ b/src/nvcv_types/Tensor.cpp @@ -49,6 +49,16 @@ NVCV_DEFINE_API(0, 2, NVCVStatus, nvcvTensorCalcRequirementsForImages, throw priv::Exception(NVCV_ERROR_INVALID_ARGUMENT, "Pointer to output requirements must not be NULL"); } + if (batch < 0) + { + throw priv::Exception(NVCV_ERROR_INVALID_ARGUMENT, "numImages must >= 0"); + } + + if (width < 0 || height < 0) + { + throw priv::Exception(NVCV_ERROR_INVALID_ARGUMENT, "width and height must >= 0"); + } + priv::ImageFormat fmt{format}; *reqs = priv::Tensor::CalcRequirements(batch, {width, height}, fmt, baseAlign, rowAlign); diff --git a/src/nvcv_types/include/nvcv/alloc/Requirements.h b/src/nvcv_types/include/nvcv/alloc/Requirements.h index ec8fde3ef..b1a7fbf07 100644 --- a/src/nvcv_types/include/nvcv/alloc/Requirements.h +++ b/src/nvcv_types/include/nvcv/alloc/Requirements.h @@ -64,7 +64,7 @@ typedef struct NVCVRequirementsRec * @param [out] req Requirements to be initialized to zero * + Must not be NULL * - * @retval NVCV_STATUS_INVALID_ARGUMENTS Some parameter is outside its valid range. + * @retval NVCV_ERROR_INVALID_ARGUMENT Some parameter is outside its valid range. * @retval NVCV_SUCCESS Operation completed successfully. */ NVCV_PUBLIC NVCVStatus nvcvRequirementsInit(NVCVRequirements *req); @@ -82,7 +82,7 @@ NVCV_PUBLIC NVCVStatus nvcvRequirementsInit(NVCVRequirements *req); * @param [in] req Requirements to be added. * + Must not be NULL * - * @retval NVCV_STATUS_INVALID_ARGUMENTS Some parameter is outside its valid range. + * @retval NVCV_ERROR_INVALID_ARGUMENT Some parameter is outside its valid range. * @retval NVCV_SUCCESS Operation completed successfully. */ NVCV_PUBLIC NVCVStatus nvcvRequirementsAdd(NVCVRequirements *reqSum, const NVCVRequirements *req); @@ -95,7 +95,7 @@ NVCV_PUBLIC NVCVStatus nvcvRequirementsAdd(NVCVRequirements *reqSum, const NVCVR * @param [out] size_t Calculated size in bytes. * + Must not be NULL * - * @retval NVCV_STATUS_INVALID_ARGUMENTS Some parameter is outside its valid range. + * @retval NVCV_ERROR_INVALID_ARGUMENT Some parameter is outside its valid range. * @retval NVCV_SUCCESS Operation completed successfully. 
*/ NVCV_PUBLIC NVCVStatus nvcvMemRequirementsCalcTotalSizeBytes(const NVCVMemRequirements *memReq, int64_t *sizeBytes); @@ -115,7 +115,7 @@ NVCV_PUBLIC NVCVStatus nvcvMemRequirementsCalcTotalSizeBytes(const NVCVMemRequir * * @param [in] bufAlignment Alignment of the memory buffer, in bytes. * - * @retval NVCV_STATUS_INVALID_ARGUMENTS Some parameter is outside its valid range. + * @retval NVCV_ERROR_INVALID_ARGUMENT Some parameter is outside its valid range. * @retval NVCV_SUCCESS Operation completed successfully. */ NVCV_PUBLIC NVCVStatus nvcvMemRequirementsAddBuffer(NVCVMemRequirements *memReq, int64_t bufSize, int64_t bufAlignment); diff --git a/src/nvcv_types/include/nvcv/cuda/TensorBatchWrap.hpp b/src/nvcv_types/include/nvcv/cuda/TensorBatchWrap.hpp new file mode 100644 index 000000000..ebc838658 --- /dev/null +++ b/src/nvcv_types/include/nvcv/cuda/TensorBatchWrap.hpp @@ -0,0 +1,386 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file TensorBatchWrap.hpp + * + * @brief Defines a wrapper of a tensor batch. + */ + +#ifndef NVCV_CUDA_TENSOR_BATCH_WRAP_HPP +#define NVCV_CUDA_TENSOR_BATCH_WRAP_HPP + +#include "TypeTraits.hpp" // for HasTypeTraits, etc +#include "nvcv/TensorBatchData.hpp" +#include "nvcv/cuda/TensorWrap.hpp" + +#include + +namespace nvcv::cuda { + +/** + * @defgroup NVCV_CPP_CUDATOOLS_TENSORBATCHWRAP TensorBatchWrap classes + * @{ + */ + +/** + * TensorBatchWrap class is a non-owning wrap of a batch of N-D tensors used for easy access of its elements in CUDA device. + * + * TensorBatchWrap is a wrapper of a batch of multi-dimensional tensors that can have one or more of its N dimension strides, or + * pitches, defined either at compile-time or at run-time. Each pitch in \p Strides represents the offset in bytes + * as a compile-time template parameter that will be applied from the first (slowest changing) dimension to the + * last (fastest changing) dimension of the tensor, in that order. Each dimension with run-time pitch is specified + * as -1 in the \p Strides template parameter. + * + * Template arguments: + * - T type of the values inside the tensors + * - Strides sequence of compile- or run-time pitches (-1 indicates run-time) + * - Y compile-time pitches + * - X run-time pitches + * - N dimensions, where N = X + Y + * + * For example, in the code below a wrap is defined for a batch of HWC 3D tensors where each row in H + * has a run-time row pitch (second -1), a pixel in W has a compile-time constant pitch as + * the size of the pixel type and a channel in C has also a compile-time constant pitch as + * the size of the channel type. 
+ * + * @code + * using DataType = ...; + * using ChannelType = BaseType; + * using TensorBatchWrap = TensorBatchWrap; + * TensorBatch tensorBatch = ...; + * TensorBatchWrap tensorBatchWrap(tensorBatch.data()); + * // Elements may be accessed via operator[] using an int4 argument. They can also be accessed via pointer using + * // the ptr method with up to 4 integer arguments or by accessing each TensorWrap separately with tensor(...) method. + * @endcode + * + * @sa NVCV_CPP_CUDATOOLS_TENSORBATCHWRAPS + * + * @tparam T Type (it can be const) of each element inside the tensor wrapper. + * @tparam Strides Each compile-time (use -1 for run-time) pitch in bytes from first to last dimension. + */ +template +class TensorBatchWrap; + +template +class TensorBatchWrap +{ + static_assert(HasTypeTraits, "TensorBatchWrap can only be used if T has type traits"); + +public: + // The type provided as template parameter is the value type, i.e. the type of each element inside this wrapper. + using ValueType = const T; + + static constexpr int kNumDimensions = sizeof...(Strides); + static constexpr int kVariableStrides = ((Strides == -1) + ...); + static constexpr int kConstantStrides = kNumDimensions - kVariableStrides; + + TensorBatchWrap() = default; + + /** + * Constructs a constant TensorBatchWrap by wrapping a \p data argument. + * + * @param[in] data Tensor batch data to wrap. + */ + __host__ TensorBatchWrap(const TensorBatchDataStridedCuda &data) + : TensorBatchWrap(data.cdata()) + { + } + + /** + * Constructs a constant TensorBatchWrap by wrapping a \p data argument. + * + * @param[in] data Tensor batch data to wrap. + */ + __host__ __device__ TensorBatchWrap(const NVCVTensorBatchData &data) + : m_numTensors(data.numTensors) + , m_tensors(data.buffer.strided.tensors) + { + } + + /** + * Get a read-only proxy (as pointer) of the given tensor at the given coordinates. + * + * @param[in] t Tensor index in the list. + * @param[in] c Coordinates in the given tensor; + * + * @return The const pointer to the beginning of the given coordinates. + */ + template + inline const __host__ __device__ T *ptr(int t, Coords... c) const + { + return doGetPtr(t, c...); + } + + /** + * Subscript operator for read-and-write access. + * + * @param[in] t Tensor index in the list. + * @param[in] c (N+1)-D coordinates tensor index and coords (from last to first dimension) to be accessed. + * E.g. for a 2-dimensional tensors, the coordinates would be: {tensor_id, column, row} + * + * @return Accessed reference. + */ + template>>> + inline const __host__ __device__ T &operator[](DimType c) const + { + static_assert(NumElements == kNumDimensions + 1, + "Coordinates in the subscript operator must be (N+1)-dimensional, " + "where N is a dimensionality of a single tensor in the batch."); + if constexpr (NumElements == 1) + { + return *doGetPtr(c.x); + } + if constexpr (NumElements == 2) + { + return *doGetPtr(c.x, c.y); + } + else if constexpr (NumElements == 3) + { + return *doGetPtr(c.x, c.z, c.y); + } + else if constexpr (NumElements == 4) + { + return *doGetPtr(c.x, c.w, c.z, c.y); + } + } + + /** + * @brief Constructs a read-only wrapper for the tensor on index \p t + * The list of static strides can be provided as a template parameter. + * It should be a list of N outer strides (from inner to outer). 
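+ *
+ * For example (an illustrative sketch; the wrapper variable and the concrete stride template
+ * arguments are assumptions that depend on how the wrapped batch was declared):
+ * @code
+ * // Read one value from tensor t of a batch of HWC tensors with run-time strides:
+ * auto tw = batchWrap.tensor(t);        // read-only TensorWrap for the t-th tensor
+ * auto v  = *batchWrap.ptr(t, y, x, c); // equivalent direct access through ptr()
+ * @endcode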
+ * + * @tparam Strides static strides + * @param t index of the tensor + */ + inline const __host__ __device__ auto tensor(int t) const + { + return TensorWrap(doGetPtr(t), strides(t)); + } + + /** + * @brief Returns a number of tensors in the batch. + */ + inline __host__ __device__ int32_t numTensors() const + { + return m_numTensors; + } + + /** + * @brief Returns a pointer to shape buffer of the tensor at index \p t + * + * @param t tensor index + */ + inline const __host__ __device__ int64_t *shape(int t) const + { + assert(t >= 0 && t < m_numTensors); + return m_tensors[t].shape; + } + + /** + * @brief Returns a pointer to a stride buffer of the tensor at index \p t + * + * @param t tensor index + */ + inline const __host__ __device__ int64_t *strides(int t) const + { + assert(t >= 0 && t < m_numTensors); + return m_tensors[t].stride; + } + +protected: + template + inline __host__ __device__ T *doGetPtr(int t, Args... c) const + { + static_assert(std::conjunction_v...>); + static_assert(sizeof...(Args) <= kNumDimensions); + + constexpr int kArgSize = sizeof...(Args); + constexpr int kVarSize = kArgSize < kVariableStrides ? kArgSize : kVariableStrides; + constexpr int kDimSize = kArgSize < kNumDimensions ? kArgSize : kNumDimensions; + constexpr int kStride[] = {std::forward(Strides)...}; + + // Computing offset first potentially postpones or avoids 64-bit math during addressing + int offset = 0; + if constexpr (kArgSize > 0) + { + int coords[] = {std::forward(c)...}; + const int64_t *strides = m_tensors[t].stride; + +#pragma unroll + for (int i = 0; i < kVarSize; ++i) + { + offset += coords[i] * strides[i]; + } +#pragma unroll + for (int i = kVariableStrides; i < kDimSize; ++i) + { + offset += coords[i] * kStride[i]; + } + } + + NVCVByte *dataPtr = m_tensors[t].data; + return reinterpret_cast(dataPtr + offset); + } + + int32_t m_numTensors; + NVCVTensorBatchElementStridedRec *m_tensors; +}; + +/** + * TensorBatch wrapper class specialized for non-constant value type. + * + * @tparam T Type (non-const) of each element inside the tensor batch wrapper. + * @tparam Strides Each compile-time (use -1 for run-time) pitch in bytes from first to last dimension. + */ +template +class TensorBatchWrap : public TensorBatchWrap +{ + using Base = TensorBatchWrap; + +public: + using ValueType = T; + using Base::doGetPtr; + using Base::kNumDimensions; + using Base::m_tensors; + using Base::strides; + + /** + * Constructs a TensorBatchWrap by wrapping a \p data argument. + * + * @param[in] data Tensor batch data to wrap. + */ + __host__ TensorBatchWrap(const TensorBatchDataStridedCuda &data) + : Base(data) + { + } + + /** + * Constructs a TensorBatchWrap by wrapping a \p data argument. + * + * @param[in] data Tensor batch data to wrap. + */ + __host__ __device__ TensorBatchWrap(NVCVTensorBatchData &data) + : Base(data) + { + } + + /** + * Get a read-and-write proxy (as pointer) of the given tensor at the given coordinates. + * + * @param[in] t Tensor index in the list. + * @param[in] c Coordinates in the given tensor; + * + * @return The const pointer to the beginning of the given coordinates. + */ + template + inline __host__ __device__ T *ptr(int t, Coords... c) const + { + return doGetPtr(t, c...); + } + + /** + * @brief Constructs a read-and-write wrapper for the tensor on index \p t + * The list of static strides can be provided as a template parameter. + * It should be a list of N outer strides (from inner to outer). 
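+ *
+ * A write-access sketch (the variable names below are illustrative assumptions):
+ * @code
+ * auto tw = batchWrap.tensor(t);       // read-and-write TensorWrap for the t-th tensor
+ * *batchWrap.ptr(t, y, x, c) = value;  // or write directly through ptr()
+ * @endcode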
+ * + * @tparam Strides static strides + * @param t index of the tensor + */ + inline __host__ __device__ auto tensor(int t) const + { + return TensorWrap(doGetPtr(t), strides(t)); + } + + /** + * Subscript operator for read-and-write access. + * + * @param[in] t Tensor index in the list. + * @param[in] c (N+1)-D coordinates - tensor index and coords (from inner to outer) to be accessed. + * E.g. for a 2-dimensional tensors, the coordinates would be: {tensor_id, column, row} + * + * @return Accessed reference. + */ + template>>> + inline __host__ __device__ T &operator[](DimType c) const + { + static_assert(NumElements == kNumDimensions + 1, + "Coordinates in the subscript operator must be (N+1)-dimensional, " + "where N is a dimensionality of a single tensor in the batch."); + if constexpr (NumElements == 1) + { + return *doGetPtr(c.x); + } + if constexpr (NumElements == 2) + { + return *doGetPtr(c.x, c.y); + } + else if constexpr (NumElements == 3) + { + return *doGetPtr(c.x, c.z, c.y); + } + else if constexpr (NumElements == 4) + { + return *doGetPtr(c.x, c.w, c.z, c.y); + } + } +}; + +/**@}*/ + +/** + * Specializes \ref TensorBatchWrap template classes to different dimensions. + * + * The specializations have the last dimension as the only compile-time dimension as size of T. All other + * dimensions have run-time pitch and must be provided. + * + * Template arguments: + * - T data type of each element in \ref TensorBatchWrap + * + * @sa NVCV_CPP_CUDATOOLS_TENSORBATCHWRAP + * + * @defgroup NVCV_CPP_CUDATOOLS_TENSORBATCHWRAPS TensorBatchWrap shortcuts + * @{ + */ + +template +using TensorBatch1DWrap = TensorBatchWrap; + +template +using TensorBatch2DWrap = TensorBatchWrap; + +template +using TensorBatch3DWrap = TensorBatchWrap; + +template +using TensorBatch4DWrap = TensorBatchWrap; + +template +using TensorBatch5DWrap = TensorBatchWrap; + +template +using TensorBatchNDWrap = std::conditional_t< + N == 1, TensorBatch1DWrap, + std::conditional_t, + std::conditional_t, + std::conditional_t, + std::conditional_t, void>>>>>; +/**@}*/ + +} // namespace nvcv::cuda + +#endif // NVCV_CUDA_TENSOR_BATCH_WRAP_HPP diff --git a/src/nvcv_types/include/nvcv/cuda/TensorWrap.hpp b/src/nvcv_types/include/nvcv/cuda/TensorWrap.hpp index 487b4f82c..1cc7143be 100644 --- a/src/nvcv_types/include/nvcv/cuda/TensorWrap.hpp +++ b/src/nvcv_types/include/nvcv/cuda/TensorWrap.hpp @@ -110,6 +110,23 @@ class TensorWrap static_assert(sizeof...(Args) == kVariableStrides); } + /** + * Constructs a constant TensorWrap by wrapping a const \p data pointer argument + * and copying the dyncamic strides from a given buffer. + * + * @param[in] data Pointer to the data that will be wrapped. + * @param[in] strides Pointer to stride data + */ + template + explicit __host__ __device__ TensorWrap(const DataType *data, StrideType *strides) + : m_data(reinterpret_cast(data)) + { + for (int i = 0; i < kVariableStrides; ++i) + { + m_strides[i] = strides[i]; + } + } + /** * Constructs a constant TensorWrap by wrapping an \p image argument. * @@ -278,6 +295,19 @@ class TensorWrap : public TensorWrap { } + /** + * Constructs a TensorWrap by wrapping a const \p data pointer argument + * and copying the dyncamic strides from a given buffer. + * + * @param[in] data Pointer to the data that will be wrapped. + * @param[in] strides Pointer to stride data + */ + template + explicit __host__ __device__ TensorWrap(DataType *data, StrideType *strides) + : Base(data, strides) + { + } + /** * Constructs a TensorWrap by wrapping an \p image argument. 
* @@ -385,11 +415,16 @@ using Tensor3DWrap = TensorWrap; template using Tensor4DWrap = TensorWrap; +template +using Tensor5DWrap = TensorWrap; + template using TensorNDWrap = std::conditional_t< N == 1, Tensor1DWrap, std::conditional_t, - std::conditional_t, std::conditional_t, void>>>>; + std::conditional_t, + std::conditional_t, + std::conditional_t, void>>>>>; /**@}*/ diff --git a/src/nvcv_types/priv/IAllocator.cpp b/src/nvcv_types/priv/IAllocator.cpp index 6807fe36a..dbd7f03e4 100644 --- a/src/nvcv_types/priv/IAllocator.cpp +++ b/src/nvcv_types/priv/IAllocator.cpp @@ -73,7 +73,7 @@ void *IAllocator::allocHostPinnedMem(int64_t size, int32_t align) if (util::RoundUp(size, align) != size) { throw Exception(NVCV_ERROR_INVALID_ARGUMENT, - "Host memory allocator size must be an integral multiple of alignment %d, not %ld", align, + "Pinned memory allocator size must be an integral multiple of alignment %d, not %ld", align, size); } @@ -101,7 +101,7 @@ void *IAllocator::allocCudaMem(int64_t size, int32_t align) if (util::RoundUp(size, align) != size) { throw Exception(NVCV_ERROR_INVALID_ARGUMENT, - "Host memory allocator size must be an integral multiple of alignment %d, not %ld", align, + "Device memory allocator size must be an integral multiple of alignment %d, not %ld", align, size); } diff --git a/src/util/Compat.cpp b/src/util/Compat.cpp index 753a64799..77f8f046e 100644 --- a/src/util/Compat.cpp +++ b/src/util/Compat.cpp @@ -108,7 +108,7 @@ static std::vector *g_ListMainThread = nullptr; int my_thread_atexit_impl(void (*func)(void *), void *arg, void *d) { - std::vector *list; + std::vector *list = nullptr; if (IsMainThread()) { diff --git a/src/util/Math.hpp b/src/util/Math.hpp index 5e20f1b40..c558a8ee3 100644 --- a/src/util/Math.hpp +++ b/src/util/Math.hpp @@ -22,6 +22,7 @@ #include "Metaprogramming.hpp" #include +#include #include namespace nvcv::util { @@ -123,6 +124,18 @@ NVCV_CUDA_HOST_DEVICE constexpr auto DivUpPowerOfTwo(T num, TypeIdentity den) return (num >> ILog2(den)) + !!(num & (den - 1)); } +/// @brief Calculates normalized sinc i.e. `sin(pi * x) / (pi * x)` +template>> +NVCV_CUDA_HOST_DEVICE NVCV_FORCE_INLINE T sinc(T x) +{ + static_assert(sizeof(T) >= sizeof(float)); // not analyzed for smaller floats, eps may require adjustment + constexpr T eps = sizeof(T) <= sizeof(float) ? 1e-5 : 1e-8; + x *= static_cast(M_PI); + if (std::abs(x) < eps) + return static_cast(1.0) - x * x * (static_cast(1.0) / 6); // remove singularity by using Taylor expansion + return std::sin(x) / x; +} + } // namespace nvcv::util #endif // NVCV_UTIL_MATH_HPP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 88ceacc58..2fb4c84d7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -16,7 +16,14 @@ project(cvcuda_tests) set(CMAKE_FOLDER tests) -# Tests require C++20 +# Tests require C++20: +# The goal for the infrastructure written for tests is to make it easy to +# - add test cases for new parameter ranges, +# - make the tested parameter set visually match what's defined in the reference doc +# - ... +# so that we can quickly check if everything we claim is being tested. +# In order to achieve this, we created "tests/common/ValueList.hpp" that implements a domain-specific embedded language making it easier to define the above. +# To make usage easier, we had to use C++20 language features. 
set(CMAKE_CXX_STANDARD 20) enable_testing() diff --git a/tests/cvcuda/python/cvcuda_test_python.in b/tests/cvcuda/python/cvcuda_test_python.in index eb648d827..f1242e371 100755 --- a/tests/cvcuda/python/cvcuda_test_python.in +++ b/tests/cvcuda/python/cvcuda_test_python.in @@ -16,25 +16,30 @@ # limitations under the License. tests_dir="@PYTHON_TEST_DIR@" -python_versions="@PYTHON_TEST_VERSIONS@" +python_versions_tentative="@PYTHON_TEST_VERSIONS@" + +python_versions="" # Verify if correct package dependencies are installed -------- pip_depends="pytest torch" -declare -a install_commands - -for ver in $python_versions; do +# Collect all python versions that are indeed installed and have proper dependencies installed +# Two behaviors: +# - default: skip Python versions that are not installed or don't have pytest and torch installed +# - if NVCV_FORCE_PYTHON is set: exit with error +for ver in $python_versions_tentative; do if ! python$ver -c "import pytest, torch" > /dev/null 2>&1; then - install_commands+=("sudo python$ver -m pip install $pip_depends") + echo "WARNING: Python version $ver not installed or missing proper dependencies" + echo "Please install Python version $ver and run the following commands before running tests: sudo python$ver -m pip install $pip_depends" + if [[ "$NVCV_FORCE_PYTHON" == 1 || "$NVCV_FORCE_PYTHON" == yes ]]; then + exit 1 #hard exit + fi + else + echo "Found Python version $ver installed with proper dependencies, adding to tests" + python_versions+="$ver " fi done -if [[ "${install_commands[*]}" ]]; then - echo "Please run the following commands before running $(basename $0): " - ( IFS=$'\n'; echo -e "${install_commands[*]}" ) - exit 1 -fi - # Run tests -------- tmpdir=$(mktemp -d) diff --git a/tests/cvcuda/python/test_ophqresize.py b/tests/cvcuda/python/test_ophqresize.py new file mode 100644 index 000000000..3a571aebd --- /dev/null +++ b/tests/cvcuda/python/test_ophqresize.py @@ -0,0 +1,306 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
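+
+# These tests exercise the cvcuda.hq_resize / cvcuda.hq_resize_into bindings for plain Tensors,
+# ImageBatchVarShape and TensorBatch inputs, covering 2D (HW/HWC/NHWC) and 3D (DHW/DHWC/NDHWC)
+# layouts with optional ROI, antialiasing, and dtype conversion (e.g. U8 -> F32).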
+ +import nvcv +import cvcuda +import pytest as t +import cvcuda_util as util +import numpy as np + +RNG = np.random.default_rng(12345) + + +def get_shape(in_shape, layout, out_size): + assert len(out_size) in (2, 3) + out_size_layout = "HW" if len(out_size) == 2 else "DHW" + assert len(out_size) == len(out_size_layout) + out_size_map = dict(zip(out_size_layout, out_size)) + assert len(in_shape) == len(layout) + return tuple( + out_size_map.get(name, extent) for name, extent in zip(layout, in_shape) + ) + + +@t.mark.parametrize( + "src_args, dst_args, interpolation_args, roi", + [ + ( + ((2, 244, 244, 3), nvcv.Type.U8, "NHWC"), + ((122, 122), nvcv.Type.U8), + (cvcuda.Interp.NEAREST, cvcuda.Interp.NEAREST, False), + None, + ), + ( + ((2, 244, 244, 3), nvcv.Type.U8, "NHWC"), + ((122, 244), nvcv.Type.U8), + (cvcuda.Interp.LINEAR, cvcuda.Interp.LINEAR, False), + None, + ), + ( + ((1, 244, 244, 2), nvcv.Type.U8, "NHWC"), + ((122, 122), nvcv.Type.F32), + (cvcuda.Interp.LINEAR, cvcuda.Interp.CUBIC, True), + (50, 10, 230, 220), + ), + ( + ((3, 101, 244, 301, 3), nvcv.Type.U16, "NDHWC"), + ((122, 54, 101), nvcv.Type.U16), + (cvcuda.Interp.GAUSSIAN, cvcuda.Interp.CUBIC, True), + None, + ), + ( + ((54, 54, 54, 4), nvcv.Type.U8, "DHWC"), + ((100, 100, 100), nvcv.Type.U8), + (cvcuda.Interp.LANCZOS, cvcuda.Interp.LINEAR, True), + (54, 0, 0, 0, 54, 54), + ), + ( + ((101, 102, 103), nvcv.Type.U8, "DHW"), + ((41, 45, 49), nvcv.Type.F32), + (cvcuda.Interp.NEAREST, cvcuda.Interp.LINEAR, False), + None, + ), + ( + ((101, 102, 103), nvcv.Type.U8, "DHW"), + ((101, 45, 49), nvcv.Type.F32), + (cvcuda.Interp.NEAREST, cvcuda.Interp.LINEAR, False), + None, + ), + ], +) +def test_op_hq_resize_api(src_args, dst_args, interpolation_args, roi): + stream = cvcuda.Stream() + src_shape, src_type, layout = src_args + assert len(layout) == len(src_shape) + dst_size, dst_type = dst_args + min_interpolation, mag_interpolation, antialias = interpolation_args + out_shape = get_shape(src_shape, layout, dst_size) + + t_src = util.create_tensor(*src_args) + + if src_type != dst_type: + t_dst = util.create_tensor(out_shape, dst_type, layout) + t_tmp = cvcuda.hq_resize_into( + t_dst, + t_src, + min_interpolation=min_interpolation, + mag_interpolation=mag_interpolation, + antialias=antialias, + stream=stream, + roi=roi, + ) + assert t_tmp is t_dst + else: + t_dst = cvcuda.hq_resize( + t_src, + dst_size, + min_interpolation=min_interpolation, + mag_interpolation=mag_interpolation, + antialias=antialias, + stream=stream, + roi=roi, + ) + assert t_dst.layout == t_src.layout + assert t_dst.dtype == dst_type + assert t_dst.shape == out_shape + + +@t.mark.parametrize( + "num_samples, src_args, dst_type, interpolation_args, roi", + [ + ( + 1, + ((512, 1024, 3), np.uint8, "HWC"), + np.uint8, + (cvcuda.Interp.LINEAR, cvcuda.Interp.LINEAR, True), + None, + ), + ( + 5, + ((122, 244, 4), np.float32, "HWC"), + np.float32, + (cvcuda.Interp.CUBIC, cvcuda.Interp.CUBIC, False), + [(100, 200, 10, 10)], + ), + ( + 3, + ((244, 122), np.uint8, "HW"), + np.float32, + (cvcuda.Interp.NEAREST, cvcuda.Interp.NEAREST, False), + [(200, 100, 10, 10)], + ), + ], +) +def test_op_hq_resize_var_shape_api( + num_samples, src_args, dst_type, interpolation_args, roi +): + stream = cvcuda.Stream() + + src_shape, src_type, layout = src_args + assert len(layout) == len(src_shape) + min_interpolation, mag_interpolation, antialias = interpolation_args + + b_src = nvcv.ImageBatchVarShape(num_samples) + out_sizes = [] + for _ in range(num_samples): + sample_size = tuple( + 
RNG.integers(1, extent + 1) + for name, extent in zip(layout, src_shape) + if name in "HW" + ) + sample_shape = get_shape(src_shape, layout, sample_size) + h_data = util.generate_data(sample_shape, src_type, rng=RNG) + image = util.to_nvcv_image(h_data) + b_src.pushback(image) + out_sizes.append( + tuple(RNG.integers(1, 2 * extent + 1) for extent in sample_size) + ) + + if src_type != dst_type: + b_dst = nvcv.ImageBatchVarShape(num_samples) + assert len(out_sizes) == num_samples + for out_size in out_sizes: + out_shape = get_shape(src_shape, layout, out_size) + h_data = util.generate_data(out_shape, dst_type, rng=RNG) + image = util.to_nvcv_image(h_data) + b_dst.pushback(image) + + b_tmp = cvcuda.hq_resize_into( + b_dst, + b_src, + min_interpolation=min_interpolation, + mag_interpolation=mag_interpolation, + antialias=antialias, + stream=stream, + roi=roi, + ) + assert b_tmp is b_dst + else: + b_dst = cvcuda.hq_resize( + b_src, + out_sizes, + min_interpolation=min_interpolation, + mag_interpolation=mag_interpolation, + antialias=antialias, + stream=stream, + roi=roi, + ) + + assert len(b_dst) == len(b_src) + assert b_dst.capacity == b_src.capacity + assert b_dst.uniqueformat == b_src.uniqueformat + assert b_dst.maxsize == tuple( + max(extent) for extent in reversed(list(zip(*out_sizes))) + ) + + +@t.mark.parametrize( + "num_samples, src_args, dst_type, interpolation_args, use_roi", + [ + ( + 7, + ((244, 244, 3), nvcv.Type.U8, "HWC"), + nvcv.Type.U8, + (cvcuda.Interp.NEAREST, cvcuda.Interp.NEAREST, False), + False, + ), + ( + 5, + ((244, 244), nvcv.Type.U8, "HW"), + nvcv.Type.F32, + (cvcuda.Interp.LINEAR, cvcuda.Interp.CUBIC, True), + True, + ), + ( + 3, + ((101, 244, 301, 3), nvcv.Type.U16, "DHWC"), + nvcv.Type.U16, + (cvcuda.Interp.GAUSSIAN, cvcuda.Interp.CUBIC, True), + True, + ), + ( + 1, + ((101, 102, 103), nvcv.Type.U8, "DHW"), + nvcv.Type.F32, + (cvcuda.Interp.NEAREST, cvcuda.Interp.LINEAR, False), + False, + ), + ], +) +def test_op_hq_resize_tensor_batch_api( + num_samples, src_args, dst_type, interpolation_args, use_roi +): + stream = cvcuda.Stream() + + src_shape, src_type, layout = src_args + assert len(layout) == len(src_shape) + min_interpolation, mag_interpolation, antialias = interpolation_args + + b_src = nvcv.TensorBatch(num_samples) + out_sizes = [] + rois = [] + for _ in range(num_samples): + sample_size = tuple( + RNG.integers(1, extent + 1) + for name, extent in zip(layout, src_shape) + if name in "DHW" + ) + sample_shape = get_shape(src_shape, layout, sample_size) + t_src = util.create_tensor(sample_shape, src_type, layout) + b_src.pushback(t_src) + out_sizes.append( + tuple(RNG.integers(1, 2 * extent + 1) for extent in sample_size) + ) + if use_roi: + roi = tuple( + RNG.integers(1, extent + 1) for _ in range(2) for extent in sample_size + ) + rois.append(roi) + + if src_type != dst_type: + b_dst = nvcv.TensorBatch(num_samples) + assert len(out_sizes) == num_samples + for out_size in out_sizes: + out_shape = get_shape(src_shape, layout, out_size) + t_dst = util.create_tensor(out_shape, dst_type, layout) + b_dst.pushback(t_dst) + + b_tmp = cvcuda.hq_resize_into( + b_dst, + b_src, + min_interpolation=min_interpolation, + mag_interpolation=mag_interpolation, + antialias=antialias, + stream=stream, + roi=None if not use_roi else rois, + ) + assert b_dst is b_tmp + else: + b_dst = cvcuda.hq_resize( + b_src, + out_sizes, + min_interpolation=min_interpolation, + mag_interpolation=mag_interpolation, + antialias=antialias, + stream=stream, + roi=None if not use_roi else rois, + 
) + assert len(b_dst) == len(b_src) + assert b_dst.capacity == b_src.capacity + assert b_dst.layout == b_src.layout + assert b_dst.ndim == b_src.ndim + assert b_dst.dtype == dst_type + for i in range(num_samples): + assert b_dst[i].shape == get_shape(src_shape, layout, out_sizes[i]) diff --git a/tests/cvcuda/system/CMakeLists.txt b/tests/cvcuda/system/CMakeLists.txt index a9e1a6483..a14060961 100644 --- a/tests/cvcuda/system/CMakeLists.txt +++ b/tests/cvcuda/system/CMakeLists.txt @@ -88,6 +88,7 @@ add_executable(cvcuda_test_system GaussianNoiseUtils.cu TestOpInpaint.cpp TestOpFindHomography.cpp + TestOpHQResize.cpp ) target_link_libraries(cvcuda_test_system diff --git a/tests/cvcuda/system/ResizeUtils.cpp b/tests/cvcuda/system/ResizeUtils.cpp index 9dea3ae34..401ed59f8 100644 --- a/tests/cvcuda/system/ResizeUtils.cpp +++ b/tests/cvcuda/system/ResizeUtils.cpp @@ -29,7 +29,8 @@ namespace nvcv::test { void Resize(std::vector &hDst, int dstRowStride, nvcv::Size2D dstSize, const std::vector &hSrc, - int srcRowStride, nvcv::Size2D srcSize, nvcv::ImageFormat fmt, NVCVInterpolationType interpolation) + int srcRowStride, nvcv::Size2D srcSize, nvcv::ImageFormat fmt, NVCVInterpolationType interpolation, + bool isVarshape) { if (interpolation == NVCV_INTERP_NEAREST || interpolation == NVCV_INTERP_LINEAR || interpolation == NVCV_INTERP_CUBIC) @@ -85,65 +86,107 @@ void Resize(std::vector &hDst, int dstRowStride, nvcv::Size2D dstSize, } else { - double invscale - = 1.f / (std::min(jScale, srcSize.w - fsx1) * std::min(iScale, srcSize.h - fsy1)); - - for (int dy = sy1; dy < sy2; ++dy) + if (!isVarshape || (iScale >= 1.0f && jScale >= 1.0f)) { - for (int dx = sx1; dx < sx2; ++dx) - if (dy >= 0 && dy < srcSize.h && dx >= 0 && dx < srcSize.w) - out = out + srcPtr[dy * srcRowStride + dx * elementsPerPixel + k] * invscale; + double invscale + = 1.f / (std::min(jScale, srcSize.w - fsx1) * std::min(iScale, srcSize.h - fsy1)); - if (sx1 > fsx1) - if (dy >= 0 && dy < srcSize.h && sx1 - 1 >= 0 && sx1 - 1 < srcSize.w) + for (int dy = sy1; dy < sy2; ++dy) + { + for (int dx = sx1; dx < sx2; ++dx) + if (dy >= 0 && dy < srcSize.h && dx >= 0 && dx < srcSize.w) + out = out + srcPtr[dy * srcRowStride + dx * elementsPerPixel + k] * invscale; + + if (sx1 > fsx1) + if (dy >= 0 && dy < srcSize.h && sx1 - 1 >= 0 && sx1 - 1 < srcSize.w) + out = out + + srcPtr[dy * srcRowStride + (sx1 - 1) * elementsPerPixel + k] + * ((sx1 - fsx1) * invscale); + + if (sx2 < fsx2) + if (dy >= 0 && dy < srcSize.h && sx2 >= 0 && sx2 < srcSize.w) + out = out + + srcPtr[dy * srcRowStride + sx2 * elementsPerPixel + k] + * ((fsx2 - sx2) * invscale); + } + + if (sy1 > fsy1) + for (int dx = sx1; dx < sx2; ++dx) + if (sy1 - 1 >= 0 && sy1 - 1 < srcSize.h && dx >= 0 && dx < srcSize.w) + out = out + + srcPtr[(sy1 - 1) * srcRowStride + dx * elementsPerPixel + k] + * ((sy1 - fsy1) * invscale); + + if (sy2 < fsy2) + for (int dx = sx1; dx < sx2; ++dx) + if (sy2 >= 0 && sy2 < srcSize.h && dx >= 0 && dx < srcSize.w) + out = out + + srcPtr[sy2 * srcRowStride + dx * elementsPerPixel + k] + * ((fsy2 - sy2) * invscale); + + if ((sy1 > fsy1) && (sx1 > fsx1)) + if (sy1 - 1 >= 0 && sy1 - 1 < srcSize.h && sx1 - 1 >= 0 && sx1 - 1 < srcSize.w) out = out - + srcPtr[dy * srcRowStride + (sx1 - 1) * elementsPerPixel + k] - * ((sx1 - fsx1) * invscale); + + srcPtr[(sy1 - 1) * srcRowStride + (sx1 - 1) * elementsPerPixel + k] + * ((sy1 - fsy1) * (sx1 - fsx1) * invscale); - if (sx2 < fsx2) - if (dy >= 0 && dy < srcSize.h && sx2 >= 0 && sx2 < srcSize.w) + if ((sy1 > fsy1) && (sx2 < 
fsx2)) + if (sy1 - 1 >= 0 && sy1 - 1 < srcSize.h && sx2 >= 0 && sx2 < srcSize.w) out = out - + srcPtr[dy * srcRowStride + sx2 * elementsPerPixel + k] - * ((fsx2 - sx2) * invscale); - } + + srcPtr[(sy1 - 1) * srcRowStride + sx2 * elementsPerPixel + k] + * ((sy1 - fsy1) * (fsx2 - sx2) * invscale); - if (sy1 > fsy1) - for (int dx = sx1; dx < sx2; ++dx) - if (sy1 - 1 >= 0 && sy1 - 1 < srcSize.h && dx >= 0 && dx < srcSize.w) + if ((sy2 < fsy2) && (sx2 < fsx2)) + if (sy2 >= 0 && sy2 < srcSize.h && sx2 >= 0 && sx2 < srcSize.w) out = out - + srcPtr[(sy1 - 1) * srcRowStride + dx * elementsPerPixel + k] - * ((sy1 - fsy1) * invscale); + + srcPtr[sy2 * srcRowStride + sx2 * elementsPerPixel + k] + * ((fsy2 - sy2) * (fsx2 - sx2) * invscale); - if (sy2 < fsy2) - for (int dx = sx1; dx < sx2; ++dx) - if (sy2 >= 0 && sy2 < srcSize.h && dx >= 0 && dx < srcSize.w) + if ((sy2 < fsy2) && (sx1 > fsx1)) + if (sy2 >= 0 && sy2 < srcSize.h && sx1 - 1 >= 0 && sx1 - 1 < srcSize.w) out = out - + srcPtr[sy2 * srcRowStride + dx * elementsPerPixel + k] - * ((fsy2 - sy2) * invscale); - - if ((sy1 > fsy1) && (sx1 > fsx1)) - if (sy1 - 1 >= 0 && sy1 - 1 < srcSize.h && sx1 - 1 >= 0 && sx1 - 1 < srcSize.w) - out = out - + srcPtr[(sy1 - 1) * srcRowStride + (sx1 - 1) * elementsPerPixel + k] - * ((sy1 - fsy1) * (sx1 - fsx1) * invscale); - - if ((sy1 > fsy1) && (sx2 < fsx2)) - if (sy1 - 1 >= 0 && sy1 - 1 < srcSize.h && sx2 >= 0 && sx2 < srcSize.w) - out = out - + srcPtr[(sy1 - 1) * srcRowStride + sx2 * elementsPerPixel + k] - * ((sy1 - fsy1) * (fsx2 - sx2) * invscale); - - if ((sy2 < fsy2) && (sx2 < fsx2)) - if (sy2 >= 0 && sy2 < srcSize.h && sx2 >= 0 && sx2 < srcSize.w) - out = out - + srcPtr[sy2 * srcRowStride + sx2 * elementsPerPixel + k] - * ((fsy2 - sy2) * (fsx2 - sx2) * invscale); - - if ((sy2 < fsy2) && (sx1 > fsx1)) - if (sy2 >= 0 && sy2 < srcSize.h && sx1 - 1 >= 0 && sx1 - 1 < srcSize.w) - out = out - + srcPtr[sy2 * srcRowStride + (sx1 - 1) * elementsPerPixel + k] - * ((fsy2 - sy2) * (sx1 - fsx1) * invscale); + + srcPtr[sy2 * srcRowStride + (sx1 - 1) * elementsPerPixel + k] + * ((fsy2 - sy2) * (sx1 - fsx1) * invscale); + } + else // zoom in for varshape + { + double iScale_inv = 1.0 / iScale; + double jScale_inv = 1.0 / jScale; + + sy1 = cuda::round(fsy1); + sx1 = cuda::round(fsx1); + float fy = (float)(float(di + 1) - float(sy1 + 1) * iScale_inv); + fy = fy <= 0 ? 0.f : fy - cuda::round(fy); + + float cbufy[2]; + cbufy[0] = 1.f - fy; + cbufy[1] = fy; + + float fx = (float)(float(dj + 1) - float(sx1 + 1) * jScale_inv); + fx = fx <= 0 ? 
0.f : fx - cuda::round(fx); + + if (sx1 < 0) + { + fx = 0, sx1 = 0; + } + if (sx1 >= srcSize.w - 1) + { + fx = 0, sx1 = srcSize.w - 2; + } + if (sy1 >= srcSize.h - 1) + { + sy1 = srcSize.h - 2; + } + + float cbufx[2]; + cbufx[0] = 1.f - fx; + cbufx[1] = fx; + out = srcPtr[sy1 * srcRowStride + sx1 * elementsPerPixel + k] * cbufx[0] * cbufy[0] + + srcPtr[(sy1 + 1) * srcRowStride + sx1 * elementsPerPixel + k] * cbufx[0] * cbufy[1] + + srcPtr[sy1 * srcRowStride + (sx1 + 1) * elementsPerPixel + k] * cbufx[1] * cbufy[0] + + srcPtr[(sy1 + 1) * srcRowStride + (sx1 + 1) * elementsPerPixel + k] * cbufx[1] + * cbufy[1]; + } } out = std::rint(std::abs(out)); diff --git a/tests/cvcuda/system/ResizeUtils.hpp b/tests/cvcuda/system/ResizeUtils.hpp index 3296f6283..d8c27f7ca 100644 --- a/tests/cvcuda/system/ResizeUtils.hpp +++ b/tests/cvcuda/system/ResizeUtils.hpp @@ -30,7 +30,8 @@ namespace nvcv::test { // support NVCV_INTERP_NEAREST/NVCV_INTERP_LINEAR/NVCV_INTERP_CUBIC/NVCV_INTERP_AREA void Resize(std::vector &hDst, int dstRowStride, nvcv::Size2D dstSize, const std::vector &hSrc, - int srcRowStride, nvcv::Size2D srcSize, nvcv::ImageFormat fmt, NVCVInterpolationType interpolation); + int srcRowStride, nvcv::Size2D srcSize, nvcv::ImageFormat fmt, NVCVInterpolationType interpolation, + bool isVarshape); // only support NVCV_INTERP_NEAREST/NVCV_INTERP_LINEAR/NVCV_INTERP_CUBIC void ResizedCrop(std::vector &hDst, int dstRowStride, nvcv::Size2D dstSize, const std::vector &hSrc, diff --git a/tests/cvcuda/system/TestOpChannelReorder.cpp b/tests/cvcuda/system/TestOpChannelReorder.cpp index e2f2eb158..011491bb4 100644 --- a/tests/cvcuda/system/TestOpChannelReorder.cpp +++ b/tests/cvcuda/system/TestOpChannelReorder.cpp @@ -27,7 +27,43 @@ namespace test = nvcv::test; -TEST(TestOpChannelReorder, smoke_test_works) +class TestOpChannelReorder : public ::testing::Test +{ +protected: + TestOpChannelReorder() {} + + ~TestOpChannelReorder() {} + + void SetUp() override + { + // clang-format off + inOrders = nvcv::Tensor( + { + {1, 4}, + "NC" + }, + nvcv::TYPE_S32); + // clang-format on + } + + void pushDefaultImages() + { + in.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_RGBA8 + }); + out.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_RGBA8 + }); + } + + nvcv::ImageBatchVarShape in{nvcv::ImageBatchVarShape(2)}, out{nvcv::ImageBatchVarShape(2)}; + nvcv::Tensor inOrders; + cvcuda::ChannelReorder chReorder; +}; + +TEST_F(TestOpChannelReorder, smoke_test_works) { // Let's set up input and output images nvcv::Image inImages[2] = { @@ -40,7 +76,8 @@ TEST(TestOpChannelReorder, smoke_test_works) nvcv::Image{nvcv::Size2D{4, 2}, nvcv::FMT_RGBA8} }; - nvcv::ImageBatchVarShape in(2), out(2); + in = nvcv::ImageBatchVarShape(2); + out = nvcv::ImageBatchVarShape(2); // Create the input and output varshapes in.pushBack(inImages[0]); @@ -66,7 +103,7 @@ TEST(TestOpChannelReorder, smoke_test_works) // Populate the order tensor // clang-format off - nvcv::Tensor inOrders( + inOrders = nvcv::Tensor( { {2, 4}, "NC" @@ -89,8 +126,6 @@ TEST(TestOpChannelReorder, smoke_test_works) cudaStream_t stream; ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream)); - cvcuda::ChannelReorder chReorder; - chReorder(stream, in, out, inOrders); ASSERT_EQ(cudaSuccess, cudaStreamSynchronize(stream)); @@ -110,3 +145,222 @@ TEST(TestOpChannelReorder, smoke_test_works) EXPECT_EQ(make_uchar4(4, 1, 2, 0), outImageValues[0]); EXPECT_EQ(make_uchar4(28, 10, 3, 0), outImageValues[1]); } + +TEST_F(TestOpChannelReorder, create_with_null_handle) +{ + 
EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaChannelReorderCreate(nullptr)); +} + +TEST_F(TestOpChannelReorder, infer_different_samples) +{ + in.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_RGBA8 + }); + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_void_samples) +{ + EXPECT_EQ(NVCV_SUCCESS, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_input_dataType) +{ + in.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_RGBAf16 + }); + out.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_RGBA8 + }); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_output_dataType) +{ + in.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_RGBA8 + }); + out.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_RGBAf16 + }); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_order_rank) +{ + pushDefaultImages(); + + // clang-format off + inOrders= nvcv::Tensor( + { + {1, 4, 4}, + "NHW" + }, + nvcv::TYPE_S32); + // clang-format on + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_order_dataType) +{ + pushDefaultImages(); + + // clang-format off + inOrders= nvcv::Tensor( + { + {1, 4}, + "NC" + }, + nvcv::TYPE_F32); + // clang-format on + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_order_first_label) +{ + pushDefaultImages(); + + // clang-format off + inOrders= nvcv::Tensor( + { + {4, 1}, + "CN" + }, + nvcv::TYPE_S32); + // clang-format on + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_order_num_samples) +{ + pushDefaultImages(); + + // clang-format off + inOrders= nvcv::Tensor( + { + {2, 4}, + "NC" + }, + nvcv::TYPE_S32); + // clang-format on + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_order_num_channels) +{ + pushDefaultImages(); + + // clang-format off + inOrders= nvcv::Tensor( + { + {1, 5}, + "NC" + }, + nvcv::TYPE_S32); + // clang-format on + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_order_small_num_channels) +{ + pushDefaultImages(); + + // clang-format off + inOrders= nvcv::Tensor( + { + {1, 3}, + "NC" + }, + nvcv::TYPE_S32); + // clang-format on + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_input_planar) +{ + in.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_BGRA8p + }); + out.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_BGRA8 + }); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_output_planar) +{ + in.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_BGRA8 + }); + out.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + 
nvcv::FMT_BGRA8p + }); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_input_different_channels) +{ + pushDefaultImages(); + in.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_BGR8 + }); + out.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_BGR8 + }); + + // clang-format off + inOrders= nvcv::Tensor( + { + {2, 4}, + "NC" + }, + nvcv::TYPE_S32); + // clang-format on + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} + +TEST_F(TestOpChannelReorder, infer_invalid_input_different_format) +{ + pushDefaultImages(); + in.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_BGRAf32 + }); + out.pushBack(nvcv::Image{ + nvcv::Size2D{4, 2}, + nvcv::FMT_BGRAf32 + }); + + // clang-format off + inOrders= nvcv::Tensor( + { + {2, 4}, + "NC" + }, + nvcv::TYPE_S32); + // clang-format on + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcv::ProtectCall([&] { chReorder(NULL, in, out, inOrders); })); +} diff --git a/tests/cvcuda/system/TestOpCvtColor.cpp b/tests/cvcuda/system/TestOpCvtColor.cpp index dd9f92cae..bb6bca005 100644 --- a/tests/cvcuda/system/TestOpCvtColor.cpp +++ b/tests/cvcuda/system/TestOpCvtColor.cpp @@ -32,16 +32,72 @@ namespace test = nvcv::test; namespace cuda = nvcv::cuda; -#define VEC_EXPECT_NEAR(vec1, vec2, delta) \ - ASSERT_EQ(vec1.size(), vec2.size()); \ - for (std::size_t idx = 0; idx < vec1.size(); ++idx) \ - { \ - EXPECT_NEAR(vec1[idx], vec2[idx], delta) << "At index " << idx; \ +#define VEC_EXPECT_NEAR(vec1, vec2, delta, dtype) \ + ASSERT_EQ(vec1.size(), vec2.size()); \ + for (std::size_t idx = 0; idx < vec1.size() / sizeof(dtype); ++idx) \ + { \ + EXPECT_NEAR(reinterpret_cast(vec1.data())[idx], reinterpret_cast(vec2.data())[idx], delta) \ + << "At index " << idx; \ } +template +void myGenerate(T *src, std::size_t size, std::default_random_engine &randEng) +{ + std::uniform_int_distribution rand(0u, 255u); + for (std::size_t idx = 0; idx < size; ++idx) + { + src[idx] = rand(randEng); + } +} + +template<> +void myGenerate(float *src, std::size_t size, std::default_random_engine &randEng) +{ + std::uniform_real_distribution rand(0.f, 1.f); + for (std::size_t idx = 0; idx < size; ++idx) + { + src[idx] = rand(randEng); + } +} + +#define NVCV_IMAGE_FORMAT_RGBS8 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, XYZ1, ASSOCIATED, X8_Y8_Z8) +#define NVCV_IMAGE_FORMAT_BGRS8 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, ZYX1, ASSOCIATED, X8_Y8_Z8) +#define NVCV_IMAGE_FORMAT_RGBAS8 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, XYZW, ASSOCIATED, X8_Y8_Z8_W8) +#define NVCV_IMAGE_FORMAT_BGRAS8 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, ZYXW, ASSOCIATED, X8_Y8_Z8_W8) + #define NVCV_IMAGE_FORMAT_Y16 NVCV_DETAIL_MAKE_YCbCr_FMT1(BT601, NONE, PL, UNSIGNED, X000, ASSOCIATED, X16) #define NVCV_IMAGE_FORMAT_BGR16 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, UNSIGNED, ZYX1, ASSOCIATED, X16_Y16_Z16) #define NVCV_IMAGE_FORMAT_RGB16 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, UNSIGNED, XYZ1, ASSOCIATED, X16_Y16_Z16) +#define NVCV_IMAGE_FORMAT_BGRA16 \ + NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, UNSIGNED, ZYXW, ASSOCIATED, X16_Y16_Z16_W16) +#define NVCV_IMAGE_FORMAT_RGBA16 \ + NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, UNSIGNED, XYZW, ASSOCIATED, X16_Y16_Z16_W16) +#define NVCV_IMAGE_FORMAT_YUV16 NVCV_DETAIL_MAKE_YCbCr_FMT1(BT601, NONE, PL, UNSIGNED, XYZ1, ASSOCIATED, 
X16_Y16_Z16) + +#define NVCV_IMAGE_FORMAT_YS16 NVCV_DETAIL_MAKE_YCbCr_FMT1(BT601, NONE, PL, SIGNED, X000, ASSOCIATED, X16) +#define NVCV_IMAGE_FORMAT_BGRS16 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, ZYX1, ASSOCIATED, X16_Y16_Z16) +#define NVCV_IMAGE_FORMAT_RGBS16 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, XYZ1, ASSOCIATED, X16_Y16_Z16) +#define NVCV_IMAGE_FORMAT_BGRAS16 \ + NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, ZYXW, ASSOCIATED, X16_Y16_Z16_W16) +#define NVCV_IMAGE_FORMAT_RGBAS16 \ + NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, XYZW, ASSOCIATED, X16_Y16_Z16_W16) + +#define NVCV_IMAGE_FORMAT_BGRS32 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, ZYX1, ASSOCIATED, X32_Y32_Z32) +#define NVCV_IMAGE_FORMAT_RGBS32 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, XYZ1, ASSOCIATED, X32_Y32_Z32) +#define NVCV_IMAGE_FORMAT_BGRAS32 \ + NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, ZYXW, ASSOCIATED, X32_Y32_Z32_W32) +#define NVCV_IMAGE_FORMAT_RGBAS32 \ + NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, SIGNED, XYZW, ASSOCIATED, X32_Y32_Z32_W32) +#define NVCV_IMAGE_FORMAT_YUVf32 NVCV_DETAIL_MAKE_YCbCr_FMT1(BT601, NONE, PL, FLOAT, XYZ1, ASSOCIATED, X32_Y32_Z32) +#define NVCV_IMAGE_FORMAT_Yf32 NVCV_DETAIL_MAKE_YCbCr_FMT1(BT601, NONE, PL, FLOAT, X000, ASSOCIATED, X32) +#define NVCV_IMAGE_FORMAT_HSVf32 NVCV_DETAIL_MAKE_COLOR_FMT1(HSV, UNDEFINED, PL, FLOAT, XYZ0, ASSOCIATED, X32_Y32_Z32) + +#define NVCV_IMAGE_FORMAT_BGRf64 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, FLOAT, ZYX1, ASSOCIATED, X64_Y64_Z64) +#define NVCV_IMAGE_FORMAT_RGBf64 NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, FLOAT, XYZ1, ASSOCIATED, X64_Y64_Z64) +#define NVCV_IMAGE_FORMAT_BGRAf64 \ + NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, FLOAT, ZYXW, ASSOCIATED, X64_Y64_Z64_W64) +#define NVCV_IMAGE_FORMAT_RGBAf64 \ + NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, FLOAT, XYZW, ASSOCIATED, X64_Y64_Z64_W64) // clang-format off @@ -54,10 +110,42 @@ test::ValueList()}; nvcv::ImageFormat dstFormat{GetParamValue<4>()}; + NVCVDataType nvcvDataType; + ASSERT_EQ(NVCV_SUCCESS, nvcvImageFormatGetPlaneDataType(srcFormat, 0, &nvcvDataType)); + NVCVColorConversionCode src2dstCode{GetParamValue<5>()}; NVCVColorConversionCode dst2srcCode{GetParamValue<6>()}; @@ -138,12 +261,20 @@ TEST_P(OpCvtColor, correct_output) long srcBufSize = srcSampleStride * srcAccess->numSamples(); - std::vector srcVec(srcBufSize); - - std::default_random_engine randEng(0); - std::uniform_int_distribution rand(0u, 255u); - - std::generate(srcVec.begin(), srcVec.end(), [&]() { return rand(randEng); }); + std::vector srcVec(srcBufSize); + std::default_random_engine randEng(0); + switch (nvcvDataType) + { + case NVCV_DATA_TYPE_F32: + case NVCV_DATA_TYPE_2F32: + case NVCV_DATA_TYPE_3F32: + case NVCV_DATA_TYPE_4F32: + myGenerate(reinterpret_cast(srcVec.data()), srcVec.size() / sizeof(float), randEng); + break; + default: + myGenerate(reinterpret_cast(srcVec.data()), srcVec.size(), randEng); + break; + } // copy random input to device ASSERT_EQ(cudaSuccess, cudaMemcpy(srcData->basePtr(), srcVec.data(), srcBufSize, cudaMemcpyHostToDevice)); @@ -166,7 +297,18 @@ TEST_P(OpCvtColor, correct_output) // copy output back to host ASSERT_EQ(cudaSuccess, cudaMemcpy(testVec.data(), srcData->basePtr(), srcBufSize, cudaMemcpyDeviceToHost)); - VEC_EXPECT_NEAR(testVec, srcVec, maxDiff); + switch (nvcvDataType) + { + case NVCV_DATA_TYPE_F32: + case NVCV_DATA_TYPE_2F32: + case NVCV_DATA_TYPE_3F32: + case 
NVCV_DATA_TYPE_4F32: + VEC_EXPECT_NEAR(testVec, srcVec, maxDiff, float); + break; + default: + VEC_EXPECT_NEAR(testVec, srcVec, maxDiff, uint8_t); + break; + } } TEST_P(OpCvtColor, varshape_correct_output) @@ -181,6 +323,9 @@ TEST_P(OpCvtColor, varshape_correct_output) nvcv::ImageFormat srcFormat{GetParamValue<3>()}; nvcv::ImageFormat dstFormat{GetParamValue<4>()}; + NVCVDataType nvcvDataType; + ASSERT_EQ(NVCV_SUCCESS, nvcvImageFormatGetPlaneDataType(srcFormat, 0, &nvcvDataType)); + NVCVColorConversionCode src2dstCode{GetParamValue<5>()}; NVCVColorConversionCode dst2srcCode{GetParamValue<6>()}; @@ -206,7 +351,18 @@ TEST_P(OpCvtColor, varshape_correct_output) std::uniform_int_distribution udist(0, 255); srcVec[i].resize(imgSrc[i].size().h * srcRowStride); - std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return udist(rng); }); + switch (nvcvDataType) + { + case NVCV_DATA_TYPE_F32: + case NVCV_DATA_TYPE_2F32: + case NVCV_DATA_TYPE_3F32: + case NVCV_DATA_TYPE_4F32: + myGenerate(reinterpret_cast(srcVec[i].data()), srcVec[i].size() / sizeof(float), rng); + break; + default: + myGenerate(reinterpret_cast(srcVec[i].data()), srcVec[i].size(), rng); + break; + } auto imgData = imgSrc[i].exportData(); ASSERT_NE(imgData, nvcv::NullOpt); @@ -257,8 +413,116 @@ TEST_P(OpCvtColor, varshape_correct_output) imgData->plane(0).rowStride, srcVecRowStride[i], imgSrc[i].size().h, cudaMemcpyDeviceToHost)); - VEC_EXPECT_NEAR(testVec, srcVec[i], maxDiff); + switch (nvcvDataType) + { + case NVCV_DATA_TYPE_F32: + case NVCV_DATA_TYPE_2F32: + case NVCV_DATA_TYPE_3F32: + case NVCV_DATA_TYPE_4F32: + VEC_EXPECT_NEAR(testVec, srcVec[i], maxDiff, float); + break; + default: + VEC_EXPECT_NEAR(testVec, srcVec[i], maxDiff, uint8_t); + break; + } } } +TEST(OpCvtColor_negative, create_with_null_handle) +{ + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaCvtColorCreate(nullptr)); +} + +// clang-format off + +NVCV_TEST_SUITE_P(OpCvtColor_negative, +test::ValueList +{ + // W, H, N, inputFormat, outputFormat, in2outCode + { 8, 8, 3, NVCV_IMAGE_FORMAT_Y8, NVCV_IMAGE_FORMAT_BGRA8, NVCV_COLOR_BGR2BGRA}, // invalid input channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGR8, NVCV_IMAGE_FORMAT_BGRAf32, NVCV_COLOR_BGR2BGRA}, // mismatch data type + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGR8, NVCV_IMAGE_FORMAT_Y8, NVCV_COLOR_BGR2BGRA}, // invalid output channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGR8, NVCV_IMAGE_FORMAT_BGR8, NVCV_COLOR_GRAY2BGR}, // invalid input channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_Y8, NVCV_IMAGE_FORMAT_BGRf32, NVCV_COLOR_GRAY2BGR}, // mismatch data type + { 8, 8, 3, NVCV_IMAGE_FORMAT_Y8, NVCV_IMAGE_FORMAT_BGRA8, NVCV_COLOR_GRAY2BGR}, // invalid output channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGRA8, NVCV_IMAGE_FORMAT_Y8, NVCV_COLOR_BGR2GRAY}, // invalid input channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGRf32, NVCV_IMAGE_FORMAT_Y8, NVCV_COLOR_BGR2GRAY}, // mismatch data type + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGR8, NVCV_IMAGE_FORMAT_BGRA8, NVCV_COLOR_BGR2GRAY}, // invalid output channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGRA8, NVCV_IMAGE_FORMAT_YUV8, NVCV_COLOR_BGR2YUV,}, // invalid input channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGRf32, NVCV_IMAGE_FORMAT_YUV8, NVCV_COLOR_BGR2YUV}, // mismatch data type + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGR8, NVCV_IMAGE_FORMAT_BGRA8, NVCV_COLOR_BGR2YUV}, // invalid output channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGRA8, NVCV_IMAGE_FORMAT_BGR8, NVCV_COLOR_YUV2BGR,}, // invalid input channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_YUV8, NVCV_IMAGE_FORMAT_BGRf32, NVCV_COLOR_YUV2BGR}, // mismatch data type + { 8, 8, 3, 
NVCV_IMAGE_FORMAT_YUV8, NVCV_IMAGE_FORMAT_BGRA8, NVCV_COLOR_YUV2BGR}, // invalid output channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGRA8, NVCV_IMAGE_FORMAT_HSV8, NVCV_COLOR_BGR2HSV}, // invalid input channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGRf32, NVCV_IMAGE_FORMAT_HSV8, NVCV_COLOR_BGR2HSV}, // mismatch data type + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGR8, NVCV_IMAGE_FORMAT_BGRA8, NVCV_COLOR_BGR2HSV}, // invalid output channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_BGRA8, NVCV_IMAGE_FORMAT_BGR8, NVCV_COLOR_HSV2BGR}, // invalid input channel + { 8, 8, 3, NVCV_IMAGE_FORMAT_HSV8, NVCV_IMAGE_FORMAT_BGRf32, NVCV_COLOR_HSV2BGR}, // mismatch data type + { 8, 8, 3, NVCV_IMAGE_FORMAT_HSV8, NVCV_IMAGE_FORMAT_Y8, NVCV_COLOR_HSV2BGR}, // invalid output channel +}); + +// clang-format on + +TEST_P(OpCvtColor_negative, invalid_input) +{ + int width = GetParamValue<0>(); + int height = GetParamValue<1>(); + int batches = GetParamValue<2>(); + + nvcv::ImageFormat srcFormat{GetParamValue<3>()}; + nvcv::ImageFormat dstFormat{GetParamValue<4>()}; + + NVCVColorConversionCode src2dstCode{GetParamValue<5>()}; + + nvcv::Tensor srcTensor = nvcv::util::CreateTensor(batches, width, height, srcFormat); + nvcv::Tensor dstTensor = nvcv::util::CreateTensor(batches, width, height, dstFormat); + + // run operator + cvcuda::CvtColor cvtColorOp; + EXPECT_ANY_THROW(cvtColorOp(nullptr, srcTensor, dstTensor, src2dstCode)); +} + +TEST_P(OpCvtColor_negative, varshape_invalid_input) +{ + int width = GetParamValue<0>(); + int height = GetParamValue<1>(); + int batches = GetParamValue<2>(); + + nvcv::ImageFormat srcFormat{GetParamValue<3>()}; + nvcv::ImageFormat dstFormat{GetParamValue<4>()}; + + NVCVColorConversionCode src2dstCode{GetParamValue<5>()}; + + // Create input varshape + std::default_random_engine rng; + std::uniform_int_distribution udistWidth(width * 0.8, width * 1.1); + std::uniform_int_distribution udistHeight(height * 0.8, height * 1.1); + + std::vector imgSrc; + + for (int i = 0; i < batches; ++i) + { + imgSrc.emplace_back(nvcv::Size2D{udistWidth(rng), udistHeight(rng)}, srcFormat); + } + + nvcv::ImageBatchVarShape batchSrc(batches); + batchSrc.pushBack(imgSrc.begin(), imgSrc.end()); + + // Create output varshape + std::vector imgDst; + + for (int i = 0; i < batches; ++i) + { + imgDst.emplace_back(imgSrc[i].size(), dstFormat); + } + + nvcv::ImageBatchVarShape batchDst(batches); + batchDst.pushBack(imgDst.begin(), imgDst.end()); + + // run operator + cvcuda::CvtColor cvtColorOp; + EXPECT_ANY_THROW(cvtColorOp(nullptr, batchSrc, batchDst, src2dstCode)); +} + #undef VEC_EXPECT_NEAR diff --git a/tests/cvcuda/system/TestOpGammaContrast.cpp b/tests/cvcuda/system/TestOpGammaContrast.cpp index e51f20772..155291a54 100644 --- a/tests/cvcuda/system/TestOpGammaContrast.cpp +++ b/tests/cvcuda/system/TestOpGammaContrast.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -31,8 +32,6 @@ namespace test = nvcv::test; namespace cuda = nvcv::cuda; -// clang-format off - #define DBG_GAMMA_CONTRAST 0 static void printVec(std::vector &vec, int height, int rowPitch, int bytesPerPixel, std::string name) @@ -55,15 +54,29 @@ static void printVec(std::vector &vec, int height, int rowPitch, int by #endif } -static void GammaContrastVarShapeCpuOp(std::vector &hDst, int dstRowStride, nvcv::Size2D dstSize, const std::vector &hSrc, - int srcRowStride, nvcv::Size2D srcSize, nvcv::ImageFormat fmt, const std::vector gamma, const int imageIndex, bool perChannel) +#define VEC_EXPECT_NEAR(vec1, vec2, delta, dtype) \ + ASSERT_EQ(vec1.size(), 
vec2.size()); \ + for (std::size_t idx = 0; idx < vec1.size() / sizeof(dtype); ++idx) \ + { \ + EXPECT_NEAR(reinterpret_cast(vec1.data())[idx], reinterpret_cast(vec2.data())[idx], delta) \ + << "At index " << idx; \ + } + +namespace { + +// uint8 cpu op +template +void GammaContrastVarShapeCpuOp(std::vector &hDst, int dstRowStride, nvcv::Size2D dstSize, + const std::vector &hSrc, int srcRowStride, nvcv::Size2D srcSize, + nvcv::ImageFormat fmt, const std::vector gamma, const int imageIndex, + bool perChannel) { assert(fmt.numPlanes() == 1); int elementsPerPixel = fmt.numChannels(); - uint8_t *dstPtr = hDst.data(); - const uint8_t *srcPtr = hSrc.data(); + T *dstPtr = hDst.data(); + const T *srcPtr = hSrc.data(); for (int dst_y = 0; dst_y < dstSize.h; dst_y++) { @@ -71,16 +84,71 @@ static void GammaContrastVarShapeCpuOp(std::vector &hDst, int dstRowStr { for (int k = 0; k < elementsPerPixel; k++) { - int index = dst_y * dstRowStride + dst_x * elementsPerPixel + k; + int index = dst_y * dstRowStride + dst_x * elementsPerPixel + k; float gamma_tmp = perChannel ? gamma[imageIndex * elementsPerPixel + k] : gamma[imageIndex]; - float tmp = (srcPtr[index] + 0.0f) / 255.0f; - uint8_t out = std::rint(pow(tmp, gamma_tmp) * 255.0f); - dstPtr[index] = out; + float tmp = (srcPtr[index] + 0.0f) / 255.0f; + T out = std::rint(pow(tmp, gamma_tmp) * 255.0f); + dstPtr[index] = out; } } } } +// float cpu op +template<> +void GammaContrastVarShapeCpuOp(std::vector &hDst, int dstRowStride, nvcv::Size2D dstSize, + const std::vector &hSrc, int srcRowStride, nvcv::Size2D srcSize, + nvcv::ImageFormat fmt, const std::vector gamma, const int imageIndex, + bool perChannel) +{ + assert(fmt.numPlanes() == 1); + + int elementsPerPixel = fmt.numChannels(); + + for (int dst_y = 0; dst_y < dstSize.h; dst_y++) + { + for (int dst_x = 0; dst_x < dstSize.w; dst_x++) + { + for (int k = 0; k < elementsPerPixel; k++) + { + int index = dst_y * dstRowStride + dst_x * elementsPerPixel + k; + float gamma_tmp = perChannel ? 
gamma[imageIndex * elementsPerPixel + k] : gamma[imageIndex]; + float out = nvcv::cuda::clamp(nvcv::cuda::pow(hSrc[index], gamma_tmp), 0.f, 1.f); + hDst[index] = out; + } + } + } +} + +void GammaContrastVarShapeCpuOpWrapper(std::vector &hDst, int dstRowStride, nvcv::Size2D dstSize, + const std::vector &hSrc, int srcRowStride, nvcv::Size2D srcSize, + nvcv::ImageFormat fmt, const std::vector gamma, const int imageIndex, + bool perChannel, NVCVDataType nvcvDataType) +{ + if (nvcvDataType == NVCV_DATA_TYPE_F32 || nvcvDataType == NVCV_DATA_TYPE_2F32 || nvcvDataType == NVCV_DATA_TYPE_3F32 + || nvcvDataType == NVCV_DATA_TYPE_4F32) + { + std::vector src_tmp(hSrc.size() / sizeof(float)); + std::vector dst_tmp(hDst.size() / sizeof(float)); + size_t copySize = hSrc.size(); + memcpy(static_cast(src_tmp.data()), const_cast(static_cast(hSrc.data())), + copySize); + memcpy(static_cast(dst_tmp.data()), static_cast(hDst.data()), copySize); + GammaContrastVarShapeCpuOp(dst_tmp, dstRowStride / sizeof(float), dstSize, src_tmp, + srcRowStride / sizeof(float), srcSize, fmt, gamma, imageIndex, perChannel); + memcpy(static_cast(hDst.data()), static_cast(dst_tmp.data()), copySize); + } + else + { + GammaContrastVarShapeCpuOp(hDst, dstRowStride, dstSize, hSrc, srcRowStride, srcSize, fmt, gamma, imageIndex, + perChannel); + } +} + +} // namespace + +// clang-format off + NVCV_TEST_SUITE_P(OpGammaContrast, test::ValueList { // width, height, batches, format, Gamma, per channel @@ -97,6 +165,20 @@ NVCV_TEST_SUITE_P(OpGammaContrast, test::ValueList()}; - float gamma = GetParamValue<4>(); + NVCVDataType nvcvDataType; + ASSERT_EQ(NVCV_SUCCESS, nvcvImageFormatGetPlaneDataType(format, 0, &nvcvDataType)); + float gamma = GetParamValue<4>(); + bool isFloatTest = false; bool perChannel = GetParamValue<5>(); @@ -135,9 +220,25 @@ TEST_P(OpGammaContrast, varshape_correct_output) srcVecRowStride[i] = srcRowStride; std::uniform_int_distribution udist(0, 255); + std::uniform_real_distribution udistf(0.f, 1.f); srcVec[i].resize(imgSrc[i].size().h * srcRowStride); - std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return udist(rng); }); + switch (nvcvDataType) + { + case NVCV_DATA_TYPE_F32: + case NVCV_DATA_TYPE_2F32: + case NVCV_DATA_TYPE_3F32: + case NVCV_DATA_TYPE_4F32: + isFloatTest = true; + for (size_t idx = 0; idx < (srcVec[i].size() / sizeof(float)); ++idx) + { + reinterpret_cast(srcVec[i].data())[idx] = udistf(rng); + } + break; + default: + std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return udist(rng); }); + break; + } auto imgData = imgSrc[i].exportData(); ASSERT_NE(imgData, nvcv::NullOpt); @@ -228,13 +329,27 @@ TEST_P(OpGammaContrast, varshape_correct_output) std::generate(goldVec.begin(), goldVec.end(), [&]() { return 0; }); // Generate gold result - GammaContrastVarShapeCpuOp(goldVec, dstRowStride, {dstWidth, dstHeight}, srcVec[i], srcRowStride, - {srcWidth, srcHeight}, format, gammaVec, i, perChannel); + GammaContrastVarShapeCpuOpWrapper(goldVec, dstRowStride, {dstWidth, dstHeight}, srcVec[i], srcRowStride, + {srcWidth, srcHeight}, format, gammaVec, i, perChannel, nvcvDataType); printVec(goldVec, srcHeight, dstRowStride, format.numChannels(), "golden output"); printVec(testVec, srcHeight, dstRowStride, format.numChannels(), "operator output"); - EXPECT_EQ(testVec, goldVec); + if (!isFloatTest) + { + EXPECT_EQ(testVec, goldVec); + } + else + { + VEC_EXPECT_NEAR(testVec, goldVec, 1E-6F, float); + } } } + +TEST(OpGammaContrast_negative, create_with_null_handle) +{ + 
EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaGammaContrastCreate(nullptr, 4, 4)); +} + +#undef VEC_EXPECT_NEAR diff --git a/tests/cvcuda/system/TestOpGaussianNoise.cpp b/tests/cvcuda/system/TestOpGaussianNoise.cpp index 6495061b4..a8fe5419b 100644 --- a/tests/cvcuda/system/TestOpGaussianNoise.cpp +++ b/tests/cvcuda/system/TestOpGaussianNoise.cpp @@ -19,18 +19,22 @@ #include "GaussianNoiseUtils.cuh" +#include #include #include #include #include #include #include +#include +#include #include #include #include #include +namespace { inline uint8_t cast(float value) { int v = (int)(value + (value >= 0 ? 0.5 : -0.5)); @@ -39,7 +43,7 @@ inline uint8_t cast(float value) //test for RGB8 template -static void GaussianNoise(std::vector &src, std::vector &dst, float mu, float sigma, int batch, bool per_channel) +void GaussianNoise(std::vector &src, std::vector &dst, float mu, float sigma, int batch, bool per_channel) { int mem_size = src.size(); if (!per_channel) @@ -69,6 +73,39 @@ static void GaussianNoise(std::vector &src, std::vector &dst, float mu, fl free(rand_h); } +// test for float +template<> +void GaussianNoise(std::vector &src, std::vector &dst, float mu, float sigma, int batch, bool per_channel) +{ + int mem_size = src.size(); + if (!per_channel) + mem_size /= 3; + float *rand_h = (float *)malloc(sizeof(float) * mem_size); + get_random(rand_h, per_channel, batch, mem_size); + + int img_size = src.size() / 3; + for (int i = 0; i < img_size; i++) + { + if (per_channel) + { + for (int ch = 0; ch < 3; ch++) + { + float delta = mu + rand_h[i * 3 + ch] * sigma; + dst[i * 3 + ch] = nvcv::cuda::clamp(nvcv::cuda::StaticCast(src[i * 3 + ch] + delta), 0.f, 1.f); + } + } + else + { + float delta = mu + rand_h[i] * sigma; + dst[i * 3] = nvcv::cuda::clamp(nvcv::cuda::StaticCast(src[i * 3] + delta), 0.f, 1.f); + dst[i * 3 + 1] = nvcv::cuda::clamp(nvcv::cuda::StaticCast(src[i * 3 + 1] + delta), 0.f, 1.f); + dst[i * 3 + 2] = nvcv::cuda::clamp(nvcv::cuda::StaticCast(src[i * 3 + 2] + delta), 0.f, 1.f); + } + } + free(rand_h); +} +} // namespace + // clang-format off NVCV_TEST_SUITE_P(OpGaussianNoise, nvcv::test::ValueList { @@ -81,22 +118,15 @@ NVCV_TEST_SUITE_P(OpGaussianNoise, nvcv::test::ValueList +static void tensor_correct_output_test(int batch, int height, int width, float mu, float sigma, bool per_channel) { cudaStream_t stream; EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); - int batch = GetParamValue<0>(); - int height = GetParamValue<1>(); - int width = GetParamValue<2>(); - float mu = GetParamValue<3>(); - float sigma = GetParamValue<4>(); - bool per_channel = GetParamValue<5>(); - - nvcv::ImageFormat fmt = nvcv::FMT_RGB8; - using datatype = uint8_t; - nvcv::Tensor imgIn = nvcv::util::CreateTensor(batch, width, height, fmt); - nvcv::Tensor imgOut = nvcv::util::CreateTensor(batch, width, height, fmt); + nvcv::ImageFormat fmt = std::is_same::value ? 
nvcv::FMT_RGB8 : nvcv::FMT_RGBf32; + nvcv::Tensor imgIn = nvcv::util::CreateTensor(batch, width, height, fmt); + nvcv::Tensor imgOut = nvcv::util::CreateTensor(batch, width, height, fmt); auto inData = imgIn.exportData(); ASSERT_NE(nullptr, inData); @@ -142,15 +172,24 @@ TEST_P(OpGaussianNoise, tensor_correct_output) cudaMemcpyHostToDevice, stream)); //Generate input - std::vector> srcVec(batch); - std::default_random_engine randEng; - int rowStride = width * fmt.planePixelStrideBytes(0); + std::vector> srcVec(batch); + std::default_random_engine randEng; + int rowStride = width * fmt.planePixelStrideBytes(0); for (int i = 0; i < batch; i++) { - std::uniform_int_distribution rand(0, 255); - srcVec[i].resize(height * rowStride / sizeof(datatype)); - std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return rand(randEng); }); + if constexpr (std::is_same::value) + { + std::uniform_int_distribution rand(0, 255); + srcVec[i].resize(height * rowStride / sizeof(datatype)); + std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return rand(randEng); }); + } + else + { + std::uniform_real_distribution rand(0.f, 1.f); + srcVec[i].resize(height * rowStride / sizeof(datatype)); + std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return rand(randEng); }); + } ASSERT_EQ(cudaSuccess, cudaMemcpy2D(inAccess->sampleData(i), inAccess->rowStride(), srcVec[i].data(), rowStride, rowStride, height, cudaMemcpyHostToDevice)); } @@ -180,20 +219,35 @@ TEST_P(OpGaussianNoise, tensor_correct_output) EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream)); } -TEST_P(OpGaussianNoise, varshape_correct_shape) +TEST_P(OpGaussianNoise, tensor_correct_output) { - cudaStream_t stream; - EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + int batch = GetParamValue<0>(); + int height = GetParamValue<1>(); + int width = GetParamValue<2>(); + float mu = GetParamValue<3>(); + float sigma = GetParamValue<4>(); + bool per_channel = GetParamValue<5>(); + tensor_correct_output_test(batch, height, width, mu, sigma, per_channel); +} +TEST_P(OpGaussianNoise, tensor_correct_output_float) +{ int batch = GetParamValue<0>(); int height = GetParamValue<1>(); int width = GetParamValue<2>(); float mu = GetParamValue<3>(); float sigma = GetParamValue<4>(); bool per_channel = GetParamValue<5>(); + tensor_correct_output_test(batch, height, width, mu, sigma, per_channel); +} - nvcv::ImageFormat fmt = nvcv::FMT_RGB8; - using datatype = uint8_t; +template +static void varshape_correct_output_test(int batch, int height, int width, float mu, float sigma, bool per_channel) +{ + cudaStream_t stream; + EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + + nvcv::ImageFormat fmt = std::is_same::value ? 
nvcv::FMT_RGB8 : nvcv::FMT_RGBf32; // Create input and output std::default_random_engine randEng; @@ -247,10 +301,18 @@ TEST_P(OpGaussianNoise, varshape_correct_shape) int srcRowStride = srcWidth * fmt.planePixelStrideBytes(0); - std::uniform_int_distribution rand(0, 255); - - srcVec[i].resize(srcHeight * srcRowStride / sizeof(datatype)); - std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return rand(randEng); }); + if constexpr (std::is_same::value) + { + std::uniform_int_distribution rand(0, 255); + srcVec[i].resize(srcHeight * srcRowStride / sizeof(datatype)); + std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return rand(randEng); }); + } + else + { + std::uniform_real_distribution rand(0.f, 1.f); + srcVec[i].resize(srcHeight * srcRowStride / sizeof(datatype)); + std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return rand(randEng); }); + } // Copy input data to the GPU ASSERT_EQ(cudaSuccess, cudaMemcpy2D(srcData->plane(0).basePtr, srcData->plane(0).rowStride, srcVec[i].data(), @@ -292,3 +354,135 @@ TEST_P(OpGaussianNoise, varshape_correct_shape) EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream)); } + +TEST_P(OpGaussianNoise, varshape_correct_shape) +{ + int batch = GetParamValue<0>(); + int height = GetParamValue<1>(); + int width = GetParamValue<2>(); + float mu = GetParamValue<3>(); + float sigma = GetParamValue<4>(); + bool per_channel = GetParamValue<5>(); + + varshape_correct_output_test(batch, height, width, mu, sigma, per_channel); +} + +TEST_P(OpGaussianNoise, varshape_correct_shape_float) +{ + int batch = GetParamValue<0>(); + int height = GetParamValue<1>(); + int width = GetParamValue<2>(); + float mu = GetParamValue<3>(); + float sigma = GetParamValue<4>(); + bool per_channel = GetParamValue<5>(); + + varshape_correct_output_test(batch, height, width, mu, sigma, per_channel); +} + +TEST(OpGaussianNoise_negative, create_with_null_handle) +{ + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaGaussianNoiseCreate(nullptr, 10)); +} + +TEST(OpGaussianNoise_negative, create_with_negative_batch) +{ + NVCVOperatorHandle opHandle; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaGaussianNoiseCreate(&opHandle, -1)); +} + +TEST(OpGaussianNoise_negative, invalid_mu_sigma_layout) +{ + nvcv::Tensor imgIn( + { + {24, 24, 2}, + "HWC" + }, + nvcv::TYPE_U8); + nvcv::Tensor imgOut( + { + {24, 24, 2}, + "HWC" + }, + nvcv::TYPE_U8); + + //parameters + nvcv::Tensor muval({{2}, "N"}, nvcv::TYPE_F32); + nvcv::Tensor sigmaval({{2}, "N"}, nvcv::TYPE_F32); + + // invalid mu parameters + nvcv::Tensor invalidMuval( + { + {2, 2, 2}, + "HWC" + }, + nvcv::TYPE_F32); + nvcv::Tensor invalidSigmaval( + { + {2, 2, 2}, + "HWC" + }, + nvcv::TYPE_F32); + + // Call operator + int maxBatch = 4; + unsigned long long seed = 12345; + cvcuda::GaussianNoise GaussianNoiseOp(maxBatch); + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcv::ProtectCall([&] { GaussianNoiseOp(NULL, imgIn, imgOut, invalidMuval, sigmaval, false, seed); })); + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcv::ProtectCall([&] { GaussianNoiseOp(NULL, imgIn, imgOut, muval, invalidSigmaval, false, seed); })); +} + +// clang-format off +NVCV_TEST_SUITE_P(OpGaussianNoise_negative, nvcv::test::ValueList +{ + // in_layout, in_data_type, out_layout, out_data_type, mu_layout, mu_data_type, sigma_layout, sigma_data_type, expected_return_status + { "CHW", nvcv::TYPE_U8, "HWC", nvcv::TYPE_U8, "N", nvcv::TYPE_F32, "N", nvcv::TYPE_F32, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "CHW", nvcv::TYPE_U8, "N", nvcv::TYPE_F32, "N", 
nvcv::TYPE_F32, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_F64, "HWC", nvcv::TYPE_U8, "N", nvcv::TYPE_F32, "N", nvcv::TYPE_F32, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "HWC", nvcv::TYPE_F64, "N", nvcv::TYPE_F32, "N", nvcv::TYPE_F32, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U32, "HWC", nvcv::TYPE_U8, "N", nvcv::TYPE_F32, "N", nvcv::TYPE_F32, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "HWC", nvcv::TYPE_U32, "N", nvcv::TYPE_F32, "N", nvcv::TYPE_F32, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "HWC", nvcv::TYPE_U16, "N", nvcv::TYPE_F32, "N", nvcv::TYPE_F32, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "HWC", nvcv::TYPE_U8, "N", nvcv::TYPE_F64, "N", nvcv::TYPE_F32, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "HWC", nvcv::TYPE_U8, "N", nvcv::TYPE_F32, "N", nvcv::TYPE_F64, NVCV_ERROR_INVALID_ARGUMENT}, +}); + +// clang-format on + +TEST_P(OpGaussianNoise_negative, infer_negative_parameter) +{ + std::string in_layout = GetParamValue<0>(); + nvcv::DataType in_data_type = GetParamValue<1>(); + std::string out_layout = GetParamValue<2>(); + nvcv::DataType out_data_type = GetParamValue<3>(); + std::string mu_layout = GetParamValue<4>(); + nvcv::DataType mu_data_type = GetParamValue<5>(); + std::string sigma_layout = GetParamValue<6>(); + nvcv::DataType sigma_data_type = GetParamValue<7>(); + NVCVStatus expected_return_status = GetParamValue<8>(); + + nvcv::Tensor imgIn( + { + {24, 24, 2}, + in_layout.c_str() + }, + in_data_type); + nvcv::Tensor imgOut( + { + {24, 24, 2}, + out_layout.c_str() + }, + out_data_type); + + //parameters + nvcv::Tensor muval({{2}, mu_layout.c_str()}, mu_data_type); + nvcv::Tensor sigmaval({{2}, sigma_layout.c_str()}, sigma_data_type); + + // Call operator + int maxBatch = 4; + unsigned long long seed = 12345; + cvcuda::GaussianNoise GaussianNoiseOp(maxBatch); + EXPECT_EQ(expected_return_status, + nvcv::ProtectCall([&] { GaussianNoiseOp(NULL, imgIn, imgOut, muval, sigmaval, false, seed); })); +} diff --git a/tests/cvcuda/system/TestOpHQResize.cpp b/tests/cvcuda/system/TestOpHQResize.cpp new file mode 100644 index 000000000..f9ce474af --- /dev/null +++ b/tests/cvcuda/system/TestOpHQResize.cpp @@ -0,0 +1,1320 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cuda = nvcv::cuda; +namespace test = nvcv::test; +namespace ttype = nvcv::test::type; +using uchar = unsigned char; + +template +using uniform_distribution + = std::conditional_t, std::uniform_int_distribution, std::uniform_real_distribution>; + +namespace baseline { + +template +void ForAll(int2 shape, Cb &&cb) +{ + for (int y = 0; y < shape.y; y++) + { + for (int x = 0; x < shape.x; x++) + { + cb(int2{x, y}); + } + } +} + +template +void ForAll(int3 shape, Cb &&cb) +{ + for (int z = 0; z < shape.z; z++) + for (int y = 0; y < shape.y; y++) + { + for (int x = 0; x < shape.x; x++) + { + cb(int3{x, y, z}); + } + } +} + +template +struct CpuSample +{ + static_assert(!cuda::IsCompound); + using ShapeT = cuda::MakeType; // WH or WHD + using StridesT = cuda::MakeType; // WHN or WHDN + + CpuSample(int64_t size, StridesT strides, int numSamples, ShapeT shape, int numChannels) + : m_data(size) + , m_strides{strides} + , m_numSamples{numSamples} + , m_shape{shape} + , m_numChannels{numChannels} + { + } + + BT &get(int sampleIdx, const ShapeT idx, int channel) + { + return *(reinterpret_cast(m_data.data() + offset(sampleIdx, idx)) + channel); + } + + uint8_t *data() + { + return m_data.data(); + } + + StridesT strides() + { + return m_strides; + } + + ShapeT shape() + { + return m_shape; + } + + int numSamples() + { + return m_numSamples; + } + + int numChannels() + { + return m_numChannels; + } + +private: + int offset(int sampleIdx, int2 idx) + { + return sampleIdx * m_strides.z + idx.y * m_strides.y + idx.x * m_strides.x; + } + + int offset(int sampleIdx, int3 idx) + { + return sampleIdx * m_strides.w + idx.z * m_strides.z + idx.y * m_strides.y + idx.x * m_strides.x; + } + + std::vector m_data; + StridesT m_strides; + int m_numSamples; + ShapeT m_shape; + int m_numChannels; +}; + +inline CpuSample GetIntermediate(int numSamples, int2 shape, int numChannels) +{ + int64_t size = sizeof(float) * numSamples * shape.y * shape.x * numChannels; + cuda::MakeType strides; + strides.x = sizeof(float) * numChannels; + strides.y = strides.x * shape.x; + strides.z = strides.y * shape.y; + return {size, strides, numSamples, shape, numChannels}; +} + +inline CpuSample GetIntermediate(int numSamples, int3 shape, int numChannels) +{ + int64_t size = sizeof(float) * numSamples * shape.z * shape.y * shape.x * numChannels; + cuda::MakeType strides; + strides.x = sizeof(float) * numChannels; + strides.y = strides.x * shape.x; + strides.z = strides.y * shape.y; + strides.w = strides.z * shape.z; + return {size, strides, numSamples, shape, numChannels}; +} + +struct FilterTriangular +{ + int size() const + { + return 3; + } + + float operator[](int k) const + { + return k == 1 ? 
1 : 0; + } +}; + +struct FilterCubic +{ + int size() const + { + return 129; + } + + float operator[](int k) const + { + float x = 4 * (k - (size() - 1) * 0.5f) / (size() - 1); + x = fabsf(x); + if (x >= 2) + return 0; + + float x2 = x * x; + float x3 = x2 * x; + if (x > 1) + return -0.5f * x3 + 2.5f * x2 - 4.0f * x + 2.0f; + else + return 1.5f * x3 - 2.5f * x2 + 1.0f; + } +}; + +struct FilterGaussian +{ + int size() const + { + return 65; + } + + float operator[](int k) const + { + float x = 4 * (k - (size() - 1) * 0.5f) / (size() - 1); + return expf(-x * x); + } +}; + +struct FilterLanczos +{ + static constexpr int kLanczosA = 3; + static constexpr int kLanczosResolution = 32; + + int size() const + { + return (2 * kLanczosA * kLanczosResolution + 1); + } + + float operator[](int k) const + { + float x = 2 * kLanczosA * (k - (size() - 1) * 0.5f) / (size() - 1); + if (fabsf(x) >= kLanczosA) + return 0.0f; + return nvcv::util::sinc(x) * nvcv::util::sinc(x / kLanczosA); + } +}; + +template +struct Filter +{ + Filter(float support) + : m_filter{} + , m_support{support} + { + } + + float support() const + { + return std::ceil(m_support); + } + + float scale() const + { + return (m_filter.size() - 1) / m_support; + } + + float anchor() const + { + return m_support / 2; + } + + float operator()(float x) const + { + if (!(x > -1)) + return 0; + if (x >= m_filter.size()) + return 0; + int x0 = std::floor(x); + int x1 = x0 + 1; + float d = x - x0; + float f0 = x0 < 0 ? 0.0f : m_filter[x0]; + float f1 = x1 >= m_filter.size() ? 0.0f : m_filter[x1]; + return f0 + d * (f1 - f0); + } + +private: + FilterType m_filter; + float m_support; +}; + +template +void RunNN(int axis, CpuSample &outTensorCpu, CpuSample &inTensorCpu) +{ + const int numSamples = inTensorCpu.numSamples(); + const int numChannels = inTensorCpu.numChannels(); + const auto inShape = inTensorCpu.shape(); + const auto outShape = outTensorCpu.shape(); + const int inSize = cuda::GetElement(inShape, axis); + const int outSize = cuda::GetElement(outShape, axis); + const float axisScale = static_cast(inSize) / outSize; + const float axisOrigin = 0.5f * axisScale; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + ForAll(outShape, + [&](const cuda::MakeType outIdx) + { + auto inIdx = outIdx; + int inAxis = std::floor(cuda::GetElement(outIdx, axis) * axisScale + axisOrigin); + inAxis = inAxis < 0 ? 0 : (inAxis > inSize - 1 ? inSize - 1 : inAxis); + cuda::GetElement(inIdx, axis) = inAxis; + for (int c = 0; c < numChannels; c++) + { + outTensorCpu.get(sampleIdx, outIdx, c) + = cuda::SaturateCast(inTensorCpu.get(sampleIdx, inIdx, c)); + } + }); + } +} + +template +void RunLinear(int axis, CpuSample &outTensorCpu, CpuSample &inTensorCpu) +{ + const int numSamples = inTensorCpu.numSamples(); + const int numChannels = inTensorCpu.numChannels(); + const auto inShape = inTensorCpu.shape(); + const auto outShape = outTensorCpu.shape(); + const int inSize = cuda::GetElement(inShape, axis); + const int outSize = cuda::GetElement(outShape, axis); + const float axisScale = static_cast(inSize) / outSize; + const float axisOrigin = 0.5f * axisScale - 0.5f; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + ForAll(outShape, + [&](const cuda::MakeType outIdx) + { + const float inAxis0f = cuda::GetElement(outIdx, axis) * axisScale + axisOrigin; + int inAxis0 = std::floor(inAxis0f); + int inAxis1 = inAxis0 + 1; + const float q = inAxis0f - inAxis0; + inAxis0 = inAxis0 < 0 ? 0 : (inAxis0 > inSize - 1 ? 
inSize - 1 : inAxis0); + inAxis1 = inAxis1 < 0 ? 0 : (inAxis1 > inSize - 1 ? inSize - 1 : inAxis1); + auto inIdx0 = outIdx; + auto inIdx1 = outIdx; + cuda::GetElement(inIdx0, axis) = inAxis0; + cuda::GetElement(inIdx1, axis) = inAxis1; + for (int c = 0; c < numChannels; c++) + { + const float a = inTensorCpu.get(sampleIdx, inIdx0, c); + const float b = inTensorCpu.get(sampleIdx, inIdx1, c); + const float tmp = b - a; + outTensorCpu.get(sampleIdx, outIdx, c) = cuda::SaturateCast(std::fmaf(tmp, q, a)); + } + }); + } +} + +template +void RunFilter(int axis, CpuSample &outTensorCpu, CpuSample &inTensorCpu, + const FilterT &filter) +{ + const int numSamples = inTensorCpu.numSamples(); + const int numChannels = inTensorCpu.numChannels(); + const auto inShape = inTensorCpu.shape(); + const auto outShape = outTensorCpu.shape(); + const int inSize = cuda::GetElement(inShape, axis); + const int outSize = cuda::GetElement(outShape, axis); + const int filterSupport = filter.support(); + const float filterStep = filter.scale(); + const float axisScale = static_cast(inSize) / outSize; + const float axisOrigin = 0.5f * axisScale - 0.5f - filter.anchor(); + + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + ForAll(outShape, + [&](const cuda::MakeType outIdx) + { + const float inAxis0f = cuda::GetElement(outIdx, axis) * axisScale + axisOrigin; + int inAxis0 = std::ceil(inAxis0f); + const float fStart = (inAxis0 - inAxis0f) * filterStep; + for (int c = 0; c < numChannels; c++) + { + float tmp = 0; + float norm = 0; + for (int k = 0; k < filterSupport; k++) + { + int inAxis = inAxis0 + k; + inAxis = inAxis < 0 ? 0 : (inAxis > inSize - 1 ? inSize - 1 : inAxis); + auto inIdx = outIdx; + cuda::GetElement(inIdx, axis) = inAxis; + const InBT inVal = inTensorCpu.get(sampleIdx, inIdx, c); + float coeff = filter(fStart + k * filterStep); + tmp = std::fmaf(inVal, coeff, tmp); + norm += coeff; + } + outTensorCpu.get(sampleIdx, outIdx, c) = cuda::SaturateCast(tmp / norm); + } + }); + } +} + +template +void RunFilter(int axis, CpuSample &outTensorCpu, CpuSample &inTensorCpu, + const NVCVInterpolationType interpolation, bool antialias) +{ + const auto inShape = inTensorCpu.shape(); + const auto outShape = outTensorCpu.shape(); + const float inSize = cuda::GetElement(inShape, axis); + const float outSize = cuda::GetElement(outShape, axis); + switch (interpolation) + { + case NVCV_INTERP_LINEAR: + { + float radius = antialias ? inSize / outSize : 1; + float support = std::max(1.0f, 2 * radius); + RunFilter(axis, outTensorCpu, inTensorCpu, Filter{support}); + } + break; + case NVCV_INTERP_CUBIC: + { + float radius = antialias ? (2 * inSize / outSize) : 2; + float support = std::max(4.0f, 2 * radius); + RunFilter(axis, outTensorCpu, inTensorCpu, Filter{support}); + } + break; + case NVCV_INTERP_GAUSSIAN: + { + float radius = antialias ? inSize / outSize : 1; + float support = std::max(1.0f, 2 * radius); + RunFilter(axis, outTensorCpu, inTensorCpu, Filter{support}); + } + break; + case NVCV_INTERP_LANCZOS: + { + float radius = antialias ? 
(3 * inSize / outSize) : 3; + float support = std::max(6.0f, 2 * radius); + RunFilter(axis, outTensorCpu, inTensorCpu, Filter{support}); + } + break; + default: + FAIL() << "Unsupported filter"; + } +} + +template +void RunPass(int axis, CpuSample &outTensorCpu, CpuSample &inTensorCpu, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, bool antialias) + +{ + const auto inShape = inTensorCpu.shape(); + const auto outShape = outTensorCpu.shape(); + const int inSize = cuda::GetElement(inShape, axis); + const int outSize = cuda::GetElement(outShape, axis); + const bool isScalingDown = outSize < inSize; + antialias &= isScalingDown; + const auto interpolation = isScalingDown ? minInterpolation : magInterpolation; + switch (interpolation) + { + case NVCV_INTERP_NEAREST: + RunNN(axis, outTensorCpu, inTensorCpu); + break; + case NVCV_INTERP_LINEAR: + { + if (antialias) + { + RunFilter(axis, outTensorCpu, inTensorCpu, interpolation, antialias); + } + else + { + RunLinear(axis, outTensorCpu, inTensorCpu); + } + } + break; + default: + RunFilter(axis, outTensorCpu, inTensorCpu, interpolation, antialias); + break; + } +} + +template +void Resize(CpuSample &refTensorCpu, CpuSample &inTensorCpu, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, bool antialias) +{ + int numSamples = inTensorCpu.numSamples(); + int numChannels = inTensorCpu.numChannels(); + const int2 inShape = inTensorCpu.shape(); + const int2 outShape = refTensorCpu.shape(); + const int2 interShape = {outShape.x, inShape.y}; + auto intermediateTensor = GetIntermediate(numSamples, interShape, numChannels); + RunPass(0, intermediateTensor, inTensorCpu, minInterpolation, magInterpolation, antialias); + RunPass(1, refTensorCpu, intermediateTensor, minInterpolation, magInterpolation, antialias); +} + +template +void Resize(CpuSample &refTensorCpu, CpuSample &inTensorCpu, + const NVCVInterpolationType minInterpolation, const NVCVInterpolationType magInterpolation, bool antialias) +{ + int numSamples = inTensorCpu.numSamples(); + int numChannels = inTensorCpu.numChannels(); + const int3 inShape = inTensorCpu.shape(); + const int3 outShape = refTensorCpu.shape(); + const int3 interShape0 = {outShape.x, inShape.y, inShape.z}; + const int3 interShape1 = {outShape.x, outShape.y, inShape.z}; + auto intermediateTensor0 = GetIntermediate(numSamples, interShape0, numChannels); + RunPass(0, intermediateTensor0, inTensorCpu, minInterpolation, magInterpolation, antialias); + auto intermediateTensor1 = GetIntermediate(numSamples, interShape1, numChannels); + RunPass(1, intermediateTensor1, intermediateTensor0, minInterpolation, magInterpolation, antialias); + RunPass(2, refTensorCpu, intermediateTensor1, minInterpolation, magInterpolation, antialias); +} + +template +void Compare(CpuSample &tensor, CpuSample &refTensor, bool antialias) +{ + int numSamples = tensor.numSamples(); + int numChannels = tensor.numChannels(); + const auto shape = tensor.shape(); + ASSERT_EQ(numSamples, refTensor.numSamples()); + ASSERT_EQ(numChannels, refTensor.numChannels()); + ASSERT_EQ(shape, refTensor.shape()); + double err = 0; + int64_t vol = 0; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + ForAll(shape, + [&](const cuda::MakeType idx) + { + for (int c = 0; c < numChannels; c++) + { + const BT val = tensor.get(sampleIdx, idx, c); + const BT refVal = refTensor.get(sampleIdx, idx, c); + err += abs(val - refVal); + vol += 1; + + if (std::is_integral_v) // uchar -> 
uchar, short -> short, ushort -> ushort + { + ASSERT_NEAR(val, refVal, (std::is_same_v ? 1 : 10)); // uchar : short, ushort + } + else // output type is float + { + if (!std::is_integral_v) // float -> float + { + ASSERT_NEAR(val, refVal, 1e-4); + } + else // [uchar, short, ushort] -> float + { + ASSERT_NEAR(val, refVal, (std::is_same_v ? 0.1 : 6)); + } + } + } + }); + } + double mean_err = err / vol; + ASSERT_LE(mean_err, antialias ? 0.1 : 0.4); +} +} // namespace baseline + +inline void GetMaxShape(HQResizeTensorShapeI &ret, const HQResizeTensorShapeI &other) +{ + ASSERT_EQ(ret.ndim, other.ndim); + ret.numChannels = std::max(ret.numChannels, other.numChannels); + for (int d = 0; d < ret.ndim; d++) + { + ret.extent[d] = std::max(ret.extent[d], other.extent[d]); + } +} + +inline void GetMaxShape(HQResizeTensorShapeI &ret, const HQResizeTensorShapeI *shapes, int numSamples) +{ + if (numSamples > 0) + { + ret = shapes[0]; + for (int i = 1; i < numSamples; i++) + { + GetMaxShape(ret, shapes[i]); + } + } +} + +template +struct TypeAsFormatImpl +{ +}; + +template<> +struct TypeAsFormatImpl +{ + static constexpr NVCVDataType value = NVCV_DATA_TYPE_U8; +}; + +template<> +struct TypeAsFormatImpl +{ + static constexpr NVCVDataType value = NVCV_DATA_TYPE_S16; +}; + +template<> +struct TypeAsFormatImpl +{ + static constexpr NVCVDataType value = NVCV_DATA_TYPE_U16; +}; + +template<> +struct TypeAsFormatImpl +{ + static constexpr NVCVDataType value = NVCV_DATA_TYPE_F32; +}; + +template +nvcv::DataType TypeAsFormat() +{ + return nvcv::DataType{TypeAsFormatImpl::value}; +} + +template +nvcv::Tensor CreateTensorHelper(nvcv::DataType dtype, const char *layoutStr, int numSamples, Extents... extents) +{ + nvcv::TensorLayout layout{layoutStr}; + if (numSamples == 1) + { + nvcv::TensorShape shape{{extents...}, layout.last(sizeof...(extents))}; + return nvcv::Tensor{shape, dtype}; + } + else + { + nvcv::TensorShape shape{ + {numSamples, extents...}, + layout + }; + return nvcv::Tensor{shape, dtype}; + } +} + +#define NVCV_SHAPE2D(h, w) (int2{w, h}) +#define NVCV_TEST_ROW(NumSamples, InShape, OutShape, NumChannels, InT, OutT, Interpolation) \ + ttype::Types, ttype::Value, ttype::Value, ttype::Value, \ + InT, OutT, ttype::Value> + +NVCV_TYPED_TEST_SUITE( + OpHQResizeTensor2D, + // [uchar, ushort, short, float] x [same, float] x [1, 2, 3, 4, more channels] + // the input and output shapes: [x, y] -> [scale_down, scale_up] + // interpolation methods: [nn, linear, gaussian, cubic, lanczos] + ttype::Types< + NVCV_TEST_ROW(1, NVCV_SHAPE2D(769, 211), NVCV_SHAPE2D(40, 40), 1, uchar, uchar, NVCV_INTERP_NEAREST), + NVCV_TEST_ROW(2, NVCV_SHAPE2D(1024, 101), NVCV_SHAPE2D(105, 512), 1, uchar, float, NVCV_INTERP_LINEAR), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(31, 244), NVCV_SHAPE2D(311, 122), 2, uchar, uchar, NVCV_INTERP_CUBIC), + NVCV_TEST_ROW(4, NVCV_SHAPE2D(41, 41), NVCV_SHAPE2D(244, 244), 2, uchar, float, NVCV_INTERP_GAUSSIAN), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(769, 211), NVCV_SHAPE2D(40, 40), 3, uchar, uchar, NVCV_INTERP_LANCZOS), + NVCV_TEST_ROW(4, NVCV_SHAPE2D(1024, 101), NVCV_SHAPE2D(105, 512), 3, uchar, float, NVCV_INTERP_LANCZOS), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(31, 244), NVCV_SHAPE2D(311, 122), 4, uchar, uchar, NVCV_INTERP_GAUSSIAN), + NVCV_TEST_ROW(4, NVCV_SHAPE2D(41, 41), NVCV_SHAPE2D(244, 244), 4, uchar, float, NVCV_INTERP_CUBIC), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(31, 244), NVCV_SHAPE2D(311, 122), 5, uchar, uchar, NVCV_INTERP_LINEAR), + NVCV_TEST_ROW(4, NVCV_SHAPE2D(41, 41), NVCV_SHAPE2D(244, 244), 8, uchar, float, 
NVCV_INTERP_LINEAR), + + NVCV_TEST_ROW(1, NVCV_SHAPE2D(769, 211), NVCV_SHAPE2D(40, 40), 1, ushort, ushort, NVCV_INTERP_LINEAR), + NVCV_TEST_ROW(2, NVCV_SHAPE2D(1024, 101), NVCV_SHAPE2D(105, 512), 1, short, float, NVCV_INTERP_LINEAR), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(31, 244), NVCV_SHAPE2D(311, 122), 2, short, short, NVCV_INTERP_GAUSSIAN), + NVCV_TEST_ROW(4, NVCV_SHAPE2D(41, 41), NVCV_SHAPE2D(244, 244), 2, ushort, float, NVCV_INTERP_CUBIC), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(769, 211), NVCV_SHAPE2D(40, 40), 3, ushort, ushort, NVCV_INTERP_GAUSSIAN), + NVCV_TEST_ROW(4, NVCV_SHAPE2D(1024, 101), NVCV_SHAPE2D(105, 512), 3, short, float, NVCV_INTERP_GAUSSIAN), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(31, 244), NVCV_SHAPE2D(311, 122), 4, ushort, ushort, NVCV_INTERP_LINEAR), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(31, 244), NVCV_SHAPE2D(311, 122), 7, ushort, float, NVCV_INTERP_NEAREST), + + NVCV_TEST_ROW(3, NVCV_SHAPE2D(769, 211), NVCV_SHAPE2D(40, 40), 1, float, float, NVCV_INTERP_NEAREST), + NVCV_TEST_ROW(4, NVCV_SHAPE2D(1024, 101), NVCV_SHAPE2D(105, 512), 2, float, float, NVCV_INTERP_LINEAR), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(31, 244), NVCV_SHAPE2D(311, 122), 3, float, float, NVCV_INTERP_CUBIC), + NVCV_TEST_ROW(4, NVCV_SHAPE2D(41, 41), NVCV_SHAPE2D(244, 244), 4, float, float, NVCV_INTERP_GAUSSIAN), + NVCV_TEST_ROW(3, NVCV_SHAPE2D(769, 211), NVCV_SHAPE2D(40, 40), 7, float, float, NVCV_INTERP_LANCZOS)>); + +template +void TestTensor(bool antialias) +{ + const int numSamples = ttype::GetValue; + const int2 inShape = ttype::GetValue; + const int2 outShape = ttype::GetValue; + const int numChannels = ttype::GetValue; + using InBT = ttype::GetType; + using OutBT = ttype::GetType; + const nvcv::DataType inDtype = TypeAsFormat(); + const nvcv::DataType outDtype = TypeAsFormat(); + const NVCVInterpolationType interpolation = ttype::GetValue; + + nvcv::Tensor inTensor = CreateTensorHelper(inDtype, "NHWC", numSamples, inShape.y, inShape.x, numChannels); + nvcv::Tensor outTensor = CreateTensorHelper(outDtype, "NHWC", numSamples, outShape.y, outShape.x, numChannels); + + auto inData = inTensor.exportData(); + auto outData = outTensor.exportData(); + ASSERT_TRUE(inData && outData); + + auto inAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(*inData); + auto outAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(*outData); + ASSERT_TRUE(inAccess && outAccess); + long3 inStrides{inAccess->colStride(), inAccess->rowStride(), + inAccess->sampleStride() == 0 ? inAccess->rowStride() * inShape.y : inAccess->sampleStride()}; + long3 outStrides{outAccess->colStride(), outAccess->rowStride(), + outAccess->sampleStride() == 0 ? outAccess->rowStride() * outShape.y : outAccess->sampleStride()}; + + ASSERT_EQ(inAccess->numSamples(), numSamples); + ASSERT_EQ(inAccess->numChannels(), numChannels); + ASSERT_EQ(outAccess->numChannels(), numChannels); + ASSERT_EQ(outAccess->numSamples(), numSamples); + + baseline::CpuSample inTensorCpu(inStrides.z * numSamples, inStrides, numSamples, inShape, numChannels); + baseline::CpuSample outTensorCpu(outStrides.z * numSamples, outStrides, numSamples, outShape, + numChannels); + baseline::CpuSample refTensorCpu(outStrides.z * numSamples, outStrides, numSamples, outShape, + numChannels); + + uniform_distribution rand(InBT{0}, std::is_integral_v ? 
cuda::TypeTraits::max : InBT{1}); + std::mt19937_64 rng(12345); + + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + for (int y = 0; y < inShape.y; y++) + { + for (int x = 0; x < inShape.x; x++) + { + for (int c = 0; c < numChannels; c++) + { + inTensorCpu.get(sampleIdx, int2{x, y}, c) = rand(rng); + } + } + } + } + + cvcuda::HQResize op; + cudaStream_t stream; + cvcuda::UniqueWorkspace ws; + { + HQResizeTensorShapeI inShapeDesc{ + {inShape.y, inShape.x}, + 2, + numChannels + }; + HQResizeTensorShapeI outShapeDesc{ + {outShape.y, outShape.x}, + 2, + numChannels + }; + ASSERT_NO_THROW(ws = cvcuda::AllocateWorkspace(op.getWorkspaceRequirements( + numSamples, inShapeDesc, outShapeDesc, interpolation, interpolation, antialias))); + } + ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(inData->basePtr(), inTensorCpu.data(), inStrides.z * numSamples, + cudaMemcpyHostToDevice, stream)); + ASSERT_NO_THROW(op(stream, ws.get(), inTensor, outTensor, interpolation, interpolation, antialias)); + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(outTensorCpu.data(), outData->basePtr(), outStrides.z * numSamples, + cudaMemcpyDeviceToHost, stream)); + baseline::Resize(refTensorCpu, inTensorCpu, interpolation, interpolation, antialias); + ASSERT_EQ(cudaSuccess, cudaStreamSynchronize(stream)); + baseline::Compare(outTensorCpu, refTensorCpu, antialias); + ASSERT_EQ(cudaSuccess, cudaStreamDestroy(stream)); +} + +TYPED_TEST(OpHQResizeTensor2D, correct_output_no_antialias) +{ + TestTensor(false); +} + +TYPED_TEST(OpHQResizeTensor2D, correct_output_with_antialias) +{ + TestTensor(true); +} + +#define NVCV_SHAPE3D(d, h, w) (int3{w, h, d}) +NVCV_TYPED_TEST_SUITE( + OpHQResizeTensor3D, + ttype::Types< + NVCV_TEST_ROW(1, NVCV_SHAPE3D(244, 244, 244), NVCV_SHAPE3D(40, 40, 40), 1, uchar, uchar, NVCV_INTERP_NEAREST), + NVCV_TEST_ROW(2, NVCV_SHAPE3D(40, 40, 40), NVCV_SHAPE3D(244, 244, 244), 2, uchar, float, NVCV_INTERP_GAUSSIAN), + NVCV_TEST_ROW(3, NVCV_SHAPE3D(100, 100, 100), NVCV_SHAPE3D(50, 100, 100), 3, ushort, ushort, NVCV_INTERP_CUBIC), + NVCV_TEST_ROW(4, NVCV_SHAPE3D(100, 100, 100), NVCV_SHAPE3D(100, 50, 100), 4, ushort, float, NVCV_INTERP_LINEAR), + NVCV_TEST_ROW(3, NVCV_SHAPE3D(100, 100, 100), NVCV_SHAPE3D(100, 100, 50), 3, float, float, NVCV_INTERP_CUBIC), + NVCV_TEST_ROW(4, NVCV_SHAPE3D(40, 40, 40), NVCV_SHAPE3D(100, 40, 40), 5, uchar, float, NVCV_INTERP_LANCZOS), + NVCV_TEST_ROW(7, NVCV_SHAPE3D(40, 40, 40), NVCV_SHAPE3D(50, 150, 100), 3, uchar, uchar, NVCV_INTERP_CUBIC)>); + +TYPED_TEST(OpHQResizeTensor3D, correct_output_with_antialias) +{ + const int numSamples = ttype::GetValue; + const int3 inShape = ttype::GetValue; + const int3 outShape = ttype::GetValue; + const int numChannels = ttype::GetValue; + using InBT = ttype::GetType; + using OutBT = ttype::GetType; + const nvcv::DataType inDtype = TypeAsFormat(); + const nvcv::DataType outDtype = TypeAsFormat(); + const NVCVInterpolationType interpolation = ttype::GetValue; + constexpr bool antialias = true; + + nvcv::Tensor inTensor + = CreateTensorHelper(inDtype, "NDHWC", numSamples, inShape.z, inShape.y, inShape.x, numChannels); + nvcv::Tensor outTensor + = CreateTensorHelper(outDtype, "NDHWC", numSamples, outShape.z, outShape.y, outShape.x, numChannels); + + auto inData = inTensor.exportData(); + auto outData = outTensor.exportData(); + ASSERT_TRUE(inData && outData); + + auto inAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(*inData); + auto outAccess = 
nvcv::TensorDataAccessStridedImagePlanar::Create(*outData); + ASSERT_TRUE(inAccess && outAccess); + long4 inStrides{inAccess->colStride(), inAccess->rowStride(), inAccess->depthStride(), + inAccess->sampleStride() == 0 ? inAccess->depthStride() * inShape.z : inAccess->sampleStride()}; + long4 outStrides{ + outAccess->colStride(), outAccess->rowStride(), outAccess->depthStride(), + outAccess->sampleStride() == 0 ? outAccess->depthStride() * outShape.z : outAccess->sampleStride()}; + + ASSERT_EQ(inAccess->numSamples(), numSamples); + ASSERT_EQ(inAccess->numChannels(), numChannels); + ASSERT_EQ(outAccess->numChannels(), numChannels); + ASSERT_EQ(outAccess->numSamples(), numSamples); + + baseline::CpuSample inTensorCpu(inStrides.w * numSamples, inStrides, numSamples, inShape, numChannels); + baseline::CpuSample outTensorCpu(outStrides.w * numSamples, outStrides, numSamples, outShape, + numChannels); + baseline::CpuSample refTensorCpu(outStrides.w * numSamples, outStrides, numSamples, outShape, + numChannels); + + uniform_distribution rand(InBT{0}, std::is_integral_v ? cuda::TypeTraits::max : InBT{1}); + std::mt19937_64 rng(12345); + + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + for (int z = 0; z < inShape.z; z++) + { + for (int y = 0; y < inShape.y; y++) + { + for (int x = 0; x < inShape.x; x++) + { + for (int c = 0; c < numChannels; c++) + { + inTensorCpu.get(sampleIdx, int3{x, y, z}, c) = rand(rng); + } + } + } + } + } + + cvcuda::HQResize op; + cudaStream_t stream; + cvcuda::UniqueWorkspace ws; + { + HQResizeTensorShapeI inShapeDesc{ + {inShape.z, inShape.y, inShape.x}, + 3, + numChannels + }; + HQResizeTensorShapeI outShapeDesc{ + {outShape.z, outShape.y, outShape.x}, + 3, + numChannels + }; + ASSERT_NO_THROW(ws = cvcuda::AllocateWorkspace(op.getWorkspaceRequirements( + numSamples, inShapeDesc, outShapeDesc, interpolation, interpolation, antialias))); + } + ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(inData->basePtr(), inTensorCpu.data(), inStrides.w * numSamples, + cudaMemcpyHostToDevice, stream)); + ASSERT_NO_THROW(op(stream, ws.get(), inTensor, outTensor, interpolation, interpolation, antialias)); + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(outTensorCpu.data(), outData->basePtr(), outStrides.w * numSamples, + cudaMemcpyDeviceToHost, stream)); + baseline::Resize(refTensorCpu, inTensorCpu, interpolation, interpolation, antialias); + ASSERT_EQ(cudaSuccess, cudaStreamSynchronize(stream)); + baseline::Compare(outTensorCpu, refTensorCpu, antialias); + ASSERT_EQ(cudaSuccess, cudaStreamDestroy(stream)); +} + +#define NVCV_TEST_ROW_TB(NumChannels, InT, OutT, Antialias, MinInterpolation, MagInterpolation) \ + ttype::Types, InT, OutT, ttype::Value, ttype::Value, \ + ttype::Value> + +NVCV_TYPED_TEST_SUITE(OpHQResizeBatch, + ttype::Types); + +TYPED_TEST(OpHQResizeBatch, tensor_batch_2d_correct_output) +{ + const int numChannels = ttype::GetValue; + using InBT = ttype::GetType; + using OutBT = ttype::GetType; + const nvcv::DataType inDtype = TypeAsFormat(); + const nvcv::DataType outDtype = TypeAsFormat(); + const bool antialias = ttype::GetValue; + const NVCVInterpolationType minInterpolation = ttype::GetValue; + const NVCVInterpolationType magInterpolation = ttype::GetValue; + + constexpr int numSamples = 5; + const int varChannels[numSamples] = {4, 1, 7, 3, 5}; + + std::vector inShapes = { + {{728, 1024}, 2, numChannels > 0 ? numChannels : varChannels[0]}, + { {512, 512}, 2, numChannels > 0 ? 
numChannels : varChannels[1]}, + { {128, 256}, 2, numChannels > 0 ? numChannels : varChannels[2]}, + { {256, 128}, 2, numChannels > 0 ? numChannels : varChannels[3]}, + { {40, 40}, 2, numChannels > 0 ? numChannels : varChannels[4]} + }; + std::vector outShapes = { + {{245, 245}, 2, inShapes[0].numChannels}, + { {250, 51}, 2, inShapes[1].numChannels}, + {{243, 128}, 2, inShapes[2].numChannels}, + {{128, 256}, 2, inShapes[3].numChannels}, + {{512, 512}, 2, inShapes[4].numChannels} + }; + + ASSERT_EQ(numSamples, inShapes.size()); + ASSERT_EQ(numSamples, outShapes.size()); + + nvcv::TensorBatch inTensors(numSamples); + nvcv::TensorBatch outTensors(numSamples); + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + if (numChannels == 1) + { + inTensors.pushBack( + CreateTensorHelper(inDtype, "HW", 1, inShapes[sampleIdx].extent[0], inShapes[sampleIdx].extent[1])); + outTensors.pushBack( + CreateTensorHelper(outDtype, "HW", 1, outShapes[sampleIdx].extent[0], outShapes[sampleIdx].extent[1])); + } + else + { + inTensors.pushBack(CreateTensorHelper(inDtype, "HWC", 1, inShapes[sampleIdx].extent[0], + inShapes[sampleIdx].extent[1], inShapes[sampleIdx].numChannels)); + outTensors.pushBack(CreateTensorHelper(outDtype, "HWC", 1, outShapes[sampleIdx].extent[0], + outShapes[sampleIdx].extent[1], outShapes[sampleIdx].numChannels)); + } + } + + cudaStream_t stream; + ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + + uniform_distribution rand(InBT{0}, std::is_integral_v ? cuda::TypeTraits::max : InBT{1}); + std::mt19937_64 rng(12345); + + std::vector> inBatchCpu; + std::vector> outBatchCpu; + std::vector> refBatchCpu; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + SCOPED_TRACE(sampleIdx); + auto inData = inTensors[sampleIdx].exportData(); + auto outData = outTensors[sampleIdx].exportData(); + ASSERT_TRUE(inData && outData); + + auto inAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(*inData); + auto outAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(*outData); + ASSERT_TRUE(inAccess && outAccess); + + long3 inStrides{inAccess->colStride(), inAccess->rowStride(), + inAccess->sampleStride() == 0 ? inAccess->rowStride() * inShapes[sampleIdx].extent[0] + : inAccess->sampleStride()}; + long3 outStrides{outAccess->colStride(), outAccess->rowStride(), + outAccess->sampleStride() == 0 ? 
outAccess->rowStride() * outShapes[sampleIdx].extent[0] + : outAccess->sampleStride()}; + + ASSERT_EQ(inAccess->numSamples(), 1); + ASSERT_EQ(outAccess->numSamples(), 1); + ASSERT_EQ(inAccess->numChannels(), inShapes[sampleIdx].numChannels); + ASSERT_EQ(outAccess->numChannels(), outShapes[sampleIdx].numChannels); + + int2 inShape{inShapes[sampleIdx].extent[1], inShapes[sampleIdx].extent[0]}; + int2 outShape{outShapes[sampleIdx].extent[1], outShapes[sampleIdx].extent[0]}; + inBatchCpu.push_back(baseline::CpuSample{inStrides.z, inStrides, 1, inShape, inAccess->numChannels()}); + outBatchCpu.push_back( + baseline::CpuSample{outStrides.z, outStrides, 1, outShape, outAccess->numChannels()}); + refBatchCpu.push_back( + baseline::CpuSample{outStrides.z, outStrides, 1, outShape, outAccess->numChannels()}); + + auto &inTensorCpu = inBatchCpu[sampleIdx]; + for (int y = 0; y < inShape.y; y++) + { + for (int x = 0; x < inShape.x; x++) + { + for (int c = 0; c < inShapes[sampleIdx].numChannels; c++) + { + inTensorCpu.get(0, int2{x, y}, c) = rand(rng); + } + } + } + ASSERT_EQ(cudaSuccess, + cudaMemcpyAsync(inData->basePtr(), inTensorCpu.data(), inStrides.z, cudaMemcpyHostToDevice, stream)); + } + + cvcuda::HQResize op; + cvcuda::UniqueWorkspace ws; + + { + HQResizeTensorShapesI inShapeDesc{inShapes.data(), numSamples, 2, numChannels}; + HQResizeTensorShapesI outShapeDesc{outShapes.data(), numSamples, 2, numChannels}; + ASSERT_NO_THROW(ws = cvcuda::AllocateWorkspace(op.getWorkspaceRequirements( + numSamples, inShapeDesc, outShapeDesc, minInterpolation, magInterpolation, antialias))); + } + ASSERT_NO_THROW(op(stream, ws.get(), inTensors, outTensors, minInterpolation, magInterpolation, antialias)); + + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + SCOPED_TRACE(sampleIdx); + auto outData = outTensors[sampleIdx].exportData(); + ASSERT_TRUE(outData); + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(outBatchCpu[sampleIdx].data(), outData->basePtr(), + outBatchCpu[sampleIdx].strides().z, cudaMemcpyDeviceToHost, stream)); + } + + ASSERT_EQ(cudaSuccess, cudaStreamSynchronize(stream)); + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + SCOPED_TRACE(sampleIdx); + baseline::Resize(refBatchCpu[sampleIdx], inBatchCpu[sampleIdx], minInterpolation, magInterpolation, antialias); + baseline::Compare(outBatchCpu[sampleIdx], refBatchCpu[sampleIdx], antialias); + } + ASSERT_EQ(cudaSuccess, cudaStreamDestroy(stream)); +} + +TYPED_TEST(OpHQResizeBatch, tensor_batch_3d_correct_output) +{ + const int numChannels = ttype::GetValue; + using InBT = ttype::GetType; + using OutBT = ttype::GetType; + const nvcv::DataType inDtype = TypeAsFormat(); + const nvcv::DataType outDtype = TypeAsFormat(); + const bool antialias = ttype::GetValue; + const NVCVInterpolationType minInterpolation = ttype::GetValue; + const NVCVInterpolationType magInterpolation = ttype::GetValue; + + constexpr int numSamples = 5; + const int varChannels[numSamples] = {6, 2, 3, 4, 1}; + + std::vector inShapes = { + {{128, 128, 128}, 3, numChannels > 0 ? numChannels : varChannels[0]}, + { {512, 40, 40}, 3, numChannels > 0 ? numChannels : varChannels[1]}, + { {40, 512, 40}, 3, numChannels > 0 ? numChannels : varChannels[2]}, + { {40, 40, 512}, 3, numChannels > 0 ? numChannels : varChannels[3]}, + { {40, 40, 40}, 3, numChannels > 0 ? 
numChannels : varChannels[4]} + }; + std::vector outShapes = { + { {45, 64, 50}, 3, inShapes[0].numChannels}, + { {40, 40, 40}, 3, inShapes[1].numChannels}, + { {40, 40, 40}, 3, inShapes[2].numChannels}, + { {40, 40, 40}, 3, inShapes[3].numChannels}, + {{128, 128, 128}, 3, inShapes[4].numChannels} + }; + + ASSERT_EQ(numSamples, inShapes.size()); + ASSERT_EQ(numSamples, outShapes.size()); + + nvcv::TensorBatch inTensors(numSamples); + nvcv::TensorBatch outTensors(numSamples); + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + if (numChannels == 1) + { + inTensors.pushBack(CreateTensorHelper(inDtype, "DHW", 1, inShapes[sampleIdx].extent[0], + inShapes[sampleIdx].extent[1], inShapes[sampleIdx].extent[2])); + outTensors.pushBack(CreateTensorHelper(outDtype, "DHW", 1, outShapes[sampleIdx].extent[0], + outShapes[sampleIdx].extent[1], outShapes[sampleIdx].extent[2])); + } + else + { + inTensors.pushBack(CreateTensorHelper(inDtype, "DHWC", 1, inShapes[sampleIdx].extent[0], + inShapes[sampleIdx].extent[1], inShapes[sampleIdx].extent[2], + inShapes[sampleIdx].numChannels)); + outTensors.pushBack(CreateTensorHelper(outDtype, "DHWC", 1, outShapes[sampleIdx].extent[0], + outShapes[sampleIdx].extent[1], outShapes[sampleIdx].extent[2], + outShapes[sampleIdx].numChannels)); + } + } + + cudaStream_t stream; + ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + + uniform_distribution rand(InBT{0}, std::is_integral_v ? cuda::TypeTraits::max : InBT{1}); + std::mt19937_64 rng(12345); + + std::vector> inBatchCpu; + std::vector> outBatchCpu; + std::vector> refBatchCpu; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + SCOPED_TRACE(sampleIdx); + auto inData = inTensors[sampleIdx].exportData(); + auto outData = outTensors[sampleIdx].exportData(); + ASSERT_TRUE(inData && outData); + + auto inAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(*inData); + auto outAccess = nvcv::TensorDataAccessStridedImagePlanar::Create(*outData); + ASSERT_TRUE(inAccess && outAccess); + + long4 inStrides{inAccess->colStride(), inAccess->rowStride(), inAccess->depthStride(), + inAccess->sampleStride() == 0 ? inAccess->depthStride() * inShapes[sampleIdx].extent[0] + : inAccess->sampleStride()}; + long4 outStrides{outAccess->colStride(), outAccess->rowStride(), outAccess->depthStride(), + outAccess->sampleStride() == 0 ? 
outAccess->depthStride() * outShapes[sampleIdx].extent[0] + : outAccess->sampleStride()}; + + ASSERT_EQ(inAccess->numSamples(), 1); + ASSERT_EQ(outAccess->numSamples(), 1); + ASSERT_EQ(inAccess->numChannels(), inShapes[sampleIdx].numChannels); + ASSERT_EQ(outAccess->numChannels(), outShapes[sampleIdx].numChannels); + + int3 inShape{inShapes[sampleIdx].extent[2], inShapes[sampleIdx].extent[1], inShapes[sampleIdx].extent[0]}; + int3 outShape{outShapes[sampleIdx].extent[2], outShapes[sampleIdx].extent[1], outShapes[sampleIdx].extent[0]}; + inBatchCpu.push_back(baseline::CpuSample{inStrides.w, inStrides, 1, inShape, inAccess->numChannels()}); + outBatchCpu.push_back( + baseline::CpuSample{outStrides.w, outStrides, 1, outShape, outAccess->numChannels()}); + refBatchCpu.push_back( + baseline::CpuSample{outStrides.w, outStrides, 1, outShape, outAccess->numChannels()}); + + auto &inTensorCpu = inBatchCpu[sampleIdx]; + for (int z = 0; z < inShape.z; z++) + { + for (int y = 0; y < inShape.y; y++) + { + for (int x = 0; x < inShape.x; x++) + { + for (int c = 0; c < inShapes[sampleIdx].numChannels; c++) + { + inTensorCpu.get(0, int3{x, y, z}, c) = rand(rng); + } + } + } + } + ASSERT_EQ(cudaSuccess, + cudaMemcpyAsync(inData->basePtr(), inTensorCpu.data(), inStrides.w, cudaMemcpyHostToDevice, stream)); + } + + cvcuda::HQResize op; + cvcuda::UniqueWorkspace ws; + + { + HQResizeTensorShapesI inShapeDesc{inShapes.data(), numSamples, 3, numChannels}; + HQResizeTensorShapesI outShapeDesc{outShapes.data(), numSamples, 3, numChannels}; + ASSERT_NO_THROW(ws = cvcuda::AllocateWorkspace(op.getWorkspaceRequirements( + numSamples, inShapeDesc, outShapeDesc, minInterpolation, magInterpolation, antialias))); + } + ASSERT_NO_THROW(op(stream, ws.get(), inTensors, outTensors, minInterpolation, magInterpolation, antialias)); + + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + SCOPED_TRACE(sampleIdx); + auto outData = outTensors[sampleIdx].exportData(); + ASSERT_TRUE(outData); + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(outBatchCpu[sampleIdx].data(), outData->basePtr(), + outBatchCpu[sampleIdx].strides().w, cudaMemcpyDeviceToHost, stream)); + } + + ASSERT_EQ(cudaSuccess, cudaStreamSynchronize(stream)); + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + SCOPED_TRACE(sampleIdx); + baseline::Resize(refBatchCpu[sampleIdx], inBatchCpu[sampleIdx], minInterpolation, magInterpolation, antialias); + baseline::Compare(outBatchCpu[sampleIdx], refBatchCpu[sampleIdx], antialias); + } + ASSERT_EQ(cudaSuccess, cudaStreamDestroy(stream)); +} + +#define NVCV_IMAGE_FORMAT_RGB16U \ + NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, PL, UNSIGNED, XYZ1, ASSOCIATED, X16_Y16_Z16) +#define NVCV_TEST_ROW_IB(NumChannels, InT, InFormat, OutT, OutFormat, Antialias, MinInterpolation, MagInterpolation) \ + ttype::Types, InT, ttype::Value, OutT, ttype::Value, \ + ttype::Value, ttype::Value, ttype::Value> + +NVCV_TYPED_TEST_SUITE( + OpHQResizeImageBatch, + ttype::Types); + +template +void TestImageBatch(int numSamples, std::vector &inShapes, + std::vector &outShapes, cvcuda::UniqueWorkspace &ws, + bool allocateWorkspace = true) +{ + const int numChannels = ttype::GetValue; + using InT = ttype::GetType; + using InBT = cuda::BaseType; + using OutT = ttype::GetType; + using OutBT = cuda::BaseType; + const nvcv::ImageFormat inImgFormat{ttype::GetValue}; + const nvcv::ImageFormat outImgFormat{ttype::GetValue}; + const bool antialias = ttype::GetValue; + const NVCVInterpolationType minInterpolation = ttype::GetValue; + const 
NVCVInterpolationType magInterpolation = ttype::GetValue; + + ASSERT_GE(numChannels, 1); + ASSERT_LE(numChannels, 4); + ASSERT_EQ(sizeof(InT), inImgFormat.planePixelStrideBytes(0)); + ASSERT_EQ(sizeof(OutT), outImgFormat.planePixelStrideBytes(0)); + + ASSERT_EQ(numSamples, inShapes.size()); + ASSERT_EQ(numSamples, outShapes.size()); + + cudaStream_t stream; + ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + + uniform_distribution rand(InBT{0}, std::is_integral_v ? cuda::TypeTraits::max : InBT{1}); + std::mt19937_64 rng(12345); + + std::vector imgSrc; + std::vector imgDst; + std::vector> inBatchCpu; + std::vector> outBatchCpu; + std::vector> refBatchCpu; + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + SCOPED_TRACE(sampleIdx); + nvcv::Size2D inImgShape{inShapes[sampleIdx].extent[1], inShapes[sampleIdx].extent[0]}; + imgSrc.emplace_back(inImgShape, inImgFormat); + nvcv::Size2D outImgShape{outShapes[sampleIdx].extent[1], outShapes[sampleIdx].extent[0]}; + imgDst.emplace_back(outImgShape, outImgFormat); + + auto inData = imgSrc[sampleIdx].exportData(); + auto outData = imgDst[sampleIdx].exportData(); + ASSERT_TRUE(inData && outData); + + long3 inStrides{sizeof(InT), inData->plane(0).rowStride, inData->plane(0).rowStride * inData->plane(0).height}; + long3 outStrides{sizeof(OutT), outData->plane(0).rowStride, + outData->plane(0).rowStride * outData->plane(0).height}; + + inBatchCpu.push_back(baseline::CpuSample{ + inStrides.z, inStrides, 1, int2{inImgShape.w, inImgShape.h}, + numChannels + }); + outBatchCpu.push_back(baseline::CpuSample{ + outStrides.z, outStrides, 1, int2{outImgShape.w, outImgShape.h}, + numChannels + }); + refBatchCpu.push_back(baseline::CpuSample{ + outStrides.z, outStrides, 1, int2{outImgShape.w, outImgShape.h}, + numChannels + }); + + auto &inTensorCpu = inBatchCpu[sampleIdx]; + for (int y = 0; y < inImgShape.h; y++) + { + for (int x = 0; x < inImgShape.w; x++) + { + for (int c = 0; c < numChannels; c++) + { + inTensorCpu.get(0, int2{x, y}, c) = rand(rng); + } + } + } + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(inData->plane(0).basePtr, inTensorCpu.data(), inStrides.z, + cudaMemcpyHostToDevice, stream)); + } + + nvcv::ImageBatchVarShape batchSrc(numSamples); + nvcv::ImageBatchVarShape batchDst(numSamples); + batchSrc.pushBack(imgSrc.begin(), imgSrc.end()); + batchDst.pushBack(imgDst.begin(), imgDst.end()); + + cvcuda::HQResize op; + if (allocateWorkspace) + { + HQResizeTensorShapesI inShapeDesc{inShapes.data(), numSamples, 2, numChannels}; + HQResizeTensorShapesI outShapeDesc{outShapes.data(), numSamples, 2, numChannels}; + ASSERT_NO_THROW(ws = cvcuda::AllocateWorkspace(op.getWorkspaceRequirements( + numSamples, inShapeDesc, outShapeDesc, minInterpolation, magInterpolation, antialias))); + } + ASSERT_NO_THROW(op(stream, ws.get(), batchSrc, batchDst, minInterpolation, magInterpolation, antialias)); + + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + SCOPED_TRACE(sampleIdx); + const auto outData = imgDst[sampleIdx].exportData(); + ASSERT_TRUE(outData); + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(outBatchCpu[sampleIdx].data(), outData->plane(0).basePtr, + outBatchCpu[sampleIdx].strides().z, cudaMemcpyDeviceToHost, stream)); + } + + ASSERT_EQ(cudaSuccess, cudaStreamSynchronize(stream)); + for (int sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) + { + SCOPED_TRACE(sampleIdx); + baseline::Resize(refBatchCpu[sampleIdx], inBatchCpu[sampleIdx], minInterpolation, magInterpolation, antialias); + baseline::Compare(outBatchCpu[sampleIdx], 
refBatchCpu[sampleIdx], antialias); + } + ASSERT_EQ(cudaSuccess, cudaStreamDestroy(stream)); +} + +TYPED_TEST(OpHQResizeImageBatch, varbatch_2d_correct_output) +{ + const int numSamples = 4; + std::vector inShapes = {{{256, 128}}, {{40, 40}}, {{728, 1024}}, {{128, 256}}}; + std::vector outShapes = {{{128, 256}}, {{512, 512}}, {{245, 245}}, {{243, 128}}}; + cvcuda::UniqueWorkspace ws; + TestImageBatch(numSamples, inShapes, outShapes, ws); +} + +TEST(OpHQResizeImageBatch, test_multi_run_single_workspace) +{ + using FirstRun = typename NVCV_TEST_ROW_IB(1, uchar, NVCV_IMAGE_FORMAT_U8, uchar, NVCV_IMAGE_FORMAT_U8, false, + NVCV_INTERP_LINEAR, NVCV_INTERP_CUBIC); + using SecondRun = typename NVCV_TEST_ROW_IB(3, uchar3, NVCV_IMAGE_FORMAT_RGB8, float3, NVCV_IMAGE_FORMAT_RGBf32, + true, NVCV_INTERP_LANCZOS, NVCV_INTERP_LINEAR); + + const int numSamples0 = 1; + std::vector inShapes0 = { + {{128, 128}, 2, 1} + }; + std::vector outShapes0 = { + {{40, 50}, 2, 1} + }; + + const int numSamples1 = 3; + std::vector inShapes1 = { + { {50, 40}, 2, 3}, + { {64, 64}, 2, 3}, + {{128, 128}, 2, 3} + }; + std::vector outShapes1 = { + {{128, 128}, 2, 3}, + {{128, 128}, 2, 3}, + {{128, 128}, 2, 3} + }; + + HQResizeTensorShapeI maxShape; + GetMaxShape(maxShape, inShapes0.data(), numSamples0); + GetMaxShape(maxShape, outShapes0.data(), numSamples0); + GetMaxShape(maxShape, inShapes1.data(), numSamples1); + GetMaxShape(maxShape, outShapes1.data(), numSamples1); + + cvcuda::HQResize op; + cvcuda::UniqueWorkspace ws; + ASSERT_NO_THROW( + ws = cvcuda::AllocateWorkspace(op.getWorkspaceRequirements(std::max(numSamples0, numSamples1), maxShape))); + TestImageBatch(numSamples0, inShapes0, outShapes0, ws, false); + TestImageBatch(numSamples1, inShapes1, outShapes1, ws, false); +} diff --git a/tests/cvcuda/system/TestOpMedianBlur.cpp b/tests/cvcuda/system/TestOpMedianBlur.cpp index cae0b279e..aaaa331db 100644 --- a/tests/cvcuda/system/TestOpMedianBlur.cpp +++ b/tests/cvcuda/system/TestOpMedianBlur.cpp @@ -307,7 +307,7 @@ TEST_P(OpMedianBlur, varshape_correct_output) const int bytesPerPixel = 3; // Create tensor to store kernel size - nvcv::Tensor ksizeTensor(nvcv::TensorShape({numberOfImages, 2}, nvcv::TENSOR_NW), nvcv::TYPE_S32); + nvcv::Tensor ksizeTensor(nvcv::TensorShape({numberOfImages}, "N"), nvcv::TYPE_2S32); auto ksizeTensorData = ksizeTensor.exportData(); ASSERT_NE(nullptr, ksizeTensorData); diff --git a/tests/cvcuda/system/TestOpRandomResizedCrop.cpp b/tests/cvcuda/system/TestOpRandomResizedCrop.cpp index a287c1fcf..e73223a15 100644 --- a/tests/cvcuda/system/TestOpRandomResizedCrop.cpp +++ b/tests/cvcuda/system/TestOpRandomResizedCrop.cpp @@ -352,3 +352,71 @@ TEST_P(OpRandomResizedCrop, varshape_correct_output) EXPECT_THAT(mae, t::Each(t::Le(maeThreshold))); } } + +TEST(OpRandomResizedCrop_negative, createWithNullHandle) +{ + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaRandomResizedCropCreate(nullptr, 0.2, 1.0, 0.8, 1.3, 2, 0)); +} + +TEST(OpRandomResizedCrop_negative, createWithInvalidScale) +{ + NVCVOperatorHandle opHandle; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaRandomResizedCropCreate(&opHandle, 1.0, 0.2, 0.8, 1.3, 2, 0)); +} + +TEST(OpRandomResizedCrop_negative, createWithInvalidRatio) +{ + NVCVOperatorHandle opHandle; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaRandomResizedCropCreate(&opHandle, 0.2, 1.0, 1.3, 0.8, 2, 0)); +} + +// clang-format off +NVCV_TEST_SUITE_P(OpRandomResizedCrop_negative, nvcv::test::ValueList +{ + // in_layout, in_data_type, out_layout, out_data_type, interpolation, 
expected_return_status + { "CHW", nvcv::TYPE_U8, "HWC", nvcv::TYPE_U8, NVCV_INTERP_NEAREST, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "CHW", nvcv::TYPE_U8, NVCV_INTERP_NEAREST, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_F64, "HWC", nvcv::TYPE_U8, NVCV_INTERP_NEAREST, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "HWC", nvcv::TYPE_F64, NVCV_INTERP_NEAREST, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U32, "HWC", nvcv::TYPE_U8, NVCV_INTERP_NEAREST, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "HWC", nvcv::TYPE_U32, NVCV_INTERP_NEAREST, NVCV_ERROR_INVALID_ARGUMENT}, + { "HWC", nvcv::TYPE_U8, "HWC", nvcv::TYPE_U8, NVCV_INTERP_AREA, NVCV_ERROR_INVALID_ARGUMENT}, +}); + +// clang-format on + +TEST_P(OpRandomResizedCrop_negative, infer_negative_parameter) +{ + std::string in_layout = GetParamValue<0>(); + nvcv::DataType in_data_type = GetParamValue<1>(); + std::string out_layout = GetParamValue<2>(); + nvcv::DataType out_data_type = GetParamValue<3>(); + NVCVInterpolationType interpolation = GetParamValue<4>(); + NVCVStatus expected_return_status = GetParamValue<5>(); + + double minScale = 0.08; + double maxScale = 1.0; + double minRatio = 3.0 / 4; + double maxRatio = 4.0 / 3; + + nvcv::Tensor imgSrc( + { + {24, 24, 2}, + in_layout.c_str() + }, + in_data_type); + nvcv::Tensor imgDst( + { + {24, 24, 2}, + out_layout.c_str() + }, + out_data_type); + + // Create and Call operator + int numberOfImages = 4; + uint32_t seed = 1; + + cvcuda::RandomResizedCrop randomResizedCropOp(minScale, maxScale, minRatio, maxRatio, numberOfImages, seed); + EXPECT_EQ(expected_return_status, + nvcv::ProtectCall([&] { randomResizedCropOp(NULL, imgSrc, imgDst, interpolation); })); +} diff --git a/tests/cvcuda/system/TestOpResize.cpp b/tests/cvcuda/system/TestOpResize.cpp index 3cde6b857..26140de44 100644 --- a/tests/cvcuda/system/TestOpResize.cpp +++ b/tests/cvcuda/system/TestOpResize.cpp @@ -56,8 +56,9 @@ NVCV_TEST_SUITE_P(OpResize, test::ValueList rndDstHeight(dstHeightBase * 0.8, dstHeightBase * 1.1); std::vector imgSrc, imgDst; - for (int i = 0; i < numberOfImages; ++i) + // The size of the first image is fixed: to cover area fast code path + imgSrc.emplace_back(nvcv::Size2D{srcWidthBase, srcHeightBase}, fmt); + imgDst.emplace_back(nvcv::Size2D{dstHeightBase, dstHeightBase}, fmt); + for (int i = 0; i < numberOfImages - 1; ++i) { imgSrc.emplace_back(nvcv::Size2D{rndSrcWidth(randEng), rndSrcHeight(randEng)}, fmt); imgDst.emplace_back(nvcv::Size2D{rndDstWidth(randEng), rndDstHeight(randEng)}, fmt); @@ -249,7 +253,7 @@ TEST_P(OpResize, varshape_correct_output) // Generate gold result test::Resize(goldVec, dstRowStride, {dstWidth, dstHeight}, srcVec[i], srcVecRowStride[i], {srcWidth, srcHeight}, - fmt, interpolation); + fmt, interpolation, true); // maximum absolute error std::vector mae(testVec.size()); diff --git a/tests/cvcuda/system/TestOpThreshold.cpp b/tests/cvcuda/system/TestOpThreshold.cpp index 8a0671d85..a495173c6 100644 --- a/tests/cvcuda/system/TestOpThreshold.cpp +++ b/tests/cvcuda/system/TestOpThreshold.cpp @@ -154,9 +154,10 @@ static double getThreshVal_Triangle(std::vector &src) return thresh; } +namespace { //test for uint8 template -static void Threshold(std::vector &src, std::vector &dst, double thresh, double maxval, uint32_t type) +void Threshold(std::vector &src, std::vector &dst, double thresh, double maxval, uint32_t type) { int automatic_thresh = (type & ~NVCV_THRESH_MASK); type &= NVCV_THRESH_MASK; @@ -214,17 +215,105 @@ static void 
Threshold(std::vector &src, std::vector &dst, double thresh, d } } +// test for double +template<> +void Threshold(std::vector &src, std::vector &dst, double thresh, double maxval, uint32_t type) +{ + int automatic_thresh = (type & ~NVCV_THRESH_MASK); + type &= NVCV_THRESH_MASK; + + if (automatic_thresh == (NVCV_THRESH_OTSU | NVCV_THRESH_TRIANGLE) || automatic_thresh == NVCV_THRESH_OTSU + || automatic_thresh == NVCV_THRESH_TRIANGLE) + return; + dst.assign(src.begin(), src.end()); + + int size = src.size(); + switch (type) + { + case NVCV_THRESH_BINARY: + for (int i = 0; i < size; i++) dst[i] = src[i] > thresh ? maxval : 0; + break; + case NVCV_THRESH_BINARY_INV: + for (int i = 0; i < size; i++) dst[i] = src[i] <= thresh ? maxval : 0; + break; + case NVCV_THRESH_TRUNC: + for (int i = 0; i < size; i++) dst[i] = std::min(static_cast(src[i]), thresh); + break; + case NVCV_THRESH_TOZERO: + for (int i = 0; i < size; i++) dst[i] = src[i] > thresh ? src[i] : 0; + break; + case NVCV_THRESH_TOZERO_INV: + for (int i = 0; i < size; i++) dst[i] = src[i] <= thresh ? src[i] : 0; + break; + } +} + +void ThresholdWrapper(std::vector &src, std::vector &dst, double thresh, double maxval, uint32_t type, + NVCVDataType nvcvDataType) +{ + if (nvcvDataType == NVCV_DATA_TYPE_F64) + { + std::vector src_tmp(src.size() / sizeof(double)); + std::vector dst_tmp(dst.size() / sizeof(double)); + size_t copySize = src.size(); + memcpy(static_cast(src_tmp.data()), static_cast(src.data()), copySize); + memcpy(static_cast(dst_tmp.data()), static_cast(dst.data()), copySize); + Threshold(src_tmp, dst_tmp, thresh, maxval, type); + memcpy(static_cast(dst.data()), static_cast(dst_tmp.data()), copySize); + } + else + { + Threshold(src, dst, thresh, maxval, type); + } +} + +template +void myGenerate(T *src, std::size_t size, std::default_random_engine &randEng) +{ + std::uniform_int_distribution rand(0u, 255u); + for (std::size_t idx = 0; idx < size; ++idx) + { + src[idx] = rand(randEng); + } +} + +template<> +void myGenerate(double *src, std::size_t size, std::default_random_engine &randEng) +{ + std::uniform_real_distribution rand(0., 1.); + for (std::size_t idx = 0; idx < size; ++idx) + { + src[idx] = rand(randEng); + } +} +} // namespace + // clang-format off -NVCV_TEST_SUITE_P(OpThreshold, nvcv::test::ValueList +NVCV_TEST_SUITE_P(OpThreshold, nvcv::test::ValueList { - //batch, height, width, type, thresh, maxval - { 1, 480, 360, NVCV_THRESH_BINARY, 100, 255}, - { 5, 100, 100, NVCV_THRESH_BINARY_INV, 100, 255}, - { 4, 100, 101, NVCV_THRESH_TRUNC, 100, 255}, - { 3, 360, 480, NVCV_THRESH_TOZERO, 100, 255}, - { 2, 100, 101, NVCV_THRESH_TOZERO_INV, 100, 255}, - { 1, 800, 600, NVCV_THRESH_OTSU|NVCV_THRESH_BINARY, 100, 255}, - { 3, 600, 1000, NVCV_THRESH_TRIANGLE|NVCV_THRESH_BINARY_INV, 100, 255}, + //batch, height, width, type, thresh, maxval, format, + { 1, 480, 360, NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_U8}, + { 1, 480, 360, NVCV_THRESH_BINARY, -1, 255, nvcv::FMT_U8}, + { 1, 480, 360, NVCV_THRESH_BINARY, 256, 255, nvcv::FMT_U8}, + { 1, 480, 360, NVCV_THRESH_BINARY, 0.5, 255, nvcv::FMT_F64}, + { 5, 100, 100, NVCV_THRESH_BINARY_INV, 100, 255, nvcv::FMT_U8}, + { 5, 100, 100, NVCV_THRESH_BINARY_INV, -1, 255, nvcv::FMT_U8}, + { 5, 100, 100, NVCV_THRESH_BINARY_INV, 256, 255, nvcv::FMT_U8}, + { 5, 100, 100, NVCV_THRESH_BINARY_INV, 0.5, 255, nvcv::FMT_F64}, + { 4, 100, 101, NVCV_THRESH_TRUNC, 100, 255, nvcv::FMT_U8}, + { 4, 100, 101, NVCV_THRESH_TRUNC, -1, 255, nvcv::FMT_U8}, + { 4, 100, 101, NVCV_THRESH_TRUNC, 256, 255, nvcv::FMT_U8}, 
+ { 4, 100, 101, NVCV_THRESH_TRUNC, 0.5, 255, nvcv::FMT_F64}, + { 3, 360, 480, NVCV_THRESH_TOZERO, 100, 255, nvcv::FMT_U8}, + { 3, 360, 480, NVCV_THRESH_TOZERO, -1, 255, nvcv::FMT_U8}, + { 3, 360, 480, NVCV_THRESH_TOZERO, 256, 255, nvcv::FMT_U8}, + { 3, 360, 480, NVCV_THRESH_TOZERO, 0.5, 255, nvcv::FMT_F64}, + { 2, 100, 101, NVCV_THRESH_TOZERO_INV, 100, 255, nvcv::FMT_U8}, + { 2, 100, 101, NVCV_THRESH_TOZERO_INV, -1, 255, nvcv::FMT_U8}, + { 2, 100, 101, NVCV_THRESH_TOZERO_INV, 256, 255, nvcv::FMT_U8}, + { 2, 100, 101, NVCV_THRESH_TOZERO_INV, 0.5, 255, nvcv::FMT_F64}, + { 1, 800, 600, NVCV_THRESH_OTSU|NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_U8}, + { 3, 600, 1000, NVCV_THRESH_TRIANGLE|NVCV_THRESH_BINARY_INV, 100, 255, nvcv::FMT_U8}, }); // clang-format on @@ -234,16 +323,19 @@ TEST_P(OpThreshold, tensor_correct_output) cudaStream_t stream; EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); - int batch = GetParamValue<0>(); - int height = GetParamValue<1>(); - int width = GetParamValue<2>(); - uint32_t type = GetParamValue<3>(); - double thresh = GetParamValue<4>(); - double maxval = GetParamValue<5>(); + int batch = GetParamValue<0>(); + int height = GetParamValue<1>(); + int width = GetParamValue<2>(); + uint32_t type = GetParamValue<3>(); + double thresh = GetParamValue<4>(); + double maxval = GetParamValue<5>(); + nvcv::ImageFormat fmt = GetParamValue<6>(); - nvcv::ImageFormat fmt = nvcv::FMT_U8; - nvcv::Tensor imgIn = nvcv::util::CreateTensor(batch, width, height, fmt); - nvcv::Tensor imgOut = nvcv::util::CreateTensor(batch, width, height, fmt); + NVCVDataType nvcvDataType; + ASSERT_EQ(NVCV_SUCCESS, nvcvImageFormatGetPlaneDataType(fmt, 0, &nvcvDataType)); + + nvcv::Tensor imgIn = nvcv::util::CreateTensor(batch, width, height, fmt); + nvcv::Tensor imgOut = nvcv::util::CreateTensor(batch, width, height, fmt); auto inData = imgIn.exportData(); ASSERT_NE(nullptr, inData); @@ -295,9 +387,16 @@ TEST_P(OpThreshold, tensor_correct_output) for (int i = 0; i < batch; i++) { - std::uniform_int_distribution rand(0, 255); srcVec[i].resize(height * rowStride); - std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return rand(randEng); }); + switch (nvcvDataType) + { + case NVCV_DATA_TYPE_F64: + myGenerate(reinterpret_cast(srcVec[i].data()), srcVec[i].size() / sizeof(double), randEng); + break; + default: + myGenerate(reinterpret_cast(srcVec[i].data()), srcVec[i].size(), randEng); + break; + } ASSERT_EQ(cudaSuccess, cudaMemcpy2D(inAccess->sampleData(i), inAccess->rowStride(), srcVec[i].data(), rowStride, rowStride, height, cudaMemcpyHostToDevice)); } @@ -319,7 +418,7 @@ TEST_P(OpThreshold, tensor_correct_output) rowStride, height, cudaMemcpyDeviceToHost)); std::vector goldVec(height * rowStride); - Threshold(srcVec[i], goldVec, thresh, maxval, type); + ThresholdWrapper(srcVec[i], goldVec, thresh, maxval, type, nvcvDataType); EXPECT_EQ(goldVec, testVec); } @@ -331,14 +430,17 @@ TEST_P(OpThreshold, varshape_correct_shape) cudaStream_t stream; EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); - int batch = GetParamValue<0>(); - int height = GetParamValue<1>(); - int width = GetParamValue<2>(); - uint32_t type = GetParamValue<3>(); - double thresh = GetParamValue<4>(); - double maxval = GetParamValue<5>(); + int batch = GetParamValue<0>(); + int height = GetParamValue<1>(); + int width = GetParamValue<2>(); + uint32_t type = GetParamValue<3>(); + double thresh = GetParamValue<4>(); + double maxval = GetParamValue<5>(); + nvcv::ImageFormat fmt = GetParamValue<6>(); + + NVCVDataType nvcvDataType; + 
ASSERT_EQ(NVCV_SUCCESS, nvcvImageFormatGetPlaneDataType(fmt, 0, &nvcvDataType)); - nvcv::ImageFormat fmt = nvcv::FMT_U8; // Create input and output std::default_random_engine randEng; std::uniform_int_distribution rndWidth(width * 0.8, width * 1.1); @@ -391,10 +493,16 @@ TEST_P(OpThreshold, varshape_correct_shape) int srcRowStride = srcWidth * fmt.planePixelStrideBytes(0); - std::uniform_int_distribution rand(0, 255); - srcVec[i].resize(srcHeight * srcRowStride); - std::generate(srcVec[i].begin(), srcVec[i].end(), [&]() { return rand(randEng); }); + switch (nvcvDataType) + { + case NVCV_DATA_TYPE_F64: + myGenerate(reinterpret_cast(srcVec[i].data()), srcVec[i].size() / sizeof(double), randEng); + break; + default: + myGenerate(reinterpret_cast(srcVec[i].data()), srcVec[i].size(), randEng); + break; + } // Copy input data to the GPU ASSERT_EQ(cudaSuccess, cudaMemcpy2D(srcData->plane(0).basePtr, srcData->plane(0).rowStride, srcVec[i].data(), @@ -429,9 +537,148 @@ TEST_P(OpThreshold, varshape_correct_shape) dstHeight, cudaMemcpyDeviceToHost)); std::vector goldVec(dstHeight * dstRowStride); - Threshold(srcVec[i], goldVec, thresh, maxval, type); + ThresholdWrapper(srcVec[i], goldVec, thresh, maxval, type, nvcvDataType); EXPECT_EQ(goldVec, testVec); } EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream)); } + +// clang-format off +NVCV_TEST_SUITE_P(OpThreshold_Negative, nvcv::test::ValueList +{ + //batch, height, width, type, thresh, maxval, inFormat, outFormat, threshDataType, maxvalType + { 1, 224, 224, NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_F16, nvcv::FMT_F16, nvcv::TYPE_F64, nvcv::TYPE_F64}, + { 1, 224, 224, NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_U8, nvcv::FMT_U16, nvcv::TYPE_F64, nvcv::TYPE_F64}, + { 1, 224, 224, NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_U8, nvcv::FMT_U8, nvcv::TYPE_F32, nvcv::TYPE_F64}, + { 1, 224, 224, NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_U8, nvcv::FMT_U8, nvcv::TYPE_F64, nvcv::TYPE_F32}, + { 1, 224, 224, NVCV_THRESH_TRIANGLE|NVCV_THRESH_OTSU|NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_U8, nvcv::FMT_U8, nvcv::TYPE_F64, nvcv::TYPE_F64}, + { 1, 224, 224, NVCV_THRESH_TRUNC|NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_U8, nvcv::FMT_U8, nvcv::TYPE_F64, nvcv::TYPE_F64}, + { 1, 224, 224, NVCV_THRESH_OTSU|NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_U16, nvcv::FMT_U16, nvcv::TYPE_F64, nvcv::TYPE_F64}, + { 1, 224, 224, NVCV_THRESH_OTSU|NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_RGB8, nvcv::FMT_RGB8, nvcv::TYPE_F64, nvcv::TYPE_F64}, + { 1, 224, 224, NVCV_THRESH_TRIANGLE|NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_U16, nvcv::FMT_U16, nvcv::TYPE_F64, nvcv::TYPE_F64}, + { 1, 224, 224, NVCV_THRESH_TRIANGLE|NVCV_THRESH_BINARY, 100, 255, nvcv::FMT_RGB8, nvcv::FMT_RGB8, nvcv::TYPE_F64, nvcv::TYPE_F64}, +}); + +// clang-format on + +TEST(OpThreshold_Negative, create_with_null_handle) +{ + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaThresholdCreate(nullptr, NVCV_THRESH_BINARY, 5)); +} + +TEST(OpThreshold_Negative, create_with_negative_maxBatchSize) +{ + NVCVOperatorHandle thresholdHandle; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, cvcudaThresholdCreate(&thresholdHandle, NVCV_THRESH_BINARY, -1)); +} + +TEST_P(OpThreshold_Negative, invalid_inputs) +{ + cudaStream_t stream; + EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + + int batch = GetParamValue<0>(); + int height = GetParamValue<1>(); + int width = GetParamValue<2>(); + uint32_t type = GetParamValue<3>(); + double thresh = GetParamValue<4>(); + double maxval = GetParamValue<5>(); + nvcv::ImageFormat inFormat = GetParamValue<6>(); + nvcv::ImageFormat 
outFormat = GetParamValue<7>(); + nvcv::DataType threshDataType = GetParamValue<8>(); + nvcv::DataType maxvalDataType = GetParamValue<9>(); + + nvcv::Tensor imgIn = nvcv::util::CreateTensor(batch, width, height, inFormat); + nvcv::Tensor imgOut = nvcv::util::CreateTensor(batch, width, height, outFormat); + + //parameters + nvcv::Tensor threshval({{batch}, "N"}, threshDataType); + nvcv::Tensor maxvalval({{batch}, "N"}, maxvalDataType); + + auto threshData = threshval.exportData(); + auto maxvalData = maxvalval.exportData(); + + ASSERT_NE(nullptr, threshData); + ASSERT_NE(nullptr, maxvalData); + + std::vector threshVec(batch, thresh); + std::vector maxvalVec(batch, maxval); + + // Copy vectors to the GPU + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(threshData->basePtr(), threshVec.data(), threshVec.size() * sizeof(double), + cudaMemcpyHostToDevice, stream)); + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(maxvalData->basePtr(), maxvalVec.data(), maxvalVec.size() * sizeof(double), + cudaMemcpyHostToDevice, stream)); + + // Call operator + int maxBatch = 5; + cvcuda::Threshold thresholdOp(type, maxBatch); + EXPECT_ANY_THROW(thresholdOp(stream, imgIn, imgOut, threshval, maxvalval)); + + EXPECT_EQ(cudaSuccess, cudaStreamSynchronize(stream)); + EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream)); +} + +TEST_P(OpThreshold_Negative, varshape_invalid_inputs) +{ + cudaStream_t stream; + EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + + int batch = GetParamValue<0>(); + int height = GetParamValue<1>(); + int width = GetParamValue<2>(); + uint32_t type = GetParamValue<3>(); + double thresh = GetParamValue<4>(); + double maxval = GetParamValue<5>(); + nvcv::ImageFormat inFormat = GetParamValue<6>(); + nvcv::ImageFormat outFormat = GetParamValue<7>(); + nvcv::DataType threshDataType = GetParamValue<8>(); + nvcv::DataType maxvalDataType = GetParamValue<9>(); + + // Create input and output + std::default_random_engine randEng; + std::uniform_int_distribution rndWidth(width * 0.8, width * 1.1); + std::uniform_int_distribution rndHeight(height * 0.8, height * 1.1); + + std::vector imgSrc, imgDst; + for (int i = 0; i < batch; ++i) + { + int rw = rndWidth(randEng); + int rh = rndHeight(randEng); + imgSrc.emplace_back(nvcv::Size2D{rw, rh}, inFormat); + imgDst.emplace_back(nvcv::Size2D{rw, rh}, outFormat); + } + + nvcv::ImageBatchVarShape batchSrc(batch); + batchSrc.pushBack(imgSrc.begin(), imgSrc.end()); + + nvcv::ImageBatchVarShape batchDst(batch); + batchDst.pushBack(imgDst.begin(), imgDst.end()); + + //parameters + nvcv::Tensor threshval({{batch}, "N"}, threshDataType); + nvcv::Tensor maxvalval({{batch}, "N"}, maxvalDataType); + + auto threshData = threshval.exportData(); + auto maxvalData = maxvalval.exportData(); + + ASSERT_NE(nullptr, threshData); + ASSERT_NE(nullptr, maxvalData); + + std::vector threshVec(batch, thresh); + std::vector maxvalVec(batch, maxval); + + // Copy vectors to the GPU + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(threshData->basePtr(), threshVec.data(), threshVec.size() * sizeof(double), + cudaMemcpyHostToDevice, stream)); + ASSERT_EQ(cudaSuccess, cudaMemcpyAsync(maxvalData->basePtr(), maxvalVec.data(), maxvalVec.size() * sizeof(double), + cudaMemcpyHostToDevice, stream)); + // Call operator + int maxBatch = 5; + cvcuda::Threshold thresholdOp(type, maxBatch); + EXPECT_ANY_THROW(thresholdOp(stream, batchSrc, batchDst, threshval, maxvalval)); + + EXPECT_EQ(cudaSuccess, cudaStreamSynchronize(stream)); + EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream)); +} diff --git 
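The negative tests above (for RandomResizedCrop and Threshold) rely on invalid formats and flag combinations being rejected at call time, either as a status code or as a thrown exception. A minimal sketch of the two assertion patterns they use, with `someOp`, `badInput` and `badOutput` as hypothetical stand-ins:

    // Sketch only: converting an operator failure into a status code for the
    // assertion, as the RandomResizedCrop negative test does. Any nvcv::Exception
    // thrown inside the lambda is returned as its NVCVStatus.
    NVCVStatus status = nvcv::ProtectCall([&] { someOp(stream, badInput, badOutput); });
    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, status);

    // When only the fact of failure matters, the Threshold negative tests simply do:
    EXPECT_ANY_THROW(someOp(stream, badInput, badOutput));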
a/tests/nvcv_types/cudatools_system/CMakeLists.txt b/tests/nvcv_types/cudatools_system/CMakeLists.txt index fd97891b6..3779482ec 100644 --- a/tests/nvcv_types/cudatools_system/CMakeLists.txt +++ b/tests/nvcv_types/cudatools_system/CMakeLists.txt @@ -45,6 +45,8 @@ add_executable(nvcv_test_cudatools_system TestTypeTraits.cpp TestMetaprogramming.cpp TestArrayWrap.cpp + TestTensorBatchWrap.cpp + DeviceTensorBatchWrap.cu ) target_link_libraries(nvcv_test_cudatools_system diff --git a/tests/nvcv_types/cudatools_system/DeviceTensorBatchWrap.cu b/tests/nvcv_types/cudatools_system/DeviceTensorBatchWrap.cu new file mode 100644 index 000000000..fa1a3b1f1 --- /dev/null +++ b/tests/nvcv_types/cudatools_system/DeviceTensorBatchWrap.cu @@ -0,0 +1,152 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DeviceTensorBatchWrap.hpp" + +#include // for EXPECT_EQ, etc. +#include // for operator == to allow EXPECT_EQ +#include // for StaticCast, etc. +#include + +namespace cuda = nvcv::cuda; + +template +__device__ T *tensor_ptr(TensorWrap &tensor, int *coords, Coords... vcoords) +{ + if constexpr (sizeof...(Coords) == NDIM) + { + return tensor.ptr(vcoords...); + } + else + { + return tensor_ptr(tensor, coords, vcoords..., coords[sizeof...(Coords)]); + } +} + +template +__device__ void SetThroughTensor::Set(TensorBatchWrapT wrap, int sample, int *coords, T value) +{ + auto tensor = wrap.tensor(sample); + *tensor_ptr(tensor, coords) = value; +} + +template +__device__ void SetThroughSubscript::Set(TensorBatchWrapT wrap, int sample, int *coords, T value) +{ + constexpr int NDIM = TensorBatchWrapT::kNumDimensions; + if constexpr (NDIM == 1) + { + wrap[int2{sample, coords[0]}] = value; + } + else if constexpr (NDIM == 2) + { + wrap[int3{sample, coords[1], coords[0]}] = value; + } + else if constexpr (NDIM == 3) + { + wrap[int4{sample, coords[2], coords[1], coords[0]}] = value; + } +} + +template +__device__ void SetThroughPtrHelper(TensorBatchWrapT wrap, int sample, int *coords, T value, VCoords... vcoords) +{ + constexpr int NDIM = TensorBatchWrapT::kNumDimensions; + if constexpr (sizeof...(VCoords) == 0) + { + SetThroughPtrHelper(wrap, sample, coords, value, sample, coords[0]); + } + else if constexpr (sizeof...(VCoords) < NDIM + 1) + { + SetThroughPtrHelper(wrap, sample, coords, value, vcoords..., coords[sizeof...(VCoords) - 1]); + } + else + { + *wrap.ptr(vcoords...) 
= value; + } +} + +template +__device__ void SetThroughPtr::Set(TensorBatchWrapT wrap, int sample, int *coords, T value) +{ + SetThroughPtrHelper(wrap, sample, coords, value); +} + +template +__global__ void SetReferenceKernel(TensorBatchWrapT wrap) +{ + int sample = blockIdx.x; + const int64_t *shape = wrap.shape(sample); + int id = threadIdx.x; + int64_t tensorVol = 1; + const int ndim = TensorBatchWrapT::kNumDimensions; + for (int d = 0; d < ndim; ++d) + { + tensorVol *= shape[d]; + } + for (int index = id; index < tensorVol; index += blockDim.x) + { + int coords[ndim]; + int tmp_i = index; + for (int d = ndim - 1; d >= 0; --d) + { + coords[d] = tmp_i % shape[d]; + tmp_i /= shape[d]; + } + SetValue::Set(wrap, sample, coords, cuda::SetAll(index % 255)); + } +} + +template +void SetReference(TensorBatchWrapT wrap, cudaStream_t stream) +{ + int blocks = wrap.numTensors(); + SetReferenceKernel<<>>(wrap); +} + +#define SetReferenceSpec(SET_VALUE, TENSOR_BATCH_TYPE) \ + template __device__ void SET_VALUE::Set( \ + TENSOR_BATCH_TYPE, int, int *, TENSOR_BATCH_TYPE::ValueType); \ + template void SetReference, TENSOR_BATCH_TYPE>( \ + TENSOR_BATCH_TYPE, cudaStream_t) + +#define TB_PARAMS1 uchar1, -1, 32 * sizeof(uchar1), sizeof(uchar1) +SetReferenceSpec(SetThroughTensor, cuda::TensorBatchWrap); + +#define TB_PARAMS2 double4, 8 * sizeof(double4), sizeof(double4) +SetReferenceSpec(SetThroughTensor, cuda::TensorBatchWrap); + +#define TB_PARAMS3 float3, -1, -1, 8 * sizeof(float3), sizeof(float3) +SetReferenceSpec(SetThroughTensor, cuda::TensorBatchWrap); + +#define TB_PARAMS4 uchar2, sizeof(uchar2) +SetReferenceSpec(SetThroughSubscript, cuda::TensorBatchWrap); + +#define TB_PARAMS5 int3, -1, 16 * sizeof(int3), sizeof(int3) +SetReferenceSpec(SetThroughSubscript, cuda::TensorBatchWrap); + +#define TB_PARAMS6 ushort4, -1, sizeof(ushort4) +SetReferenceSpec(SetThroughSubscript, cuda::TensorBatchWrap); + +#define TB_PARAMS7 uchar4, -1, -1, 32 * sizeof(uchar4), sizeof(uchar4) +SetReferenceSpec(SetThroughPtr, cuda::TensorBatchWrap); + +#define TB_PARAMS8 float1, -1, -1, -1, 8 * sizeof(float1), sizeof(float1) +SetReferenceSpec(SetThroughPtr, cuda::TensorBatchWrap); + +#define TB_PARAMS9 float4, sizeof(float4) +SetReferenceSpec(SetThroughPtr, cuda::TensorBatchWrap); diff --git a/tests/nvcv_types/cudatools_system/DeviceTensorBatchWrap.hpp b/tests/nvcv_types/cudatools_system/DeviceTensorBatchWrap.hpp new file mode 100644 index 000000000..2f98460b3 --- /dev/null +++ b/tests/nvcv_types/cudatools_system/DeviceTensorBatchWrap.hpp @@ -0,0 +1,42 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace cuda = nvcv::cuda; + +template +void SetReference(TensorBatchWrapT wrap, cudaStream_t stream); + +template +struct SetThroughTensor +{ + static __device__ void Set(TensorBatchWrapT wrap, int sample, int *coords, T value); +}; + +template +struct SetThroughSubscript +{ + static __device__ void Set(TensorBatchWrapT wrap, int sample, int *coords, T value); +}; + +template +struct SetThroughPtr +{ + static __device__ void Set(TensorBatchWrapT wrap, int sample, int *coords, T value); +}; diff --git a/tests/nvcv_types/cudatools_system/TestTensorBatchWrap.cpp b/tests/nvcv_types/cudatools_system/TestTensorBatchWrap.cpp new file mode 100644 index 000000000..50d6fbf64 --- /dev/null +++ b/tests/nvcv_types/cudatools_system/TestTensorBatchWrap.cpp @@ -0,0 +1,175 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DeviceTensorBatchWrap.hpp" + +#include // for NVCV_INSTANTIATE_TEST_SUITE_P, etc. +#include // for NVCV_MIXTYPED_TEST_SUITE_P, etc. +#include // for StringLiteral +#include // for Image, etc. +#include // for TensorBatch +#include // for TensorDataAccessStridedImagePlanar, etc. 
+#include // for operator == to allow EXPECT_EQ +#include + +#include +#include +#include + +namespace t = ::testing; +namespace test = nvcv::test; +namespace cuda = nvcv::cuda; +namespace ttype = nvcv::test::type; + +static constexpr int kMaxDim = 50; + +template +nvcv::Tensor GetRandomTensor(R &rg, nvcv::DataType dtype, cudaStream_t stream) +{ + std::uniform_int_distribution shape_dist(kMaxDim / 2, kMaxDim); + nvcv::TensorShape::ShapeType shapeData(NDIM); + for (auto &d : shapeData) + { + d = shape_dist(rg); + } + if (INNER_DIM != -1) + { + shapeData[NDIM - 1] = INNER_DIM; + } + auto t = nvcv::Tensor(nvcv::TensorShape(shapeData, ""), dtype); + return t; +} + +template +void VerifyTensorHelper(NVCVByte *data, const int64_t *shape, const int64_t *stride, int64_t startIndex = 0) +{ + if constexpr (N == NDIM) + { + auto gold = cuda::SetAll(startIndex % 255); + auto value = *reinterpret_cast(data); + ASSERT_EQ(value, gold); + } + else + { + int64_t indexStride = 1; + for (int i = 1; i + N < NDIM; ++i) + { + indexStride *= shape[i]; + } + for (int i = 0; i < shape[0]; ++i) + { + VerifyTensorHelper(data, shape + 1, stride + 1, startIndex + i * indexStride); + data += stride[0]; + } + } +} + +template +void VerifyTensor(const nvcv::Tensor &tensor, cudaStream_t stream) +{ + auto data = tensor.exportData().cdata(); + auto bufferSize = data.shape[0] * data.buffer.strided.strides[0]; + std::vector hostBuffer(bufferSize); + ASSERT_EQ( + cudaMemcpyAsync(hostBuffer.data(), data.buffer.strided.basePtr, bufferSize, cudaMemcpyDeviceToHost, stream), + cudaSuccess); + ASSERT_EQ(cudaStreamSynchronize(stream), cudaSuccess); + VerifyTensorHelper(hostBuffer.data(), &data.shape[0], &data.buffer.strided.strides[0]); +} + +template +struct TensorBatchWrapHelper +{ + using type = TensorBatchWrapHelper::type; +}; + +template +struct TensorBatchWrapHelper +{ + using type + = TensorBatchWrapHelper::type; +}; + +template +struct TensorBatchWrapHelper +{ + using type = cuda::TensorBatchWrap; +}; + +template +struct TensorBatchWrapHelper +{ + using type = cuda::TensorBatchWrap; +}; + +template +using TensorBatchWrapHelperT = typename TensorBatchWrapHelper::type; + +#define NVCV_TEST_ROW(NUM_TENSORS, DTYPE, TYPE, NDIM, INNER_DIM, SET_VALUE_METHOD) \ + ttype::Types, ttype::Value, TYPE, ttype::Value, ttype::Value, \ + SET_VALUE_METHOD, TYPE>> + +NVCV_TYPED_TEST_SUITE(TensorBatchWrapTensorTest, + ttype::Types); + +#undef NVCV_TEST_ROW + +TYPED_TEST(TensorBatchWrapTensorTest, correct_content) +{ + int numTensors = ttype::GetValue; + nvcv::DataType dtype{ttype::GetValue}; + using T = ttype::GetType; + constexpr int NDIM = ttype::GetValue; + constexpr int INNER_DIM = ttype::GetValue; + using SET_METHOD = ttype::GetType; + using TensorBatchWrapT = TensorBatchWrapHelperT; + + cudaStream_t stream; + ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream)); + nvcv::TensorBatch tensorBatch(numTensors); + std::vector tensors; + + std::mt19937 rg{231}; + for (int i = 0; i < tensorBatch.capacity(); ++i) + { + auto t = GetRandomTensor(rg, dtype, stream); + ASSERT_EQ(t.rank(), NDIM); + tensors.push_back(t); + } + + tensorBatch.pushBack(tensors.begin(), tensors.end()); + + auto tensorBatchData = tensorBatch.exportData(stream).cast(); + ASSERT_TRUE(tensorBatchData.hasValue()); + + auto wrap = TensorBatchWrapT(*tensorBatchData); + SetReference(wrap, stream); + ASSERT_EQ(cudaStreamSynchronize(stream), cudaSuccess); + + for (auto &tensor : tensors) + { + VerifyTensor(tensor, stream); + } +} diff --git 
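The TensorBatchWrap tests above exercise three ways of addressing one element of a batched tensor from device code. A condensed sketch for the rank-2 (HW) case, with `WrapT` standing for a cuda::TensorBatchWrap instantiation and the coordinates assumed to be in range:

    // Sketch: the three access paths covered by SetThroughTensor,
    // SetThroughSubscript and SetThroughPtr above, written out for one sample.
    template<typename WrapT>
    __device__ void WriteOne(WrapT wrap, int sample, int row, int col,
                             typename WrapT::ValueType v)
    {
        *wrap.tensor(sample).ptr(row, col) = v; // via the per-sample TensorWrap
        wrap[int3{sample, col, row}]       = v; // subscript takes the innermost coordinate first
        *wrap.ptr(sample, row, col)        = v; // direct pointer access on the batch wrap
    }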
a/tests/nvcv_types/python/nvcv_test_types_python.in b/tests/nvcv_types/python/nvcv_test_types_python.in index 2abb0fc1c..ee9d70f8c 100755 --- a/tests/nvcv_types/python/nvcv_test_types_python.in +++ b/tests/nvcv_types/python/nvcv_test_types_python.in @@ -15,25 +15,32 @@ # See the License for the specific language governing permissions and # limitations under the License. + tests_dir="@PYTHON_TEST_DIR@" -python_versions="@PYTHON_TEST_VERSIONS@" +python_versions_tentative="@PYTHON_TEST_VERSIONS@" + +python_versions="" # Verify if correct package dependencies are installed -------- pip_depends="pytest torch" -declare -a install_commands - -for ver in $python_versions; do +# Collect all python versions that are indeed installed and have proper dependencies installed +# Two behaviors: +# - default: skip Python versions that are not installed or don't have pytest and torch installed +# - if NVCV_FORCE_PYTHON is set: exit with error +for ver in $python_versions_tentative; do if ! python$ver -c "import pytest, torch" > /dev/null 2>&1; then - install_commands+=("sudo python$ver -m pip install $pip_depends") + echo "WARNING: Python version $ver not installed or missing proper dependencies" + echo "Please install Python version $ver and run the following commands before running tests: sudo python$ver -m pip install $pip_depends" + if [[ "$NVCV_FORCE_PYTHON" == 1 || "$NVCV_FORCE_PYTHON" == yes ]]; then + exit 1 #hard exit + fi + else + echo "Found Python version $ver installed with proper dependencies, adding to tests" + python_versions+="$ver " fi done -if [[ "${install_commands[*]}" ]]; then - echo "Please run the following commands before running $(basename $0): " - ( IFS=$'\n'; echo -e "${install_commands[*]}" ) - exit 1 -fi # Run tests -------- diff --git a/tests/nvcv_types/system/TestAllocatorC.cpp b/tests/nvcv_types/system/TestAllocatorC.cpp index 6b47c25eb..8d7fe0bbf 100644 --- a/tests/nvcv_types/system/TestAllocatorC.cpp +++ b/tests/nvcv_types/system/TestAllocatorC.cpp @@ -82,6 +82,10 @@ TEST(AllocatorTest, CreateAndUseCustom) ASSERT_EQ(nvcvAllocatorConstructCustom(allocators, 2, &halloc), NVCV_SUCCESS); ASSERT_NE(halloc, nullptr); + int refCount = 0; + EXPECT_EQ(nvcvAllocatorRefCount(halloc, &refCount), NVCV_SUCCESS); + EXPECT_EQ(refCount, 1); + for (int i = 0; i < 2; i++) { NVCVResourceAllocator alloc = {}; @@ -216,3 +220,183 @@ TEST(Allocator, smoke_test_custom_functors) myalloc1.cudaMem().free((void *)1, 7); EXPECT_EQ(1, devCounter); } + +TEST(AllocatorTest, smoke_user_pointer) +{ + NVCVResourceAllocator allocators[1] = {}; + + int ctx0 = 100; + + allocators[0].resType = NVCV_RESOURCE_MEM_HOST; + allocators[0].ctx = &ctx0; + allocators[0].res.mem.fnAlloc = [](void *ctx, int64_t size, int32_t align) + { + *(int *)ctx += 1; + return memalign(align, size); + }; + allocators[0].res.mem.fnFree = [](void *ctx, void *ptr, int64_t size, int32_t align) + { + *(int *)ctx += 10; + free(ptr); + }; + allocators[0].cleanup = [](void *ctx, NVCVResourceAllocator *alloc) + { + EXPECT_EQ(ctx, alloc->ctx); + int *ctx_int = static_cast(ctx); + *ctx_int = 0xDEAD; + }; + + NVCVAllocatorHandle halloc = nullptr; + ASSERT_EQ(nvcvAllocatorConstructCustom(allocators, 1, &halloc), NVCV_SUCCESS); + ASSERT_NE(halloc, nullptr); + + void *userPtr; + ASSERT_EQ(nvcvAllocatorGetUserPointer(halloc, &userPtr), NVCV_SUCCESS); + EXPECT_EQ(nullptr, userPtr); + + ASSERT_EQ(nvcvAllocatorSetUserPointer(halloc, (void *)0x123), NVCV_SUCCESS); + ASSERT_EQ(nvcvAllocatorGetUserPointer(halloc, &userPtr), NVCV_SUCCESS); + EXPECT_EQ((void 
*)0x123, userPtr); + + ASSERT_EQ(nvcvAllocatorSetUserPointer(halloc, nullptr), NVCV_SUCCESS); + ASSERT_EQ(nvcvAllocatorGetUserPointer(halloc, &userPtr), NVCV_SUCCESS); + EXPECT_EQ(nullptr, userPtr); + + int newRef = 1; + EXPECT_EQ(nvcvAllocatorDecRef(halloc, &newRef), NVCV_SUCCESS); + EXPECT_EQ(newRef, 0); +} + +TEST(AllocatorTest, invalid_arguments_api_calls) +{ + NVCVResourceAllocator allocators[2] = {}; + + allocators[0].resType = NVCV_RESOURCE_MEM_HOST; + allocators[0].res.mem.fnAlloc = [](void *ctx, int64_t size, int32_t align) + { + return memalign(align, size); + }; + allocators[0].res.mem.fnFree = [](void *ctx, void *ptr, int64_t size, int32_t align) + { + free(ptr); + }; + allocators[0].cleanup = [](void *ctx, NVCVResourceAllocator *alloc) { + }; + + allocators[1].resType = NVCV_RESOURCE_MEM_CUDA; + allocators[1].res.mem.fnAlloc = [](void *ctx, int64_t size, int32_t align) + { + void *mem; + EXPECT_EQ(cudaMalloc(&mem, size), cudaSuccess); + return mem; + }; + allocators[1].res.mem.fnFree = [](void *ctx, void *ptr, int64_t size, int32_t align) + { + EXPECT_EQ(cudaFree(ptr), cudaSuccess); + }; + allocators[1].cleanup = [](void *ctx, NVCVResourceAllocator *alloc) { + }; + + NVCVAllocatorHandle halloc = nullptr; + // 1. Pointer to output handle must not be NULL + EXPECT_EQ(nvcvAllocatorConstructCustom(allocators, 2, nullptr), NVCV_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(nvcvAllocatorConstructCustom(allocators, 2, &halloc), NVCV_SUCCESS); + ASSERT_NE(halloc, nullptr); + + // 2. Pointer to output user pointer cannot be NULL + EXPECT_EQ(nvcvAllocatorGetUserPointer(halloc, nullptr), NVCV_ERROR_INVALID_ARGUMENT); + + // 3. Pointer to output buffer must not be NULL + EXPECT_EQ(nvcvAllocatorAllocHostMemory(halloc, nullptr, (1 << 10), 256), NVCV_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(nvcvAllocatorAllocHostPinnedMemory(halloc, nullptr, (1 << 10), 256), NVCV_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(nvcvAllocatorAllocCudaMemory(halloc, nullptr, (1 << 10), 256), NVCV_ERROR_INVALID_ARGUMENT); + + // 4. allocHostMem + void *p0 = nullptr; + EXPECT_EQ(nvcvAllocatorAllocHostMemory(halloc, &p0, -1, 256), NVCV_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(nvcvAllocatorAllocHostMemory(halloc, &p0, (1 << 10), 3), NVCV_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(nvcvAllocatorAllocHostMemory(halloc, &p0, 128, 256), NVCV_ERROR_INVALID_ARGUMENT); + + // 5. allocHostPinnedMem + EXPECT_EQ(nvcvAllocatorAllocHostPinnedMemory(halloc, &p0, -1, 256), NVCV_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(nvcvAllocatorAllocHostPinnedMemory(halloc, &p0, (1 << 10), 3), NVCV_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(nvcvAllocatorAllocHostPinnedMemory(halloc, &p0, 128, 256), NVCV_ERROR_INVALID_ARGUMENT); + + // 6. allocHostPinnedMem + EXPECT_EQ(nvcvAllocatorAllocCudaMemory(halloc, &p0, -1, 256), NVCV_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(nvcvAllocatorAllocCudaMemory(halloc, &p0, (1 << 10), 3), NVCV_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(nvcvAllocatorAllocCudaMemory(halloc, &p0, 128, 256), NVCV_ERROR_INVALID_ARGUMENT); + + int newRef = 1; + EXPECT_EQ(nvcvAllocatorDecRef(halloc, &newRef), NVCV_SUCCESS); + EXPECT_EQ(newRef, 0); +} + +TEST(AllocatorTest, customAllocator_constructor_negative) +{ + NVCVResourceAllocator invalidFnAllocAllocator[1] = {}; + NVCVResourceAllocator invalidFnFreeAllocator[1] = {}; + NVCVResourceAllocator duplicatedResourceTypeAllocator[2] = {}; + + // 1. 
allocation function must not be NULL + invalidFnAllocAllocator[0].resType = NVCV_RESOURCE_MEM_HOST; + invalidFnAllocAllocator[0].res.mem.fnFree = [](void *ctx, void *ptr, int64_t size, int32_t align) + { + free(ptr); + }; + invalidFnAllocAllocator[0].cleanup = [](void *ctx, NVCVResourceAllocator *alloc) { + }; + + NVCVAllocatorHandle halloc = nullptr; + + EXPECT_EQ(nvcvAllocatorConstructCustom(invalidFnAllocAllocator, 1, &halloc), NVCV_ERROR_INVALID_ARGUMENT); + + // 2. deallocation function must not be NULL + invalidFnFreeAllocator[0].resType = NVCV_RESOURCE_MEM_CUDA; + invalidFnFreeAllocator[0].res.mem.fnAlloc = [](void *ctx, int64_t size, int32_t align) + { + void *mem; + EXPECT_EQ(cudaMalloc(&mem, size), cudaSuccess); + return mem; + }; + invalidFnFreeAllocator[0].cleanup = [](void *ctx, NVCVResourceAllocator *alloc) { + }; + + EXPECT_EQ(nvcvAllocatorConstructCustom(invalidFnFreeAllocator, 1, &halloc), NVCV_ERROR_INVALID_ARGUMENT); + + // 3. duplicated resource type + duplicatedResourceTypeAllocator[0].resType = NVCV_RESOURCE_MEM_HOST; + duplicatedResourceTypeAllocator[0].res.mem.fnAlloc = [](void *ctx, int64_t size, int32_t align) + { + return memalign(align, size); + }; + duplicatedResourceTypeAllocator[0].res.mem.fnFree = [](void *ctx, void *ptr, int64_t size, int32_t align) + { + free(ptr); + }; + duplicatedResourceTypeAllocator[0].cleanup = [](void *ctx, NVCVResourceAllocator *alloc) { + }; + + duplicatedResourceTypeAllocator[1].resType = NVCV_RESOURCE_MEM_HOST; + duplicatedResourceTypeAllocator[1].res.mem.fnAlloc = [](void *ctx, int64_t size, int32_t align) + { + return memalign(align, size); + }; + duplicatedResourceTypeAllocator[1].res.mem.fnFree = [](void *ctx, void *ptr, int64_t size, int32_t align) + { + free(ptr); + }; + duplicatedResourceTypeAllocator[1].cleanup = [](void *ctx, NVCVResourceAllocator *alloc) { + }; + + EXPECT_EQ(nvcvAllocatorConstructCustom(duplicatedResourceTypeAllocator, 2, &halloc), NVCV_ERROR_INVALID_ARGUMENT); +} + +TEST(AllocatorTest, get_name) +{ + EXPECT_STREQ("NVCV_RESOURCE_MEM_CUDA", nvcvResourceTypeGetName(NVCV_RESOURCE_MEM_CUDA)); + EXPECT_STREQ("NVCV_RESOURCE_MEM_HOST", nvcvResourceTypeGetName(NVCV_RESOURCE_MEM_HOST)); + EXPECT_STREQ("NVCV_RESOURCE_MEM_HOST_PINNED", nvcvResourceTypeGetName(NVCV_RESOURCE_MEM_HOST_PINNED)); + EXPECT_STREQ("Unexpected error retrieving NVCVResourceType string representation", + nvcvResourceTypeGetName(static_cast(255))); +} diff --git a/tests/nvcv_types/system/TestArray.cpp b/tests/nvcv_types/system/TestArray.cpp index d0f9f5297..cbc67ab5d 100644 --- a/tests/nvcv_types/system/TestArray.cpp +++ b/tests/nvcv_types/system/TestArray.cpp @@ -208,3 +208,268 @@ TEST(ArrayTests, smoke_create_allocator) EXPECT_EQ(32, data->stride()); } + +TEST(ArrayTests, invalid_outputs_calcReq) +{ + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, nullptr)); + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvArrayCalcRequirementsWithTarget(16, NVCV_DATA_TYPE_U8, 0, NVCV_RESOURCE_MEM_HOST, nullptr)); +} + +TEST(ArrayTests, invalid_alignment_calcReq_with_target) +{ + NVCVArrayRequirements req; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvArrayCalcRequirementsWithTarget(16, NVCV_DATA_TYPE_U8, 7, NVCV_RESOURCE_MEM_HOST, &req)); +} + +TEST(ArrayTests, invalid_input_construct) +{ + NVCVArrayRequirements req; + NVCVArrayHandle arrayHandle; + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayConstruct(nullptr, nullptr, 
&arrayHandle)); + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayConstruct(&req, nullptr, nullptr)); +} + +TEST(ArrayTests, valid_construct) +{ + NVCVArrayRequirements req; + NVCVArrayHandle arrayHandle; + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, valid_construct_with_target) +{ + NVCVArrayRequirements req; + NVCVArrayHandle arrayHandle; + EXPECT_EQ(NVCV_SUCCESS, + nvcvArrayCalcRequirementsWithTarget(16, NVCV_DATA_TYPE_U8, 0, NVCV_RESOURCE_MEM_HOST_PINNED, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstructWithTarget(&req, nullptr, NVCV_RESOURCE_MEM_HOST_PINNED, &arrayHandle)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, mismatch_construct_with_target) +{ + NVCVArrayRequirements req; + NVCVArrayHandle arrayHandle; + int64_t capacity = -1, length = -1; + NVCVResourceType target; + NVCVDataType dType; + EXPECT_EQ(NVCV_SUCCESS, + nvcvArrayCalcRequirementsWithTarget(16, NVCV_DATA_TYPE_U8, 0, NVCV_RESOURCE_MEM_CUDA, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstructWithTarget(&req, nullptr, NVCV_RESOURCE_MEM_HOST, &arrayHandle)); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayGetCapacity(arrayHandle, &capacity)); + EXPECT_EQ(16, capacity); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayGetTarget(arrayHandle, &target)); + EXPECT_EQ(NVCV_RESOURCE_MEM_HOST, target); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayGetLength(arrayHandle, &length)); + EXPECT_EQ(0, length); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayGetDataType(arrayHandle, &dType)); + EXPECT_EQ(NVCV_DATA_TYPE_U8, dType); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, invalid_req_construct_with_target) +{ + NVCVArrayHandle arrayHandle; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvArrayConstructWithTarget(nullptr, nullptr, NVCV_RESOURCE_MEM_HOST, &arrayHandle)); +} + +TEST(ArrayTests, invalid_handle_construct_with_target) +{ + NVCVArrayRequirements req; + EXPECT_EQ(NVCV_SUCCESS, + nvcvArrayCalcRequirementsWithTarget(16, NVCV_DATA_TYPE_U8, 0, NVCV_RESOURCE_MEM_HOST, &req)); + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvArrayConstructWithTarget(&req, nullptr, NVCV_RESOURCE_MEM_HOST, nullptr)); +} + +TEST(ArrayTests, invalid_data_wrap_data_construct) +{ + NVCVArrayHandle arrayHandle; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayWrapDataConstruct(nullptr, nullptr, nullptr, &arrayHandle)); +} + +TEST(ArrayTests, invalid_handle_wrap_data_construct) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + NVCVArrayData arrayData; + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayExportData(arrayHandle, &arrayData)); + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayWrapDataConstruct(&arrayData, nullptr, nullptr, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +void arrayDataCleanUpFunc(void *ctx, const NVCVArrayData *data); + +void arrayDataCleanUpFunc(void *ctx, const NVCVArrayData *data) {} + +TEST(ArrayTests, valid_handle_wrap_data_construct) +{ + NVCVArrayHandle arrayHandle, arrayHandle2; + NVCVArrayRequirements req; + NVCVArrayData arrayData; + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, 
&arrayHandle)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayExportData(arrayHandle, &arrayData)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayWrapDataConstruct(&arrayData, &arrayDataCleanUpFunc, nullptr, &arrayHandle2)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle2, nullptr)); +} + +TEST(ArrayTests, null_basePtr_wrap_data_construct) +{ + NVCVArrayHandle arrayHandle2; + NVCVArrayData arrayData; + arrayData.buffer.strided.basePtr = nullptr; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayWrapDataConstruct(&arrayData, nullptr, nullptr, &arrayHandle2)); +} + +TEST(ArrayTests, valid_array_inc_ref) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + int refCount = -1; + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayRefCount(arrayHandle, &refCount)); + EXPECT_EQ(refCount, 1); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayIncRef(arrayHandle, &refCount)); + EXPECT_EQ(refCount, 2); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, &refCount)); + EXPECT_EQ(refCount, 1); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, smoke_user_pointer) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + void *userPtr; + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayGetUserPointer(arrayHandle, &userPtr)); + EXPECT_EQ(nullptr, userPtr); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArraySetUserPointer(arrayHandle, reinterpret_cast(0x123ULL))); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayGetUserPointer(arrayHandle, &userPtr)); + EXPECT_EQ(reinterpret_cast(0x123ULL), userPtr); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArraySetUserPointer(arrayHandle, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayGetUserPointer(arrayHandle, &userPtr)); + EXPECT_EQ(nullptr, userPtr); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, invalid_out_get_user_pointer) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayGetUserPointer(arrayHandle, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, invalid_out_get_data_type) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayGetDataType(arrayHandle, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, valid_get_allocator) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + NVCVAllocatorHandle alloc; + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayGetAllocator(arrayHandle, &alloc)); + EXPECT_EQ(alloc, nullptr); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, invalid_out_get_allocator) +{ + NVCVArrayHandle arrayHandle; + 
NVCVArrayRequirements req; + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayGetAllocator(arrayHandle, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, invalid_out_export_data) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayExportData(arrayHandle, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, invalid_out_get_length) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayGetLength(arrayHandle, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, invalid_out_get_capacity) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayGetCapacity(arrayHandle, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} + +TEST(ArrayTests, invalid_out_get_target) +{ + NVCVArrayHandle arrayHandle; + NVCVArrayRequirements req; + + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayCalcRequirements(16, NVCV_DATA_TYPE_U8, 0, &req)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayConstruct(&req, nullptr, &arrayHandle)); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvArrayGetTarget(arrayHandle, nullptr)); + EXPECT_EQ(NVCV_SUCCESS, nvcvArrayDecRef(arrayHandle, nullptr)); +} diff --git a/tests/nvcv_types/system/TestConfig.cpp b/tests/nvcv_types/system/TestConfig.cpp index 03b112aa3..ef028c5ca 100644 --- a/tests/nvcv_types/system/TestConfig.cpp +++ b/tests/nvcv_types/system/TestConfig.cpp @@ -18,6 +18,7 @@ #include "Definitions.hpp" #include +#include #include #include #include @@ -46,6 +47,10 @@ T CreateObj() { return nvcv::Tensor(nvcv::TensorShape({32, 12, 4}, nvcv::TENSOR_NONE), nvcv::TYPE_U8); } + else if constexpr (std::is_same_v) + { + return nvcv::Array(1, nvcv::TYPE_U8, 0, NVCV_RESOURCE_MEM_HOST); + } else { static_assert(sizeof(T) != 0 && "Invalid core object type"); @@ -71,13 +76,17 @@ void SetMaxCount(int32_t maxCount) { nvcv::cfg::SetMaxTensorCount(maxCount); } + else if constexpr (std::is_same_v) + { + nvcv::cfg::SetMaxArrayCount(maxCount); + } else { static_assert(sizeof(T) != 0 && "Invalid core object type"); } } -using AllCoreTypes = ttest::Types; +using AllCoreTypes = ttest::Types; template class ConfigTests : public ::testing::Test diff --git a/tests/nvcv_types/system/TestImageFormat.cpp b/tests/nvcv_types/system/TestImageFormat.cpp index 5cd8da66f..72dab22da 100644 --- a/tests/nvcv_types/system/TestImageFormat.cpp +++ b/tests/nvcv_types/system/TestImageFormat.cpp @@ -426,6 +426,12 @@ TEST(ImageFormatTests, set_extra_channel_info_image_format_none) ASSERT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvImageFormatSetExtraChannelInfo(&fmt, &exChannelInfo)); } +TEST(ImageFormatTests, set_extra_channel_info_null_input_ptr) +{ + 
NVCVExtraChannelInfo exChannelInfo = {2, 8, NVCV_DATA_KIND_UNSIGNED, NVCV_EXTRA_CHANNEL_POS3D}; + ASSERT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvImageFormatSetExtraChannelInfo(nullptr, &exChannelInfo)); +} + TEST(ImageFormatTests, set_extra_channel_info_max_min_bounds) { NVCVExtraChannelInfo exChannelInfo = {8, 8, NVCV_DATA_KIND_UNSIGNED, NVCV_EXTRA_CHANNEL_POS3D}; @@ -579,6 +585,8 @@ TEST(ImageFormatTests, check_alpha_type) fmt = NVCV_IMAGE_FORMAT_RGBA8; ASSERT_EQ(NVCV_SUCCESS, nvcvImageFormatGetAlphaType(fmt, &alphaType)); EXPECT_EQ(NVCV_ALPHA_ASSOCIATED, alphaType); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvImageFormatGetAlphaType(fmt, nullptr)); } TEST_P(ImageFormatTests, check_extra_channel_info) diff --git a/tests/nvcv_types/system/TestTensor.cpp b/tests/nvcv_types/system/TestTensor.cpp index 368e47692..2f45bcccb 100644 --- a/tests/nvcv_types/system/TestTensor.cpp +++ b/tests/nvcv_types/system/TestTensor.cpp @@ -458,3 +458,203 @@ TEST_P(TensorWrapParamTests, smoke_create) } } } + +class TensorTests_Negative : public ::testing::Test +{ +public: + TensorTests_Negative() {} + + ~TensorTests_Negative() {} + + void SetUp() override + { + ASSERT_EQ(NVCV_SUCCESS, nvcvTensorCalcRequirementsForImages(1, 224, 224, NVCV_IMAGE_FORMAT_RGBA8, 0, 0, &reqs)); + ASSERT_EQ(NVCV_SUCCESS, nvcvTensorConstruct(&reqs, nullptr, &handle)); + } + + void TearDown() override + { + nvcv::Tensor tensor(std::move(handle)); + } + + NVCVTensorHandle handle; + NVCVTensorRequirements reqs; +}; + +TEST_F(TensorTests_Negative, invalid_parameter_TensorCalcRequirementsForImages) +{ + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorCalcRequirementsForImages(-1, 224, 224, NVCV_IMAGE_FORMAT_RGBA8, 0, + 0, &reqs)); // invalid numImages + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorCalcRequirementsForImages(5, -1, 224, NVCV_IMAGE_FORMAT_RGBA8, 0, 0, &reqs)); // invalid width + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorCalcRequirementsForImages(5, 224, -1, NVCV_IMAGE_FORMAT_RGBA8, 0, 0, &reqs)); // invalid height + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorCalcRequirementsForImages(5, 224, 224, NVCV_IMAGE_FORMAT_NONE, 0, 0, &reqs)); // invalid format + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorCalcRequirementsForImages(5, 224, 224, NVCV_IMAGE_FORMAT_RGBA8, 3, + 0, &reqs)); // invalid baseAddrAlignment + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorCalcRequirementsForImages(5, 224, 224, NVCV_IMAGE_FORMAT_RGBA8, 0, + 3, &reqs)); // invalid rowAddrAlignment + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorCalcRequirementsForImages(5, 224, 224, NVCV_IMAGE_FORMAT_RGBA8, 0, 0, nullptr)); // null reqs + EXPECT_EQ(NVCV_ERROR_NOT_IMPLEMENTED, + nvcvTensorCalcRequirementsForImages( + 5, 224, 224, NVCV_DETAIL_MAKE_COLOR_FMT1(RGB, UNDEFINED, BL, UNSIGNED, XYZW, ASSOCIATED, X8_Y8_Z8_W8), + 0, 0, &reqs)); // BL layout + EXPECT_EQ(NVCV_ERROR_NOT_IMPLEMENTED, + nvcvTensorCalcRequirementsForImages(5, 224, 224, NVCV_IMAGE_FORMAT_UYVY, 0, 0, + &reqs)); // Not implemented subsampled planes (422) + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorCalcRequirementsForImages(5, 224, 224, NVCV_IMAGE_FORMAT_NV24, 0, + 0, &reqs)); // semi-planar image format + EXPECT_EQ( + NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorCalcRequirementsForImages( + 5, 224, 224, NVCV_DETAIL_MAKE_COLOR_FMT4(RGB, UNDEFINED, PL, UNSIGNED, XYZW, ASSOCIATED, X8, X8, X8, X32), + 0, 0, &reqs)); // planes of image format don't have the same packing +} + +TEST_F(TensorTests_Negative, invalid_parameter_TensorCalcRequirements) +{ + int64_t 
valid_wh[] = {224, 224}; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorCalcRequirements(2, valid_wh, NVCV_DATA_TYPE_NONE, NVCV_TENSOR_LAYOUT_MAKE("HW"), 0, 0, + &reqs)); // invalid dtype + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorCalcRequirements(3, valid_wh, NVCV_DATA_TYPE_U8, NVCV_TENSOR_LAYOUT_MAKE("HW"), 0, 0, + &reqs)); // mismatch rank + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorCalcRequirements(-1, valid_wh, NVCV_DATA_TYPE_U8, NVCV_TENSOR_LAYOUT_MAKE("HW"), 0, 0, + &reqs)); // invalid rank + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorCalcRequirements(2, valid_wh, NVCV_DATA_TYPE_U8, NVCV_TENSOR_NONE, + 3, 0, &reqs)); // invalid baseAddrAlignment + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorCalcRequirements(2, valid_wh, NVCV_DATA_TYPE_U8, NVCV_TENSOR_NONE, + 0, 3, &reqs)); // invalid rowAddrAlignment + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorCalcRequirements(2, valid_wh, NVCV_DATA_TYPE_U8, NVCV_TENSOR_NONE, 0, 0, nullptr)); // null reqs +} + +TEST_F(TensorTests_Negative, invalid_parameter_TensorConstruct) +{ + ASSERT_EQ(NVCV_SUCCESS, nvcvTensorCalcRequirementsForImages(1, 224, 224, NVCV_IMAGE_FORMAT_RGBA8, 0, 0, &reqs)); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorConstruct(nullptr, nullptr, &handle)); // null reqs + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorConstruct(&reqs, nullptr, nullptr)); // null handle +} + +TEST_F(TensorTests_Negative, invalid_parameter_TensorWrapDataConstruct) +{ + NVCVTensorData tensorData; + NVCVTensorBufferStrided &tensorStrided = tensorData.buffer.strided; + tensorData.bufferType = NVCV_TENSOR_BUFFER_STRIDED_CUDA; + tensorData.layout = NVCV_TENSOR_NHWC; + tensorData.rank = 4; + tensorData.shape[0] = 1; + tensorData.shape[1] = 224; + tensorData.shape[2] = 224; + tensorData.shape[3] = 3; + tensorData.dtype = NVCV_DATA_TYPE_F32; + tensorStrided.strides[3] = nvcv::FMT_RGBf32.planePixelStrideBytes(0) / nvcv::FMT_RGBf32.numChannels(); + tensorStrided.strides[2] = nvcv::FMT_RGBf32.planePixelStrideBytes(0); + tensorStrided.strides[1] = 224 * nvcv::FMT_RGBf32.planePixelStrideBytes(0); + tensorStrided.strides[0] = tensorStrided.strides[1] * tensorData.shape[1]; + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorWrapDataConstruct(nullptr, nullptr, nullptr, &handle)); // null tensorData + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorWrapDataConstruct(&tensorData, nullptr, nullptr, nullptr)); // null handle +} + +TEST_F(TensorTests_Negative, invalid_parameter_TensorGetLayout) +{ + NVCVTensorLayout layout; + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorGetLayout(nullptr, &layout)); // null handle + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorGetLayout(handle, nullptr)); // null layout +} + +TEST_F(TensorTests_Negative, invalid_parameter_TensorExportData) +{ + NVCVTensorData data; + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorExportData(nullptr, &data)); // null handle + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorExportData(handle, nullptr)); // null data +} + +TEST_F(TensorTests_Negative, invalid_parameter_TensorGetShape) +{ + int32_t rank = NVCV_TENSOR_MAX_RANK; + int64_t shape[NVCV_TENSOR_MAX_RANK] = {0}; + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorGetShape(nullptr, &rank, shape)); // null handle + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorGetShape(handle, nullptr, shape)); // null rank +} + +TEST_F(TensorTests_Negative, invalid_parameter_TensorGetUserPointer) +{ + void *userPtr; + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, 
nvcvTensorGetUserPointer(nullptr, &userPtr)); // null handle + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorGetUserPointer(handle, nullptr)); // null rank +} + +TEST_F(TensorTests_Negative, invalid_parameter_TensorReshape) +{ + int64_t new_shape[] = {1, 224, 112, 2}; + NVCVTensorHandle outHandle; + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorReshape(nullptr, 4, new_shape, NVCV_TENSOR_NHWC, &outHandle)); // null handle + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorReshape(handle, 0, new_shape, NVCV_TENSOR_NHWC, &outHandle)); // invalid rank + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorReshape(handle, NVCV_TENSOR_MAX_RANK + 1, new_shape, + NVCV_TENSOR_NHWC, &outHandle)); // invalid rank 2 + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorReshape(handle, 4, new_shape, NVCV_TENSOR_HW, &outHandle)); // mismatch layout + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorReshape(handle, 4, new_shape, NVCV_TENSOR_NHWC, nullptr)); // null out handle +} + +TEST_F(TensorTests_Negative, invalid_parameter_TensorShapePermute) +{ + NVCVTensorLayout srcLayout = NVCV_TENSOR_NHWC; + std::vector srcShape{16, 61, 23, 3}; + NVCVTensorLayout dstLayout = NVCV_TENSOR_NCHW; + std::vector outShape(srcShape.size()); + + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorShapePermute(srcLayout, nullptr, dstLayout, outShape.data())); // null srcShape + EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, + nvcvTensorShapePermute(srcLayout, srcShape.data(), dstLayout, nullptr)); // null outShape +} + +class TensorPermuteTests + : public t::TestWithParam< + std::tuple, test::Param<"srcShape", std::vector>, + test::Param<"dstLayout", NVCVTensorLayout>, test::Param<"goldShape", std::vector>>> +{ +}; + +// clang-format off +NVCV_INSTANTIATE_TEST_SUITE_P(_, TensorPermuteTests, + test::ValueList, NVCVTensorLayout, std::vector> + { + {NVCV_TENSOR_NHWC, {16, 61, 23, 3}, NVCV_TENSOR_NCHW, {16, 3, 61, 23}}, + {NVCV_TENSOR_CHW, {3, 61, 23}, NVCV_TENSOR_HWC, {61, 23, 3}}, + {NVCV_TENSOR_CFDHW, {3, 2, 6, 61, 23}, NVCV_TENSOR_FDHWC, {2, 6, 61, 23, 3}}, + {NVCV_TENSOR_CHW, {3, 61, 23}, NVCV_TENSOR_HW, {61, 23}}, + {NVCV_TENSOR_HWC, {61, 23, 3}, NVCV_TENSOR_HW, {61, 23}} + } +); + +// clang-format on + +TEST_P(TensorPermuteTests, smoke) +{ + NVCVTensorLayout srcLayout = std::get<0>(GetParam()); + std::vector srcShape = std::get<1>(GetParam()); + NVCVTensorLayout dstLayout = std::get<2>(GetParam()); + const std::vector goldShape = std::get<3>(GetParam()); + + std::vector outShape(goldShape.size()); + ASSERT_EQ(NVCV_SUCCESS, nvcvTensorShapePermute(srcLayout, srcShape.data(), dstLayout, outShape.data())); + EXPECT_EQ(outShape, goldShape); +} diff --git a/tests/nvcv_types/system/TestTensorLayout.cpp b/tests/nvcv_types/system/TestTensorLayout.cpp index 4d55597b1..d0d349fda 100644 --- a/tests/nvcv_types/system/TestTensorLayout.cpp +++ b/tests/nvcv_types/system/TestTensorLayout.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -419,3 +419,49 @@ TEST_P(TensorLayoutOStreamExecTests, works)
 
     EXPECT_STREQ(gold, ss.str().c_str());
 }
+
+TEST(TensorLayoutTests_Negative, invalid_parameter_TensorLayoutMake)
+{
+    NVCVTensorLayout outLayout;
+    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorLayoutMake("HW", nullptr));
+
+    std::string exceededDescr(NVCV_TENSOR_MAX_RANK + 1, 'Z');
+    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT, nvcvTensorLayoutMake(exceededDescr.c_str(), &outLayout)); // exceed range
+}
+
+TEST(TensorLayoutTests_Negative, invalid_parameter_TensorLayoutMakeRange)
+{
+    NVCVTensorLayout outLayout;
+
+    std::string validDescr(NVCV_TENSOR_MAX_RANK - 1, 'Z');
+    std::string exceededDescr(NVCV_TENSOR_MAX_RANK + 1, 'Z');
+
+    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT,
+              nvcvTensorLayoutMakeRange(exceededDescr.c_str(), exceededDescr.c_str() + validDescr.size(),
+                                        nullptr)); // null output
+    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT,
+              nvcvTensorLayoutMakeRange(nullptr, exceededDescr.c_str() + validDescr.size(), &outLayout)); // null begin
+    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT,
+              nvcvTensorLayoutMakeRange(exceededDescr.c_str(), nullptr, &outLayout)); // null end
+    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT,
+              nvcvTensorLayoutMakeRange(exceededDescr.c_str(), exceededDescr.c_str() + exceededDescr.size(),
+                                        &outLayout)); // exceed range
+}
+
+TEST(TensorLayoutTests_Negative, invalid_parameter_TensorLayoutMakeFirst)
+{
+    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT,
+              nvcvTensorLayoutMakeFirst(NVCV_TENSOR_LAYOUT_MAKE("ABCD"), 2, nullptr)); // null output
+}
+
+TEST(TensorLayoutTests_Negative, invalid_parameter_TensorLayoutMakeLast)
+{
+    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT,
+              nvcvTensorLayoutMakeLast(NVCV_TENSOR_LAYOUT_MAKE("ABCD"), 2, nullptr)); // null output
+}
+
+TEST(TensorLayoutTests_Negative, invalid_parameter_TensorLayoutMakeSubRange)
+{
+    EXPECT_EQ(NVCV_ERROR_INVALID_ARGUMENT,
+              nvcvTensorLayoutMakeSubRange(NVCV_TENSOR_LAYOUT_MAKE("ABCD"), 0, 2, nullptr)); // null output
+}
diff --git a/tests/nvcv_types/unit/TestMath.cpp b/tests/nvcv_types/unit/TestMath.cpp
index 67bc79e48..c2bf64b62 100644
--- a/tests/nvcv_types/unit/TestMath.cpp
+++ b/tests/nvcv_types/unit/TestMath.cpp
@@ -290,3 +290,27 @@ TEST_P(MathDivUpPowerOfTwoTests, works)
 
     EXPECT_EQ(gold, util::DivUpPowerOfTwo(num, den));
 }
+
+class MathSincTests : public t::TestWithParam<std::tuple<test::Param<"value", float>, test::Param<"gold", float>>>
+{
+};
+
+// clang-format off
+NVCV_INSTANTIATE_TEST_SUITE_P(_, MathSincTests,
+    test::ValueList<float, float>
+    {
+        {0.f, 1.f},
+        {1.f, 0.f},
+        {0.5f, 2.f / static_cast<float>(M_PI)},
+        {-0.5f, 2.f / static_cast<float>(M_PI)},
+    });
+
+// clang-format on
+
+TEST_P(MathSincTests, works)
+{
+    const float value = std::get<0>(GetParam());
+    const float gold = std::get<1>(GetParam());
+
+    EXPECT_NEAR(gold, util::sinc(value), 1e-7f);
+}
diff --git a/tools/mkop/PythonWrap.cpp b/tools/mkop/PythonWrap.cpp
index c2a667053..25c67a77d 100644
--- a/tools/mkop/PythonWrap.cpp
+++ b/tools/mkop/PythonWrap.cpp
@@ -36,9 +36,11 @@ Tensor __OPNAME__Into(Tensor &output, Tensor &input, std::optional<Stream> pstre
     auto op = CreateOperator();
 
     ResourceGuard guard(*pstream);
-    guard.add(LockMode::LOCK_READ, {input});
-    guard.add(LockMode::LOCK_WRITE, {output});
-    guard.add(LockMode::LOCK_NONE, {*op});
+    guard.add(LockMode::LOCK_MODE_READ, {input});
+    guard.add(LockMode::LOCK_MODE_WRITE, {output});
+    // TODO: if the op kernel allocates resources that are accessed by the device, change this to READWRITE;
+    // if it is set to NONE, it is possible for the operator to be destroyed before the kernel is executed.
+    guard.add(LockMode::LOCK_MODE_NONE, {*op});
 
     op->submit(pstream->cudaHandle(), input, output);