From 2a737339b4cc39ad79a28defd5183751be512811 Mon Sep 17 00:00:00 2001 From: Miles Price <119885516+milesp-nvidia@users.noreply.github.com> Date: Thu, 5 Dec 2024 10:04:59 -0800 Subject: [PATCH] feat: adding release v0.13.0 of CV-CUDA (#219) --- CMakeLists.txt | 2 +- README.md | 11 +- cmake/BuildPython.cmake | 46 +- cmake/ConfigCompiler.cmake | 13 +- cmake/ConfigPython.cmake | 27 + .../Dockerfile.build.manylinux2014.deps | 117 ++++ docker/manylinux/Dockerfile.builder.deps | 61 ++ docker/manylinux/Dockerfile.cuda.centos7.deps | 19 + .../Dockerfile.cuda.ubuntu20.04.deps | 46 ++ .../Dockerfile.cuda.ubuntu22.04.deps | 32 ++ .../Dockerfile.gcc.manylinux2014.deps | 56 ++ docker/manylinux/Dockerfile.runner.deps | 34 ++ docker/manylinux/ccache.conf | 17 + docker/manylinux/config_external.sh | 43 ++ docker/manylinux/docker_buildx.sh | 160 ++++++ docs/sphinx/index.rst | 1 + docs/sphinx/relnotes/v0.13.0-beta.rst | 65 +++ python/CMakeLists.txt | 15 - python/__init__.py.in | 31 + python/_load_binding.py.in | 65 +++ python/build_wheels.sh | 164 ++++-- python/setup.py.in | 128 +++-- src/cvcuda/priv/CMakeLists.txt | 6 +- src/nvcv/CMakeLists.txt | 2 +- tests/CMakeLists.txt | 6 + tests/cvcuda/stressTest/cvcuda_cache_repro.py | 200 +++++++ .../stressTest/stress_test_inference.py | 542 ++++++++++++++++++ .../cvcuda/stressTest/stress_test_mt_prep.py | 267 +++++++++ .../stressTest/stress_test_preprocess.py | 197 +++++++ tests/test_wheels.sh.in | 124 ++++ 30 files changed, 2350 insertions(+), 147 deletions(-) create mode 100644 docker/manylinux/Dockerfile.build.manylinux2014.deps create mode 100644 docker/manylinux/Dockerfile.builder.deps create mode 100644 docker/manylinux/Dockerfile.cuda.centos7.deps create mode 100644 docker/manylinux/Dockerfile.cuda.ubuntu20.04.deps create mode 100644 docker/manylinux/Dockerfile.cuda.ubuntu22.04.deps create mode 100644 docker/manylinux/Dockerfile.gcc.manylinux2014.deps create mode 100644 docker/manylinux/Dockerfile.runner.deps create mode 100644 
docker/manylinux/ccache.conf create mode 100755 docker/manylinux/config_external.sh create mode 100755 docker/manylinux/docker_buildx.sh create mode 100644 docs/sphinx/relnotes/v0.13.0-beta.rst create mode 100644 python/__init__.py.in create mode 100644 python/_load_binding.py.in create mode 100644 tests/cvcuda/stressTest/cvcuda_cache_repro.py create mode 100644 tests/cvcuda/stressTest/stress_test_inference.py create mode 100644 tests/cvcuda/stressTest/stress_test_mt_prep.py create mode 100644 tests/cvcuda/stressTest/stress_test_preprocess.py create mode 100755 tests/test_wheels.sh.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c2f48cd9..be6123aca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ endif() project(cvcuda LANGUAGES C CXX - VERSION 0.12.0 + VERSION 0.13.0 DESCRIPTION "CUDA-accelerated Computer Vision algorithms" ) diff --git a/README.md b/README.md index ddb937410..bc91416f9 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ [![License](https://img.shields.io/badge/License-Apache_2.0-yellogreen.svg)](https://opensource.org/licenses/Apache-2.0) -![Version](https://img.shields.io/badge/Version-v0.12.0--beta-blue) +![Version](https://img.shields.io/badge/Version-v0.13.0--beta-blue) ![Platform](https://img.shields.io/badge/Platform-linux--64_%7C_win--64_wsl2%7C_aarch64-gray) @@ -239,12 +239,13 @@ cpack . -G [DEB|TXZ] Python Wheels -By default during the `release` build, Python bindings and wheels are created for the available CUDA version and the specified Python version(s). The wheels are stored in `build-rel/pythonX.Y/wheel` folder, where `build-rel` is the build directory used to build the release build and `X` and `Y` are Python major and minor versions. +By default, during the `release` build, Python bindings and wheels are created for the available CUDA version and the specified Python version(s). 
The wheels are now output to the `build-rel/python3/repaired_wheels` folder (after being processed by the `auditwheel repair` command in the case of ManyLinux). The single generated Python wheel is compatible with all versions of Python specified during the CMake build step. Here, `build-rel` is the build directory used to build the release build. -The built wheels can be installed using pip. -For example, to install the Python wheel built for CUDA 12.x, Python 3.10 on Linux x86_64 systems: +The new Python wheels for PyPI compliance must be built within the ManyLinux 2014 Docker environment. The Docker images can be generated using the `docker/manylinux/docker_buildx.sh` script. These images ensure the wheels meet ManyLinux 2014 and PyPI standards. + +The built wheels can still be installed using `pip`. For example, to install the Python wheel built for CUDA 12.x, Python 3.10 and 3.11 on Linux x86_64 systems: ```shell -pip install cvcuda_cu12-<x.y.z>-cp310-cp310-linux_x86_64.whl +pip install cvcuda_cu12-<x.y.z>-cp310.cp311-cp310.cp311-linux_x86_64.whl ``` ## Contributing diff --git a/cmake/BuildPython.cmake b/cmake/BuildPython.cmake index cab2e7371..d42414f4c 100644 --- a/cmake/BuildPython.cmake +++ b/cmake/BuildPython.cmake @@ -45,14 +45,14 @@ list(APPEND PYPROJ_COMMON_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} ) -# It need to overwrite the PYTHON_MODULE_EXTENSION to generate +# It needs to overwrite the PYTHON_MODULE_EXTENSION to generate # python module name with correct name when cross compiling # example: set(PYTHON_MODULE_EXTENSION .cpython-py38-aarch64-linux-gnu.so) if (CMAKE_CROSSCOMPILING) -list(APPEND PYPROJ_COMMON_ARGS - -DCUDAToolkit_ROOT=${CUDAToolkit_ROOT} - -DPYTHON_MODULE_EXTENSION=${PYTHON_MODULE_EXTENSION} -) + list(APPEND PYPROJ_COMMON_ARGS + -DCUDAToolkit_ROOT=${CUDAToolkit_ROOT} + -DPYTHON_MODULE_EXTENSION=${PYTHON_MODULE_EXTENSION} + ) endif() foreach(VER ${PYTHON_VERSIONS}) @@ -61,7 +61,7 @@ foreach(VER ${PYTHON_VERSIONS}) 
ExternalProject_Add(cvcuda_python${VER} PREFIX ${BASEDIR} SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/python - CMAKE_ARGS ${PYPROJ_COMMON_ARGS} -DPYTHON_VERSION=${VER} -DBUILD_ROOT=${CMAKE_BINARY_DIR} -DPYTHON_VERSION_SHORT=${VER} + CMAKE_ARGS ${PYPROJ_COMMON_ARGS} -DPYTHON_VERSION=${VER} BINARY_DIR ${BASEDIR}/build TMP_DIR ${BASEDIR}/tmp STAMP_DIR ${BASEDIR}/stamp @@ -72,7 +72,37 @@ foreach(VER ${PYTHON_VERSIONS}) endforeach() if(CMAKE_BUILD_TYPE STREQUAL "Release") - foreach(PYTHON_VERSION ${PYTHON_VERSIONS}) - configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py.in" "${CMAKE_BINARY_DIR}/python${PYTHON_VERSION}/setup.py") + set(PACKAGE_LIB_DIR ${CMAKE_BINARY_DIR}/python3/lib) + + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python3) + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python3/lib) + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python3/cvcuda) + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python3/cvcuda/_bindings) + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python3/nvcv) + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python3/nvcv/_bindings) + + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py.in" "${CMAKE_BINARY_DIR}/python3/setup.py") + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/__init__.py.in" "${CMAKE_BINARY_DIR}/python3/cvcuda/__init__.py") + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/__init__.py.in" "${CMAKE_BINARY_DIR}/python3/nvcv/__init__.py") + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/_load_binding.py.in" "${CMAKE_BINARY_DIR}/python3/cvcuda/_load_binding.py") + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/_load_binding.py.in" "${CMAKE_BINARY_DIR}/python3/nvcv/_load_binding.py") + + add_custom_target(wheel ALL) + + foreach(VER ${PYTHON_VERSIONS}) + add_dependencies(wheel cvcuda_python${VER}) endforeach() + + add_custom_command( + TARGET wheel + COMMAND ${CMAKE_COMMAND} -E copy $ ${CMAKE_BINARY_DIR}/python3/lib + COMMAND ${CMAKE_COMMAND} -E copy $ ${CMAKE_BINARY_DIR}/python3/lib + COMMAND ${CMAKE_COMMAND} -E copy 
${CMAKE_BINARY_DIR}/lib/python/cvcuda*.so ${CMAKE_BINARY_DIR}/python3/cvcuda/_bindings + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/lib/python/nvcv*.so ${CMAKE_BINARY_DIR}/python3/nvcv/_bindings + ) + + add_custom_command( + TARGET wheel + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/python/build_wheels.sh" "${CMAKE_BINARY_DIR}/python3" + ) endif() diff --git a/cmake/ConfigCompiler.cmake b/cmake/ConfigCompiler.cmake index b75165e09..4031d008a 100644 --- a/cmake/ConfigCompiler.cmake +++ b/cmake/ConfigCompiler.cmake @@ -81,10 +81,11 @@ if(BUILD_TESTS) set(candidate_compilers ${PUBLIC_API_COMPILERS}) else() # If not, by default, we'll try these. - set(candidate_compilers gcc-11 gcc-9 clang-11 clang-14) + set(candidate_compilers gcc-11 gcc-10 gcc-9 clang-11 clang-14) endif() unset(valid_compilers) + set(at_least_one_compiler_found OFF) foreach(comp ${candidate_compilers}) string(MAKE_C_IDENTIFIER "${comp}" comp_str) @@ -93,14 +94,20 @@ if(BUILD_TESTS) find_program(COMPILER_EXEC_${COMP_STR} ${comp}) if(COMPILER_EXEC_${COMP_STR}) list(APPEND valid_compilers ${comp}) + set(at_least_one_compiler_found ON) else() if(PUBLIC_API_COMPILERS) message(FATAL_ERROR "Compiler '${comp}' not found") - else() - message(WARNING "Compiler '${comp}' not found, skipping public API checks for it") endif() endif() endforeach() + + if(NOT at_least_one_compiler_found) + foreach(comp ${candidate_compilers}) + message(WARNING "Compiler '${comp}' not found, skipping public API checks for it") + endforeach() + endif() + set(PUBLIC_API_COMPILERS "${valid_compilers}") endif() diff --git a/cmake/ConfigPython.cmake b/cmake/ConfigPython.cmake index 83f65f8b0..85f1cb23c 100644 --- a/cmake/ConfigPython.cmake +++ b/cmake/ConfigPython.cmake @@ -81,6 +81,33 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/cvcuda_$> /etc/profile.d/python.sh && \ + echo "export PYBIN=${PYBIN}" >> /etc/profile.d/python.sh && \ + echo "export PYLIB=${PYLIB}" >> /etc/profile.d/python.sh && \ + echo "export 
PATH=\${PYTHONPATH}/bin:/opt/python/*/bin:\${PATH}" >> /etc/profile.d/python.sh && \ + echo "export LD_LIBRARY_PATH=/usr/local/lib:/opt/python/*/lib:\${PYLIB}:\${LD_LIBRARY_PATH}" >> /etc/profile.d/python.sh && \ + echo "export LIBRARY_PATH=/usr/local/lib:/opt/python/*/lib:\${PYLIB}:\${LIBRARY_PATH}" >> /etc/profile.d/python.sh && \ + chmod +x /etc/profile.d/python.sh + +# Install Python packages +RUN python3 -m pip install --no-cache-dir \ + breathe \ + cibuildwheel \ + clang==${CLANG_VERSION} \ + exhale \ + flake8 \ + future \ + graphviz \ + numpy \ + pre-commit \ + recommonmark \ + setuptools \ + sphinx_rtd_theme \ + sphinx==${SPHINX_VERSION} \ + twine \ + wheel + +# Update the dynamic linker run-time bindings +RUN ldconfig + +# extra deps +COPY --from=extra_deps / / diff --git a/docker/manylinux/Dockerfile.builder.deps b/docker/manylinux/Dockerfile.builder.deps new file mode 100644 index 000000000..1afe990cb --- /dev/null +++ b/docker/manylinux/Dockerfile.builder.deps @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +ARG FROM_IMAGE_NAME=quay.io/pypa/manylinux2014_x86_64 +ARG CUDA_IMAGE +ARG BUILDER_CUDA_EXTRA_DEPS=scratch + +FROM ${BUILDER_CUDA_EXTRA_DEPS} AS cuda_extra_deps +FROM ${CUDA_IMAGE} AS cuda + +# Find and copy libcuda.so* to /cuda_libs +RUN mkdir /cuda_libs && \ + find /usr -name 'libcuda.so*' -exec cp {} /cuda_libs/ \; + +FROM ${FROM_IMAGE_NAME} + +ENV PATH=/usr/local/cuda/bin:${PATH} +ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:${LD_LIBRARY_PATH} + +ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility,compat32 + +# Propagating the environment variable to profile.d +RUN echo "export NVIDIA_DRIVER_CAPABILITIES=video,compute,utility,compat32" >> /etc/profile.d/nvidia.sh && \ + echo "export PATH=/usr/local/cuda/bin:\${PATH}" >> /etc/profile.d/nvidia.sh && \ + echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:\${LD_LIBRARY_PATH}" >> /etc/profile.d/nvidia.sh && \ + chmod +x /etc/profile.d/nvidia.sh + +# CUDA +COPY --from=cuda /usr/local/cuda /usr/local/cuda + +# Copy libcuda.so* files +COPY --from=cuda /cuda_libs/* /usr/lib64/ + +# Test CUDA compiler +RUN nvcc --version + +# Ensure tmp is writable by all users recursively +RUN chmod -R a+rw /tmp + +RUN git clone https://github.com/google/googletest.git -b release-1.10.0 && \ + pushd googletest && \ + mkdir build && \ + pushd build && \ + cmake .. && \ + make -j$(nproc) && make install && \ + popd && popd && rm -rf googletest + +# Extra deps +COPY --from=cuda_extra_deps / / diff --git a/docker/manylinux/Dockerfile.cuda.centos7.deps b/docker/manylinux/Dockerfile.cuda.centos7.deps new file mode 100644 index 000000000..3cfa7efee --- /dev/null +++ b/docker/manylinux/Dockerfile.cuda.centos7.deps @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ARG FROM_IMAGE_NAME=nvidia/cuda:11.4.3-devel-centos7 +FROM ${FROM_IMAGE_NAME} AS cuda + +RUN ln -sf /usr/share/zoneinfo/US/Pacific /etc/localtime diff --git a/docker/manylinux/Dockerfile.cuda.ubuntu20.04.deps b/docker/manylinux/Dockerfile.cuda.ubuntu20.04.deps new file mode 100644 index 000000000..413df4177 --- /dev/null +++ b/docker/manylinux/Dockerfile.cuda.ubuntu20.04.deps @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ARG FROM_IMAGE_NAME=nvidia/cuda:11.4.3-devel-ubuntu20.04 +FROM ${FROM_IMAGE_NAME} AS cuda + +ARG DEBIAN_FRONTEND=noninteractive + +# need to update and install in one go, or else installation might use +# stale data from server stored in docker cache, with packages that don't exist anymore. 
+RUN apt-get update && \ + apt-get install -y --no-install-recommends git git-lfs software-properties-common wget \ + && add-apt-repository ppa:ubuntu-toolchain-r/test \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + git git-lfs \ + ninja-build \ + ccache \ + libgtest-dev libgmock-dev \ + shellcheck \ + curl + +RUN ln -sf /usr/share/zoneinfo/US/Pacific /etc/localtime + +# Allow using this image in systems without proper CUDA runtime/driver support. +# We'll be using this image only for building, don't need strict CUDA checks. +ENV NVIDIA_DISABLE_REQUIRE=true + +RUN apt-get update \ + && apt-get install -y --no-install-recommends python3 python3-pip python3-pytest python3-dev doxygen \ + && rm -rf /var/lib/apt/lists/* + +# python3 is python3.8 in ubuntu20.04 +RUN python3 -m pip install pre-commit diff --git a/docker/manylinux/Dockerfile.cuda.ubuntu22.04.deps b/docker/manylinux/Dockerfile.cuda.ubuntu22.04.deps new file mode 100644 index 000000000..9d2373784 --- /dev/null +++ b/docker/manylinux/Dockerfile.cuda.ubuntu22.04.deps @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +ARG FROM_IMAGE_NAME=nvidia/cuda:11.4.3-devel-ubuntu22.04 +FROM ${FROM_IMAGE_NAME} AS cuda + +ARG DEBIAN_FRONTEND=noninteractive + +# need to update and install in one go, or else installation might use +# stale data from server stored in docker cache, with packages that don't exist anymore. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + git git-lfs \ + ninja-build \ + ccache \ + libgtest-dev libgmock-dev \ + pre-commit shellcheck \ + curl + +RUN ln -sf /usr/share/zoneinfo/US/Pacific /etc/localtime diff --git a/docker/manylinux/Dockerfile.gcc.manylinux2014.deps b/docker/manylinux/Dockerfile.gcc.manylinux2014.deps new file mode 100644 index 000000000..06c2933b5 --- /dev/null +++ b/docker/manylinux/Dockerfile.gcc.manylinux2014.deps @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +ARG FROM_IMAGE_NAME=quay.io/pypa/manylinux2014_x86_64 +ARG BUILDER_EXTRA_DEPS=scratch + +FROM ${BUILDER_EXTRA_DEPS} AS extra_deps +FROM ${FROM_IMAGE_NAME} + +ARG GCC_VERSION=10 + +ENV GCC_VERSION=${GCC_VERSION} + +# Install EPEL and SCL repositories +RUN yum install -y epel-release && yum repolist + +# Install yum Dependencies +RUN yum install -y \ + wget nasm doxygen graphviz gettext xz openssl-devel openssl-static autogen zip dpkg \ + devtoolset-${GCC_VERSION} \ + devtoolset-${GCC_VERSION}-libasan-devel \ + devtoolset-${GCC_VERSION}-liblsan-devel \ + devtoolset-${GCC_VERSION}-libtsan-devel \ + devtoolset-${GCC_VERSION}-libubsan-devel && \ + yum clean all && rm -rf /var/cache/yum + +# Update PATH and LD_LIBRARY_PATH to use GCC from devtoolset +ENV PATH=/opt/rh/devtoolset-${GCC_VERSION}/root/usr/bin:$PATH +ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${GCC_VERSION}/root/usr/lib64:$LD_LIBRARY_PATH + +# Set the compiler environment variables +ENV CC=gcc +ENV CXX=g++ + +# Propagating the environment variables to profile.d +RUN echo "export CC=gcc" > /etc/profile.d/gcc.sh && \ + echo "export CXX=g++" >> /etc/profile.d/gcc.sh && \ + echo "source /opt/rh/devtoolset-${GCC_VERSION}/enable" >> /etc/profile.d/gcc.sh && \ + echo "alias gcc-${GCC_VERSION}=gcc" >> /etc/profile.d/gcc.sh && \ + echo "alias g++-${GCC_VERSION}=g++" >> /etc/profile.d/gcc.sh && \ + chmod +x /etc/profile.d/gcc.sh + +# Don't want the short-unicode version for Python 2.7 +RUN rm -f /opt/python/cp27-cp27m diff --git a/docker/manylinux/Dockerfile.runner.deps b/docker/manylinux/Dockerfile.runner.deps new file mode 100644 index 000000000..5271dbd43 --- /dev/null +++ b/docker/manylinux/Dockerfile.runner.deps @@ -0,0 +1,34 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ARG FROM_IMAGE_NAME=quay.io/pypa/manylinux2014_x86_64 +ARG BUILDER_CUDA_EXTRA_DEPS=scratch + +FROM ${BUILDER_CUDA_EXTRA_DEPS} AS cuda_extra_deps +FROM ${FROM_IMAGE_NAME} + +ENV PATH=/usr/local/cuda/bin:${PATH} +ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:${LD_LIBRARY_PATH} + +ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility,compat32 + +# Propagating the environment variable to profile.d +RUN echo "export NVIDIA_DRIVER_CAPABILITIES=video,compute,utility,compat32" >> /etc/profile.d/nvidia.sh && \ + echo "export PATH=/usr/local/cuda/bin:\${PATH}" >> /etc/profile.d/nvidia.sh && \ + echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:\${LD_LIBRARY_PATH}" >> /etc/profile.d/nvidia.sh && \ + chmod +x /etc/profile.d/nvidia.sh + +# Extra deps +COPY --from=cuda_extra_deps / / diff --git a/docker/manylinux/ccache.conf b/docker/manylinux/ccache.conf new file mode 100644 index 000000000..1fb25208b --- /dev/null +++ b/docker/manylinux/ccache.conf @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +max_size = 20G +cache_dir = /cache/ccache diff --git a/docker/manylinux/config_external.sh b/docker/manylinux/config_external.sh new file mode 100755 index 000000000..f343a4c9c --- /dev/null +++ b/docker/manylinux/config_external.sh @@ -0,0 +1,43 @@ +#!/bin/bash -ex + +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +export DOCKER_BUILDKIT=${DOCKER_BUILDKIT:-1} + +export PLATFORM=${PLATFORM:-"linux/amd64"} +export ARCH=${ARCH:-"x86_64"} + +export REGISTRY_MANYLINUX_PREFIX=${REGISTRY_MANYLINUX_PREFIX:-"quay.io/pypa/"} +export REGISTRY_CUDA_PREFIX=${REGISTRY_CUDA_PREFIX:-"nvidia/"} +export REGISTRY_HOST_PREFIX=${REGISTRY_HOST_PREFIX:-""} + +export MANYLINUX_VERSION="2014" +export MANYLINUX_BASE_OS="centos7" +export MANYLINUX_IMAGE_TAG="2024.10.26-1" + +export GCC_VERSIONS=( + "10" +) + +export CUDA_VERSIONS=( + "11.7.1" + "12.2.0" +) + +export TEST_OS_VERSIONS=( + "ubuntu20.04" + "ubuntu22.04" +) diff --git a/docker/manylinux/docker_buildx.sh b/docker/manylinux/docker_buildx.sh new file mode 100755 index 000000000..1474d8521 --- /dev/null +++ b/docker/manylinux/docker_buildx.sh @@ -0,0 +1,160 @@ +#!/bin/bash -ex + +# SPDX-License-Identifier: Apache-2.0 + +# Ensure failures are caught when commands are piped +set -o pipefail + +# Set default version if not provided +export VERSION="${VERSION:-1}" + +# Get the directory of the script +SCRIPT_DIR="$(readlink -f $(dirname "$0"))" + +# Move to the script directory +pushd "${SCRIPT_DIR}" >/dev/null + +# Source configuration files +if ! 
source "${SCRIPT_DIR}/config_internal.sh"; then + source "${SCRIPT_DIR}/config_external.sh" +fi + +# Initialize variables +BUILDER_NAME="cvcuda_builder" + +# Initialize buildx instance +docker buildx inspect "${BUILDER_NAME}" >/dev/null 2>&1 || docker buildx create --name "${BUILDER_NAME}" +docker buildx use "${BUILDER_NAME}" +docker buildx inspect --bootstrap + +####### BASE IMAGES ####### + +# Build Manylinux images with different GCC versions +for GCC_VERSION in "${GCC_VERSIONS[@]}"; do + IMAGE_NAME="${REGISTRY_HOST_PREFIX}manylinux${MANYLINUX_VERSION}-${ARCH}.gcc${GCC_VERSION}" + DOCKERFILE="${SCRIPT_DIR}/Dockerfile.gcc.manylinux${MANYLINUX_VERSION}.deps" + FROM_IMAGE_NAME="${REGISTRY_MANYLINUX_PREFIX}manylinux${MANYLINUX_VERSION}_${ARCH}:${MANYLINUX_IMAGE_TAG}" + + docker buildx build \ + --cache-to type=inline \ + --cache-from type=registry,ref="${IMAGE_NAME}" \ + -t "${IMAGE_NAME}" \ + -t "${IMAGE_NAME}:v${VERSION}" \ + -f "${DOCKERFILE}" \ + --build-arg "FROM_IMAGE_NAME=${FROM_IMAGE_NAME}" \ + --build-arg "GCC_VERSION=${GCC_VERSION}" \ + --platform "${PLATFORM}" \ + --provenance=false \ + --push \ + . +done + +# Build CUDA images on manylinux platform +for CUDA_VERSION in "${CUDA_VERSIONS[@]}"; do + IMAGE_NAME="${REGISTRY_HOST_PREFIX}cuda${CUDA_VERSION}-${MANYLINUX_BASE_OS}-${ARCH}" + DOCKERFILE="${SCRIPT_DIR}/Dockerfile.cuda.${MANYLINUX_BASE_OS}.deps" + FROM_IMAGE_NAME="${REGISTRY_CUDA_PREFIX}cuda:${CUDA_VERSION}-devel-${MANYLINUX_BASE_OS}" + + docker buildx build \ + --cache-to type=inline \ + --cache-from type=registry,ref="${IMAGE_NAME}" \ + -t "${IMAGE_NAME}" \ + -t "${IMAGE_NAME}:v${VERSION}" \ + -f "${DOCKERFILE}" \ + --build-arg FROM_IMAGE_NAME="${FROM_IMAGE_NAME}" \ + --platform "${PLATFORM}" \ + --provenance=false \ + --push \ + . 
+done + +# Build CUDA images on test OS platforms +for CUDA_VERSION in "${CUDA_VERSIONS[@]}"; do + for OS_VERSION in "${TEST_OS_VERSIONS[@]}"; do + IMAGE_NAME="${REGISTRY_HOST_PREFIX}cuda${CUDA_VERSION}-${OS_VERSION}-${ARCH}" + DOCKERFILE="${SCRIPT_DIR}/Dockerfile.cuda.${OS_VERSION}.deps" + FROM_IMAGE_NAME="${REGISTRY_CUDA_PREFIX}cuda:${CUDA_VERSION}-devel-${OS_VERSION}" + + docker buildx build \ + --cache-to type=inline \ + --cache-from type=registry,ref="${IMAGE_NAME}" \ + -t "${IMAGE_NAME}" \ + -t "${IMAGE_NAME}:v${VERSION}" \ + -f "${DOCKERFILE}" \ + --build-arg FROM_IMAGE_NAME="${FROM_IMAGE_NAME}" \ + --platform "${PLATFORM}" \ + --provenance=false \ + --push \ + . + done +done + +# Build base images for building dependencies +for GCC_VERSION in "${GCC_VERSIONS[@]}"; do + IMAGE_NAME="${REGISTRY_HOST_PREFIX}cvcuda_deps-${ARCH}.gcc${GCC_VERSION}" + DOCKERFILE="${SCRIPT_DIR}/Dockerfile.build.manylinux${MANYLINUX_VERSION}.deps" + FROM_IMAGE_NAME="${REGISTRY_HOST_PREFIX}manylinux${MANYLINUX_VERSION}-${ARCH}.gcc${GCC_VERSION}" + + docker buildx build \ + --cache-to type=inline \ + --cache-from type=registry,ref="${IMAGE_NAME}" \ + -t "${IMAGE_NAME}" \ + -t "${IMAGE_NAME}:v${VERSION}" \ + -f "${DOCKERFILE}" \ + --build-arg FROM_IMAGE_NAME="${FROM_IMAGE_NAME}" \ + --build-arg ARCH="${ARCH}" \ + --platform "${PLATFORM}" \ + --provenance=false \ + --push \ + . 
+done + +####### BUILDER IMAGES ####### + +# Generate the builder image over cuda and compiler versions +for CUDA_VERSION in "${CUDA_VERSIONS[@]}"; do + for GCC_VERSION in "${GCC_VERSIONS[@]}"; do + IMAGE_NAME="${REGISTRY_HOST_PREFIX}builder-cuda${CUDA_VERSION}-gcc${GCC_VERSION}-${ARCH}" + DOCKERFILE="${SCRIPT_DIR}/Dockerfile.builder.deps" + FROM_IMAGE_NAME="${REGISTRY_HOST_PREFIX}cvcuda_deps-${ARCH}.gcc${GCC_VERSION}" + CUDA_IMAGE="${REGISTRY_HOST_PREFIX}cuda${CUDA_VERSION}-${MANYLINUX_BASE_OS}-${ARCH}" + + docker buildx build \ + --cache-to type=inline \ + --cache-from type=registry,ref="${IMAGE_NAME}" \ + -t "${IMAGE_NAME}" \ + -t "${IMAGE_NAME}:v${VERSION}" \ + -f "${DOCKERFILE}" \ + --build-arg FROM_IMAGE_NAME="${FROM_IMAGE_NAME}" \ + --build-arg CUDA_IMAGE="${CUDA_IMAGE}" \ + --platform "${PLATFORM}" \ + --provenance=false \ + --push \ + . + done +done + +####### RUNNER IMAGES ####### + +# Generate the runner image over cuda and os versions +for CUDA_VERSION in "${CUDA_VERSIONS[@]}"; do + for OS_VERSION in "${TEST_OS_VERSIONS[@]}"; do + IMAGE_NAME="${REGISTRY_HOST_PREFIX}runner-cuda${CUDA_VERSION}-${OS_VERSION}-${ARCH}" + DOCKERFILE="${SCRIPT_DIR}/Dockerfile.runner.deps" + FROM_IMAGE_NAME="${REGISTRY_HOST_PREFIX}cuda${CUDA_VERSION}-${OS_VERSION}-${ARCH}" + + docker buildx build \ + --cache-to type=inline \ + --cache-from type=registry,ref="${IMAGE_NAME}" \ + -t "${IMAGE_NAME}" \ + -t "${IMAGE_NAME}:v${VERSION}" \ + -f "${DOCKERFILE}" \ + --build-arg FROM_IMAGE_NAME="${FROM_IMAGE_NAME}" \ + --platform "${PLATFORM}" \ + --provenance=false \ + --push \ + . 
+ done +done + +popd >/dev/null diff --git a/docs/sphinx/index.rst b/docs/sphinx/index.rst index 69d555b1b..04f069b75 100644 --- a/docs/sphinx/index.rst +++ b/docs/sphinx/index.rst @@ -123,6 +123,7 @@ Copyright :maxdepth: 1 :hidden: + v0.13.0-beta v0.12.0-beta v0.11.0-beta v0.10.1-beta diff --git a/docs/sphinx/relnotes/v0.13.0-beta.rst b/docs/sphinx/relnotes/v0.13.0-beta.rst new file mode 100644 index 000000000..fdf98df46 --- /dev/null +++ b/docs/sphinx/relnotes/v0.13.0-beta.rst @@ -0,0 +1,65 @@ +.. + # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + # SPDX-License-Identifier: Apache-2.0 + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +.. _v0.13.0-beta: + +v0.13.0-beta +============ + +Release Highlights +------------------ + +CV-CUDA v0.13.0 includes ManyLinux 2014 compliant wheels alongside the following changes: + +* **New Features**: + + * Added Python wheel generation compliant with ManyLinux 2014 and PyPI standards. + + * Wheels for multiple Python versions are now unified into a single wheel file per CUDA version. + + * Included scripts to build two ManyLinux 2014 Docker images (CUDA 11, CUDA 12) for building, and four Ubuntu images (20.04 and 22.04 x CUDA 11, CUDA 12) for testing. + + * Python wheels must be built within the ManyLinux 2014 Docker images to guarantee ManyLinux 2014 compliance. 
+ +* **Bug Fixes**: + + * Upgraded pybind11 to version 2.13.6 for improved compatibility and functionality. + + * Resolved Python ABI compatibility issues reported in previous versions by upgrading pybind11. + + +Compatibility and Known Limitations +----------------------------------- + +For the full list, see main README on `CV-CUDA GitHub <https://github.com/CVCUDA/CV-CUDA>`_. + +License +------- + +CV-CUDA is licensed under the `Apache 2.0 <https://opensource.org/licenses/Apache-2.0>`_ license. + +Resources +--------- + +1. `CV-CUDA GitHub <https://github.com/CVCUDA/CV-CUDA>`_ +2. `CV-CUDA Increasing Throughput and Reducing Costs for AI-Based Computer Vision with CV-CUDA `_ +3. `NVIDIA Announces Microsoft, Tencent, Baidu Adopting CV-CUDA for Computer Vision AI `_ +4. `CV-CUDA helps Tencent Cloud audio and video PaaS platform achieve full-process GPU acceleration for video enhancement AI `_ + +Acknowledgements +---------------- + +CV-CUDA is developed jointly by NVIDIA and the ByteDance Machine Learning team. diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 2f175f089..b6de39aec 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -39,10 +39,6 @@ string(REPLACE "." 
"" PYTHON_MODULE_NAME "${PYTHON_MODULE_NAME}") include(GNUInstallDirs) set(PYTHON_MODULE_FILENAME_LIST "" CACHE INTERNAL "") -if(CMAKE_BUILD_TYPE STREQUAL "Release") - add_custom_target(wheel ALL) -endif() - function(nvcv_python_add_module) cmake_parse_arguments(ARG "SHARED;MODULE" "TARGET;OUTPUT_NAME" "SOURCES" ${ARGV}) @@ -80,10 +76,6 @@ function(nvcv_python_add_module) set(PYTHON_MODULE_FILENAME_LIST "${PYTHON_MODULE_FILENAME_LIST};${prefix}${ARG_OUTPUT_NAME}${suffix}" CACHE INTERNAL "") - if(CMAKE_BUILD_TYPE STREQUAL "Release") - add_dependencies(wheel ${ARG_TARGET}) - endif() - install(TARGETS ${ARG_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/python COMPONENT ${PYTHON_MODULE_NAME} @@ -102,10 +94,3 @@ string(JOIN " " PYTHON_MODULE_FILENAME_LIST ${PYTHON_MODULE_FILENAME_LIST}) configure_file(cpack/debian_python_postinst.in cpack/postinst @ONLY) configure_file(cpack/debian_python_prerm.in cpack/prerm @ONLY) - -# Create Python wheel -if(CMAKE_BUILD_TYPE STREQUAL "Release") - add_custom_command( - TARGET wheel - COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/build_wheels.sh" "${BUILD_ROOT}" ${PYTHON_VERSION_SHORT} ) -endif() diff --git a/python/__init__.py.in b/python/__init__.py.in new file mode 100644 index 000000000..7a602de55 --- /dev/null +++ b/python/__init__.py.in @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +from ._load_binding import load_binding as _load_binding + +# Dynamically load the appropriate binding +_binding = _load_binding( + __name__, + os.path.join(os.path.dirname(__file__), '_bindings') +) + +# Import all symbols from the binding into the top-level namespace +__all__ = dir(_binding) +globals().update({symbol: getattr(_binding, symbol) for symbol in __all__}) + +# Clean up internal variables to avoid exposing them in the package namespace +del _load_binding, _binding, os diff --git a/python/_load_binding.py.in b/python/_load_binding.py.in new file mode 100644 index 000000000..4001e1d28 --- /dev/null +++ b/python/_load_binding.py.in @@ -0,0 +1,65 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import importlib.util +import sysconfig +from functools import lru_cache + + +@lru_cache(maxsize=1) +def load_binding(module_name: str, bindings_dir: str): + """ + Dynamically selects the correct binding for the current Python version + """ + # Get the Python ABI tag + python_version = f"{sys.version_info.major}{sys.version_info.minor}" + abi_tag = sysconfig.get_config_var('SOABI') + + # Construct the expected filename + binding_so_filename = f'{module_name}.{abi_tag}.so' + + # Find the .so file in the package directory + binding_so_path = os.path.join(bindings_dir, binding_so_filename) + if not os.path.exists(binding_so_path): + raise ImportError( + f'Could not find the binding file for Python {python_version} at ' + f'{binding_so_path}. Make sure the package is installed.' + ) + + # Dynamically load the .so file + spec = importlib.util.spec_from_file_location(module_name, binding_so_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser(description='Load a Python binding') + parser.add_argument('module_name', type=str, + help='The name of the module to load') + parser.add_argument('bindings_dir', type=str, + help='The directory containing the bindings') + args = parser.parse_args() + + binding = load_binding(args.module_name, args.bindings_dir) + + print(f'Loaded module: {binding}') + print(f' Binding version: {binding.__version__}') + print(f' Binding description: {binding.__doc__}') + print(f' Binding functions: {dir(binding)}') diff --git a/python/build_wheels.sh b/python/build_wheels.sh index ecc162092..2df64a503 100755 --- a/python/build_wheels.sh +++ b/python/build_wheels.sh @@ -1,6 +1,6 @@ #!/bin/bash -e -# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,70 +15,114 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Creates the Python self contained wheels +if [ "$#" -ne 1 ]; then + echo "Usage: build_wheels.sh " + exit 1 +fi -# Usage: build_wheels.sh [build_artifacts_dir] [python_versions] -# Note: This script is automatically called by cmake/make. The proper way to -# build python wheels is to issue the command: -# -# Do not run this script outside of cmake. +PYTHON_BUILD_DIR=$(realpath "$1") +BUILD_DIR=$(dirname "${PYTHON_BUILD_DIR}") +WHEEL_DIR="${PYTHON_BUILD_DIR}/dist" +REPAIRED_WHEEL_DIR="${PYTHON_BUILD_DIR}/repaired_wheels" +WHEEL_BUILD_DIR="${PYTHON_BUILD_DIR}/build_wheel" +LIB_DIR="${PYTHON_BUILD_DIR}/cvcuda_cu${CUDA_VERSION_MAJOR}.libs" +SUPPORTED_PYTHONS=("38" "39" "310" "311" "312" "313") +PACKAGES=("cvcuda" "nvcv") + +detect_platform_tag() { + if [ -n "${AUDITWHEEL_PLAT}" ]; then + echo "${AUDITWHEEL_PLAT}" + else + echo "linux_$(uname -m)" + fi +} + +PLATFORM_TAG=$(detect_platform_tag) +echo "Detected Platform Tag: ${PLATFORM_TAG}" + +LIBRARIES=( + "libcvcuda.so" + "libnvcv_types.so" +) + +mkdir -p "${WHEEL_DIR}" "${REPAIRED_WHEEL_DIR}" "${WHEEL_BUILD_DIR}" "${LIB_DIR}" + +# Detect available Python bindings +AVAILABLE_PYTHONS=() +PYTHON_EXECUTABLES=() +for py_ver in "${SUPPORTED_PYTHONS[@]}"; do + py_exec="python3.${py_ver:1}" + if command -v "${py_exec}" &> /dev/null; then + if compgen -G "${PYTHON_BUILD_DIR}/cvcuda/_bindings/cvcuda.cpython-${py_ver}-*.so" > /dev/null; then + AVAILABLE_PYTHONS+=("cp${py_ver}") + PYTHON_EXECUTABLES+=("${py_exec}") + fi + fi +done +PYTHON_EXECUTABLE="${PYTHON_EXECUTABLES[0]}" -set -e # Stops this script if any one command fails. 
+# Print the available Python bindings +echo "Available Python Bindings: ${AVAILABLE_PYTHONS[*]}" -if [ "$#" -lt 2 ]; then - echo "Usage: build_wheels.sh [python_versions,...]" +if [ "${#AVAILABLE_PYTHONS[@]}" -eq 0 ]; then + echo "Error: No Python bindings detected." exit 1 fi -BUILD_DIR=$(realpath "$1"); shift -PY_VERSIONS=("$@") -LIB_DIR="${BUILD_DIR}/lib" - -echo "BUILD_DIR: $BUILD_DIR" -echo "Python Versions: ${PY_VERSIONS[*]}" - -for py_version in "${PY_VERSIONS[@]}" -do - py_version_flat="${py_version//./}" # Gets the non dotted version string - echo "Building Python wheels for: Python${py_version}" - - # Step 1: Create a directories to store all wheels related files for this python version - py_dir="${BUILD_DIR}/python${py_version}" - wheel_dir="${py_dir}/wheel" - mkdir -p "${wheel_dir}" - rm -rf ${wheel_dir:?}/* - mkdir -p "${wheel_dir}/cvcuda.libs" - - cd "${wheel_dir}" - - # Step 2: Copy necessary .so files under one directory - # We will copy the target of the linked file and not the symlink only. - # Also the new file-name of the .so will be the actual so-name present inside the header of the .so - # This can be retrieved by using patchelf. - # This allows us to copy .so files without knowing their versions and also making sure they still - # work after copying. - # Copy the core .so files first - for so_file_name in libcvcuda.so libnvcv_types.so - do - cp -L "${LIB_DIR}/${so_file_name}" \ - "${wheel_dir}/cvcuda.libs/`patchelf --print-soname "${LIB_DIR}/${so_file_name}"`" - done - - # Copy the bindings .so files + patch them in their rpath. - # This allows the bindings to find the core .so files in a directory named cvcuda.libs only. 
- for so_file_path in ${LIB_DIR}/python/*.cpython-${py_version_flat}*.so - do - so_file_name=$(basename ${so_file_path}) - cp -L "${so_file_path}" \ - "${wheel_dir}/" - - patchelf --force-rpath --set-rpath '$ORIGIN'/cvcuda.libs "${wheel_dir}/${so_file_name}" - done - - # Step 3: Copy the setup.py corresponding to current python version to our wheels directory. - cp "${py_dir}/setup.py" "${wheel_dir}" - - # Step 3: Create wheel - python${py_version} setup.py bdist_wheel --dist-dir="${wheel_dir}" +# Copy and patch shared libraries (the RPATH is double-quoted so ${CUDA_VERSION_MAJOR} expands, with $ORIGIN escaped to stay literal for the dynamic loader) +echo "Copying and patching shared libraries..." +for lib in "${LIBRARIES[@]}"; do + src_path="${BUILD_DIR}/lib/${lib}" + if [ -f "${src_path}" ]; then + cp "${src_path}" "${LIB_DIR}/" + echo "Copied: ${src_path} -> ${LIB_DIR}/" + patchelf --force-rpath --set-rpath "\$ORIGIN/../cvcuda_cu${CUDA_VERSION_MAJOR}.libs" "${LIB_DIR}/${lib}" + else + echo "Warning: Shared library ${src_path} not found. Skipping." + fi +done + +# Create wheel structure +ln -sf "${PYTHON_BUILD_DIR}/setup.py" "${WHEEL_BUILD_DIR}/" +ln -sf "${PYTHON_BUILD_DIR}/cvcuda" "${WHEEL_BUILD_DIR}/" +ln -sf "${PYTHON_BUILD_DIR}/nvcv" "${WHEEL_BUILD_DIR}/" +ln -sf "${LIB_DIR}" "${WHEEL_BUILD_DIR}/cvcuda_cu${CUDA_VERSION_MAJOR}.libs" + +# Build wheel +echo "Building wheel..." +pushd "${WHEEL_BUILD_DIR}" > /dev/null +${PYTHON_EXECUTABLE} -m build --wheel --outdir="${WHEEL_DIR}" || ${PYTHON_EXECUTABLE} setup.py bdist_wheel --dist-dir="${WHEEL_DIR}" + +# Modify the wheel's Python and ABI tags for detected versions +# Ensuring the tag is propagated to the wheel +${PYTHON_EXECUTABLE} -m pip install --upgrade wheel +python_tag=$(IFS=. ; echo "${AVAILABLE_PYTHONS[*]}") +for whl in "${WHEEL_DIR}"/*.whl; do + ${PYTHON_EXECUTABLE} -m wheel tags --remove \ + --python-tag "${python_tag}" \ + --abi-tag "${python_tag}" \ + --platform-tag "${PLATFORM_TAG}" \ + "${whl}" +done +popd > /dev/null +echo "Repairing wheel for compliance..."
+${PYTHON_EXECUTABLE} -m pip install --upgrade auditwheel +for whl in "${WHEEL_DIR}"/*.whl; do + ${PYTHON_EXECUTABLE} -m auditwheel repair "${whl}" --plat "${PLATFORM_TAG}" -w "${REPAIRED_WHEEL_DIR}" + rm "${whl}" done + +echo "Verifying wheel filenames..." +for repaired_whl in "${REPAIRED_WHEEL_DIR}"/*.whl; do + repaired_whl_name="$(basename "${repaired_whl}")" + echo "Wheel: ${repaired_whl_name}" + IFS='-' read -r dist_name version python_tag abi_tag platform_tag <<< "$(echo "${repaired_whl_name}" | sed 's/\.whl$//')" + echo " Distribution Name: ${dist_name}" + echo " Version: ${version}" + echo " Python Tag: ${python_tag}" + echo " ABI Tag: ${abi_tag}" + echo " Platform Tag: ${platform_tag}" +done + +echo "Repaired wheels are located in: ${REPAIRED_WHEEL_DIR}" diff --git a/python/setup.py.in b/python/setup.py.in index c22e9d0ff..e1447c0c6 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,81 +1,107 @@ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, +# distributed under the License is distributed on an 'AS IS' BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# This is a Python setuptools setup script to generate Python wheels. -# It is in a template form with placeholder fields that looks like ${}. -# This script will be automatically invoked by cmake when Python bindings are built. 
-# Do not invoke this outside of cmake. - - -from setuptools import setup, Extension +import os +from collections import defaultdict +from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext class NoBuildExtension(build_ext): """ - Since CV-CUDA Python wheels are pure pre-compiled binary distribution at this point - without any Python or any other source code files and since the binaries are generated - by cmake system outside and without the knowledge of the setuptools, we must - create a dummy class to build an extension here with no source code in it and - no build steps in it to let setuptools create a platform library instead of a - pure library. Without any extensions in a setup tools project setuptools will - end up creating a purelib package. One can compile cmake/pybind11 code here - as an extension but since that part is handled outside of this file for now - we will simply create an empty extension and a corresponding build step that - actually does nothing but let setuptools know that this is a pure binary distribution. + Prevent setuptools from trying to build extensions since the actual + compilation is handled externally (e.g., via CMake). """ - def run(self): - return # Do nothing during build time. + pass + + +def find_shared_libraries(lib_dir): + """ + Locate the central shared libraries (libcvcuda.so, libnvcv_types.so) + and Python bindings (*.cpython-*.so) in the given directory. + + Args: + lib_dir (str): The directory to search for shared libraries. + + Returns: + dict: A dictionary containing shared libraries and bindings. 
+ """ + shared_libraries = defaultdict(list) + + for root, _, files in os.walk(lib_dir): + for file in files: + if file.endswith('.so'): + file_path = os.path.relpath(os.path.join(root, file), + start=lib_dir) + + # Central shared libraries + if file.startswith('libcvcuda'): + shared_libraries['cvcuda'].append(file_path) + elif file.startswith('libnvcv_types'): + shared_libraries['nvcv'].append(file_path) + # Python bindings + elif file.startswith('cvcuda') and 'cpython' in file: + shared_libraries['cvcuda_bindings'].append(file_path) + elif file.startswith('nvcv') and 'cpython' in file: + shared_libraries['nvcv_bindings'].append(file_path) + + return shared_libraries -# Define our PyPI trove classifiers for this project. Many values here are -# placeholders which will be filled in by cmake when this is built. -pypi_trove_classifiers = [ - "Development Status :: 4 - Beta", - "Environment :: GPU :: NVIDIA CUDA", - "Environment :: GPU :: NVIDIA CUDA :: ${CUDA_VERSION_MAJOR}", - "Operating System :: POSIX :: Linux", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: ${PYTHON_VERSION}", - "Programming Language :: Python :: Implementation :: CPython", -] +# Locate libraries and bindings in the specified directory +libs = find_shared_libraries(os.path.dirname(__file__)) +print(libs) -# Finally call the setup. 
setup( - name="cvcuda-cu${CUDA_VERSION_MAJOR}", - description="${CMAKE_PROJECT_DESCRIPTION}", - author="NVIDIA Corporation", - url="https://github.com/CVCUDA/CV-CUDA", - version="${CMAKE_PROJECT_VERSION}${PROJECT_VERSION_SUFFIX}", - packages=[""], # Must be empty to support current CV-CUDA style distribution - package_dir={"": "."}, + name='cvcuda-cu${CUDA_VERSION_MAJOR}', + version='${CMAKE_PROJECT_VERSION}${PROJECT_VERSION_SUFFIX}', + description='${CMAKE_PROJECT_DESCRIPTION}', + author='NVIDIA Corporation', + author_email='support@nvidia.com', + url='https://github.com/CVCUDA/CV-CUDA', + packages=find_packages(include=["cvcuda", "nvcv"]), + package_dir={ + 'cvcuda': 'cvcuda', + 'nvcv': 'nvcv', + }, package_data={ - "": ["*.so", "cvcuda.libs/*.*"] - }, # Includes the binding .so + core .so files + 'cvcuda': ['_bindings/*.*'], + 'nvcv': ['_bindings/*.*'], + 'cvcuda_cu${CUDA_VERSION_MAJOR}.libs': ['*.*'], + }, include_package_data=True, - install_requires=["numpy>=1.23.5"], - python_requires="==${PYTHON_VERSION}.*", + install_requires=['numpy>=1.23.5'], + python_requires='>=3.8, <3.14', zip_safe=False, - cmdclass={ - "build_ext": NoBuildExtension, # This allows us to make it a platlib. - }, + cmdclass={'build_ext': NoBuildExtension}, ext_modules=[ - Extension( - name="UnusedEmptyExtension", sources=[] - ), # This allows us to make it a platlib. 
+ Extension(name='UnusedEmptyExtension', sources=[]), + ], + classifiers=[ + 'Development Status :: 4 - Beta', + 'Environment :: GPU :: NVIDIA CUDA', + 'Environment :: GPU :: NVIDIA CUDA :: ${CUDA_VERSION_MAJOR}', + 'Operating System :: POSIX :: Linux', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: Implementation :: CPython', ], - classifiers=pypi_trove_classifiers, ) diff --git a/src/cvcuda/priv/CMakeLists.txt b/src/cvcuda/priv/CMakeLists.txt index 8a61def0e..fbc4fa533 100644 --- a/src/cvcuda/priv/CMakeLists.txt +++ b/src/cvcuda/priv/CMakeLists.txt @@ -103,8 +103,8 @@ target_link_libraries(cvcuda_priv nvcv_util_sanitizer cvcuda_legacy CUDA::cudart_static - CUDA::cusolver - CUDA::cublas - CUDA::cublasLt + CUDA::cusolver_static + CUDA::cublas_static + CUDA::cublasLt_static -lrt ) diff --git a/src/nvcv/CMakeLists.txt b/src/nvcv/CMakeLists.txt index 2f2ae1ea1..ac81ea9fe 100644 --- a/src/nvcv/CMakeLists.txt +++ b/src/nvcv/CMakeLists.txt @@ -17,7 +17,7 @@ cmake_minimum_required(VERSION 3.20.1) project(nvcv LANGUAGES C CXX - VERSION 0.12.0 + VERSION 0.13.0 DESCRIPTION "NVCV is NVIDIA Computer Vision library" ) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e6d06d960..293176418 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -45,9 +45,12 @@ target_link_libraries(nvcv_test_main if(UNIX) file(TO_NATIVE_PATH "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/" TESTS_DRIVER_DIR) set(TESTS_DRIVER "${TESTS_DRIVER_DIR}/run_tests.sh") + set(WHEEL_TESTER "${TESTS_DRIVER_DIR}/test_wheels.sh") configure_file(${CMAKE_CURRENT_SOURCE_DIR}/run_tests.sh.in ${TESTS_DRIVER} @ONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/test_wheels.sh.in ${WHEEL_TESTER} + @ONLY) 
macro(nvcv_add_test TESTCMD TESTGROUP) get_filename_component(TESTNAME "${TESTCMD}" NAME) @@ -86,6 +89,9 @@ if(UNIX) install(PROGRAMS ${TESTS_DRIVER} TYPE BIN COMPONENT tests) + install(PROGRAMS ${WHEEL_TESTER} + TYPE BIN + COMPONENT tests) else() macro(nvcv_add_test) add_test(${ARGV}) diff --git a/tests/cvcuda/stressTest/cvcuda_cache_repro.py b/tests/cvcuda/stressTest/cvcuda_cache_repro.py new file mode 100644 index 000000000..8d46438d8 --- /dev/null +++ b/tests/cvcuda/stressTest/cvcuda_cache_repro.py @@ -0,0 +1,200 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cvcuda +import torch +import random +import threading +import queue +import time +import gc + + +def preprocess(input, out_size): + frame_nhwc = cvcuda.as_tensor( + torch.as_tensor(input).to(device="cuda:0", non_blocking=True), + "NHWC", + ) + resized = cvcuda.resize( + frame_nhwc, + ( + frame_nhwc.shape[0], + out_size[1], + out_size[0], + frame_nhwc.shape[3], + ), + cvcuda.Interp.LINEAR, + ) + # Convert to floating point range 0-1. + normalized = cvcuda.convertto(resized, np.float32, scale=1 / 255) + # Convert it to NCHW layout and return it. 
+ normalized = cvcuda.reformat(normalized, "NCHW") + return normalized + + +def preprocess_into(input, out_size): + torch.cuda.synchronize() + cvcuda_RGBtensor = cvcuda.as_tensor(input.cuda(), "NHWC") + torch.cuda.synchronize() + torch_RGBtensor_resized = torch.empty( + ( + cvcuda_RGBtensor.shape[0], + out_size[1], + out_size[0], + cvcuda_RGBtensor.shape[3], + ), + dtype=torch.uint8, + device="cuda:0", + ) + cvcuda_RGBtensor_resized = cvcuda.as_tensor( + torch_RGBtensor_resized.cuda(), + "NHWC", + ) + cvcuda.resize_into( + cvcuda_RGBtensor_resized, + cvcuda_RGBtensor, + cvcuda.Interp.LINEAR, + ) + + torch_nchw = torch.empty( + (input.shape[0], 3, out_size[1], out_size[0]), + dtype=torch.uint8, + device="cuda:0", + ) + cvcuda_nchw = cvcuda.as_tensor(torch_nchw.cuda(0), "NCHW") + cvcuda.reformat_into(cvcuda_nchw, cvcuda_RGBtensor_resized) + return torch_nchw + + +def generate_images(N, width=None, height=None, random_size=False): + if random_size: + w = random.randint(1, 10) + h = random.randint(1, 10) + else: + w = width + h = height + return torch.as_tensor(torch.rand(N, h, w, 3), dtype=torch.uint8) + + +def worker(device_id, task_queue, result_queue): + while True: + task = task_queue.get() + if task is None: + break + gradient_img_batch, image_size = task + result = preprocess(gradient_img_batch, image_size) + result_queue.put(result) + task_queue.task_done() + + +def worker_into(device_id, task_queue, result_queue): + while True: + task = task_queue.get() + if task is None: + break + gradient_img_batch, image_size = task + result = preprocess_into(gradient_img_batch, image_size) + result_queue.put(result) + task_queue.task_done() + + +def test_random_batch_size(): + device_id = 0 + num_threads = 10 + + task_queue = queue.Queue() + result_queue = queue.Queue() + + threads = [] + for i in range(num_threads): + t = threading.Thread(target=worker, args=(device_id, task_queue, result_queue)) + t.start() + threads.append(t) + + # Set the duration to run the function 
(in seconds) + duration = 10 + start_time = time.time() + + while time.time() - start_time < duration: + batch_size = random.randint(5, 10) + target_img_width = random.randint(110, 115) + target_img_height = random.randint(220, 230) + gradient_img_batch = generate_images(N=batch_size, random_size=True) + image_size = (target_img_width, target_img_height) + task_queue.put((gradient_img_batch, image_size)) + + # Signal the threads to stop + for _ in range(num_threads): + task_queue.put(None) + + for t in threads: + t.join() + + print("Random Batch Size test complete") + + +def test_random_batch_size_into(): + device_id = 0 + num_threads = 10 + + task_queue = queue.Queue() + result_queue = queue.Queue() + + threads = [] + for i in range(num_threads): + t = threading.Thread( + target=worker_into, args=(device_id, task_queue, result_queue) + ) + t.start() + threads.append(t) + + # Set the duration to run the function (in seconds) + duration = 10 + start_time = time.time() + + while time.time() - start_time < duration: + batch_size = random.randint(5, 10) + target_img_width = random.randint(110, 115) + target_img_height = random.randint(220, 230) + gradient_img_batch = generate_images(N=batch_size, random_size=True) + # print(gradient_img_batch.size()) + image_size = (target_img_width, target_img_height) + task_queue.put((gradient_img_batch, image_size)) + + # Signal the threads to stop + for _ in range(num_threads): + task_queue.put(None) + + for t in threads: + t.join() + + print("Into Random Batch Size test complete") + + +def main(): + test_random_batch_size_into() + collected = gc.collect() + print(f"Garbage collector: collected {collected} objects.") + time.sleep(1) + torch.cuda.empty_cache() + time.sleep(1) + test_random_batch_size() + collected = gc.collect() + print(f"Garbage collector: collected {collected} objects.") + + +if __name__ == "__main__": + main() diff --git a/tests/cvcuda/stressTest/stress_test_inference.py 
b/tests/cvcuda/stressTest/stress_test_inference.py new file mode 100644 index 000000000..c09c5555f --- /dev/null +++ b/tests/cvcuda/stressTest/stress_test_inference.py @@ -0,0 +1,542 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +import cvcuda +import torch +import random +import nvcv + +import os +import sys +import urllib.request +import time + +import tensorrt as trt +import tensorflow as tf + +common_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + "common", + "python", +) +assets_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + "assets", +) +sys.path.insert(0, common_dir) + +from trt_utils import setup_tensort_bindings # noqa: E402 + +time_of_test_in_min = 15 +max_batch_size = 10 + +os.environ["CUDA_LAUNCH_BLOCKING"] = "1" + + +def preprocess(input, out_size): + frame_nhwc = cvcuda.as_tensor( + torch.as_tensor(input).to(device="cuda:0", non_blocking=True), + "NHWC", + ) + resized = cvcuda.resize( + frame_nhwc, + ( + frame_nhwc.shape[0], + out_size[1], + out_size[0], + frame_nhwc.shape[3], + ), + cvcuda.Interp.LINEAR, + ) + # Convert to floating point range 0-1. + normalized = cvcuda.convertto(resized, np.float32, scale=1 / 255) + # Convert it to NCHW layout and return it. 
+ normalized = cvcuda.reformat(normalized, "NCHW") + return normalized + + +def preprocess_into(input, out_size): + torch.cuda.synchronize() + cvcuda_RGBtensor = cvcuda.as_tensor(input.cuda(), "NHWC") + torch.cuda.synchronize() + torch_RGBtensor_resized = torch.empty( + ( + cvcuda_RGBtensor.shape[0], + out_size[1], + out_size[0], + cvcuda_RGBtensor.shape[3], + ), + dtype=torch.uint8, + device="cuda:0", + ) + cvcuda_RGBtensor_resized = cvcuda.as_tensor( + torch_RGBtensor_resized.cuda(), + "NHWC", + ) + cvcuda.resize_into( + cvcuda_RGBtensor_resized, + cvcuda_RGBtensor, + cvcuda.Interp.LINEAR, + ) + + torch_nchw = torch.empty( + (input.shape[0], 3, out_size[1], out_size[0]), + dtype=torch.uint8, + device="cuda:0", + ) + cvcuda_nchw = cvcuda.as_tensor(torch_nchw.cuda(0), "NCHW") + # normalized = cvcuda.convertto(cvcuda_nchw, np.float32, scale=1 / 255) + cvcuda.reformat_into(cvcuda_nchw, cvcuda_RGBtensor_resized) + return torch_nchw + + +def generate_images(N, width=None, height=None, random_size=False): + if random_size: + w = random.randint(100, 500) + h = random.randint(100, 500) + else: + w = width + h = height + return torch.as_tensor(torch.rand(N, h, w, 3), dtype=torch.uint8) + + +class ObjectDetectionTensorflow: + def __init__( + self, + output_dir, + batch_size, + image_size, + device_id, + ): + self.logger = logging.getLogger(__name__) + self.output_dir = output_dir + self.batch_size = batch_size + self.image_size = image_size + self.device_id = device_id + + physical_devices = tf.config.list_physical_devices("GPU") + tf.config.experimental.set_memory_growth(physical_devices[self.device_id], True) + + hdf5_model_path = os.path.join(output_dir, "resnet34_peoplenet.hdf5") + + if not os.path.isfile(hdf5_model_path): + # We need to download the HDF5 model first from NGC. 
+ model_url = ( + "https://api.ngc.nvidia.com/v2/models/" + "org/nvidia/team/tao/peoplenet/trainable_unencrypted_v2.6/" + "files?redirect=true&path=model.hdf5" + ) + self.logger.info("Downloading the PeopleNet model from NGC: %s" % model_url) + urllib.request.urlretrieve(model_url, hdf5_model_path) + self.logger.info("Download complete. Saved to: %s" % hdf5_model_path) + + with tf.device("/GPU:%d" % self.device_id): + self.model = tf.keras.models.load_model(hdf5_model_path) + self.logger.info("TensorFlow PeopleNet model is loaded.") + + self.logger.info("Using TensorFlow as the inference engine.") + + def __call__(self, frame_nchw): + + if isinstance(frame_nchw, torch.Tensor): + # We convert torch.Tensor to tf.Tensor by: + # torch.Tensor -> Pytorch Flat Tensor -> DlPack -> tf.Tensor -> Un-flatten + frame_nchw_shape = frame_nchw.shape + frame_nchw = frame_nchw.flatten() + frame_nchw_tf = tf.experimental.dlpack.from_dlpack(frame_nchw.__dlpack__()) + frame_nchw_tf = tf.reshape(frame_nchw_tf, frame_nchw_shape) + + elif isinstance(frame_nchw, nvcv.Tensor): + # We convert nvcv.Tensor to tf.Tensor by: + # nvcv.Tensor -> PyTorch Tensor -> Pytorch Flat Tensor -> DlPack -> tf.Tensor -> Un-flatten + frame_nchw_pyt = torch.as_tensor( + frame_nchw.cuda(), device="cuda:%d" % self.device_id + ) + frame_nchw_pyt = frame_nchw_pyt.flatten() + frame_nchw_tf = tf.experimental.dlpack.from_dlpack( + frame_nchw_pyt.__dlpack__() + ) + frame_nchw_tf = tf.reshape(frame_nchw_tf, frame_nchw.shape) + + elif isinstance(frame_nchw, np.ndarray): + frame_nchw_tf = tf.convert_to_tensor(frame_nchw) + + else: + raise ValueError( + "Invalid type of input tensor for tensorflow inference: %s" + % str(type(frame_nchw)) + ) + + with tf.device("/GPU:%d" % self.device_id): + output_tensors = self.model(frame_nchw_tf) # returns a tuple. 
+ + # Convert the output to PyTorch Tensors + boxes = torch.from_dlpack(tf.experimental.dlpack.to_dlpack(output_tensors[0])) + score = torch.from_dlpack( + tf.experimental.dlpack.to_dlpack(output_tensors[1]) + ) # inference.tensorflow + return boxes, score + + +class ObjectDetectionTensorRT: + def __init__( + self, + output_dir, + batch_size, + image_size, + device_id, + ): + self.logger = logging.getLogger(__name__) + self.output_dir = output_dir + self.batch_size = batch_size + self.image_size = image_size + self.device_id = device_id + + # Download and prepare the models for the first use. + etlt_model_path = os.path.join(self.output_dir, "resnet34_peoplenet_int8.etlt") + trt_engine_file_path = os.path.join( + self.output_dir, + "resnet34_peoplenet_int8.%d.%d.%d.trtmodel" + % ( + batch_size, + image_size[1], + image_size[0], + ), + ) + + # Check if we have a previously generated model. + if not os.path.isfile(trt_engine_file_path): + if not os.path.isfile(etlt_model_path): + # We need to download the ETLT model first from NGC. + model_url = ( + "https://api.ngc.nvidia.com/v2/models/" + "nvidia/tao/peoplenet/versions/deployable_quantized_v2.6.1/" + "files/resnet34_peoplenet_int8.etlt" + ) + self.logger.info( + "Downloading the PeopleNet model from NGC: %s" % model_url + ) + urllib.request.urlretrieve(model_url, etlt_model_path) + self.logger.info("Download complete. Saved to: %s" % etlt_model_path) + + # Convert ETLT to TensorRT model using the TAO-Converter. + self.logger.info("Converting the PeopleNet model to TensorRT...") + if os.system( + "tao-converter -e %s -k tlt_encode -d 3,%d,%d -m %d -i nchw %s" + % ( + trt_engine_file_path, + image_size[1], + image_size[0], + batch_size, + etlt_model_path, + ) + ): + raise Exception("Conversion failed.") + else: + self.logger.info( + "Conversion complete. Saved to: %s" % trt_engine_file_path + ) + + # Once the TensorRT engine generation is all done, we load it.
+ trt_logger = trt.Logger(trt.Logger.ERROR) + with open(trt_engine_file_path, "rb") as f, trt.Runtime(trt_logger) as runtime: + # Keeping this as a class variable because we want to be able to + # allocate the output tensors either on its first use or when the + # batch size changes + self.trt_model = runtime.deserialize_cuda_engine(f.read()) + + # Create execution context. + self.model = self.trt_model.create_execution_context() + + # We will allocate the output tensors and its bindings either when we + # use it for the first time or when the batch size changes. + self.output_tensors, self.output_idx = None, None + + self.logger.info("Using TensorRT as the inference engine.") + + def __call__(self, tensor): + + # Grab the data directly from the pre-allocated tensor. + input_bindings = [tensor.cuda().__cuda_array_interface__["data"][0]] + output_bindings = [] + + actual_batch_size = tensor.shape[0] + + # Need to allocate the output tensors + if not self.output_tensors or actual_batch_size != self.batch_size: + self.output_tensors, self.output_idx = setup_tensort_bindings( + self.trt_model, + actual_batch_size, + self.device_id, + self.logger, + ) + + for t in self.output_tensors: + output_bindings.append(t.data_ptr()) + io_bindings = input_bindings + output_bindings + + # Call inference for implicit batch + self.model.execute_async( + actual_batch_size, + bindings=io_bindings, + stream_handle=cvcuda.Stream.current.handle, + ) + + boxes = self.output_tensors[0] + score = self.output_tensors[1] # inference.tensorrt + return boxes, score + + +def test_random_image_size(): + target_img_width = 960 + target_img_height = 544 + image_size = (target_img_width, target_img_height) + batch_size = 1 + device_id = 0 + backend = "tensorflow" + output_dir = "" + if backend == "tensorflow": + inference = ObjectDetectionTensorflow( + output_dir, + batch_size, + image_size, + device_id, + ) + + elif backend == "tensorrt": + inference = ObjectDetectionTensorRT( + output_dir, + 
batch_size, + image_size, + device_id, + ) + else: + raise ValueError("Unknown backend: %s" % backend) + + duration = time_of_test_in_min * 60 # 5 minutes + start_time = time.time() + + while time.time() - start_time < duration: + gradient_img_batch = generate_images(N=batch_size, random_size=True) + image_size = (target_img_width, target_img_height) + result = preprocess(gradient_img_batch, image_size) + bboxes, probabilities = inference(result) + print("Random Image Size Test Complete") + + +def test_increasing_batch_size(): + target_img_width = 960 + target_img_height = 544 + image_size = (target_img_width, target_img_height) + batch_size = 1 + device_id = 0 + backend = "tensorflow" + output_dir = "" + if backend == "tensorflow": + inference = ObjectDetectionTensorflow( + output_dir, + batch_size, + image_size, + device_id, + ) + + elif backend == "tensorrt": + inference = ObjectDetectionTensorRT( + output_dir, + batch_size, + image_size, + device_id, + ) + else: + raise ValueError("Unknown backend: %s" % backend) + duration = time_of_test_in_min * 60 # 5 minutes + start_time = time.time() + + while time.time() - start_time < duration and batch_size < max_batch_size: + gradient_img_batch = generate_images(N=batch_size, random_size=True) + image_size = (target_img_width, target_img_height) + result = preprocess(gradient_img_batch, image_size) + bboxes, probabilities = inference(result) + batch_size += 1 + print("Random Image Size Test Complete") + + +def test_random_batch_size(): + target_img_width = 960 + target_img_height = 544 + image_size = (target_img_width, target_img_height) + batch_size = 1 + device_id = 0 + backend = "tensorflow" + output_dir = "" + if backend == "tensorflow": + inference = ObjectDetectionTensorflow( + output_dir, + batch_size, + image_size, + device_id, + ) + + elif backend == "tensorrt": + inference = ObjectDetectionTensorRT( + output_dir, + batch_size, + image_size, + device_id, + ) + else: + raise ValueError("Unknown backend: %s" % 
backend) + + duration = time_of_test_in_min * 60 # 5 minutes + start_time = time.time() + + while time.time() - start_time < duration: + gradient_img_batch = generate_images(width=1080, height=1920, N=batch_size) + image_size = (target_img_width, target_img_height) + result = preprocess(gradient_img_batch, image_size) + bboxes, probabilities = inference(result) + batch_size = random.randint(1, 80) + print("Random Batch Size Test Complete") + + +def test_random_image_size_into(): + target_img_width = 960 + target_img_height = 544 + image_size = (target_img_width, target_img_height) + batch_size = 1 + device_id = 0 + backend = "tensorflow" + output_dir = "" + if backend == "tensorflow": + inference = ObjectDetectionTensorflow( + output_dir, + batch_size, + image_size, + device_id, + ) + + elif backend == "tensorrt": + inference = ObjectDetectionTensorRT( + output_dir, + batch_size, + image_size, + device_id, + ) + else: + raise ValueError("Unknown backend: %s" % backend) + duration = time_of_test_in_min * 60 # 5 minutes + start_time = time.time() + + while time.time() - start_time < duration: + # while True: + gradient_img_batch = generate_images(N=batch_size, random_size=True) + # print(gradient_img_batch.size()) + image_size = (target_img_width, target_img_height) + result = preprocess_into(gradient_img_batch, image_size) + bboxes, probabilities = inference(result) + # print(f"bboxes :{bboxes}") + # print(f"probabilities :{probabilities}") + print("Into operator Random Image Size Test Complete") + + +def test_increasing_batch_size_into(): + target_img_width = 960 + target_img_height = 544 + image_size = (target_img_width, target_img_height) + batch_size = 1 + device_id = 0 + backend = "tensorflow" + output_dir = "" + if backend == "tensorflow": + inference = ObjectDetectionTensorflow( + output_dir, + batch_size, + image_size, + device_id, + ) + + elif backend == "tensorrt": + inference = ObjectDetectionTensorRT( + output_dir, + batch_size, + image_size, + 
device_id, + ) + else: + raise ValueError("Unknown backend: %s" % backend) + duration = time_of_test_in_min * 60 # 5 minutes + start_time = time.time() + + while time.time() - start_time < duration and batch_size < max_batch_size: + gradient_img_batch = generate_images(N=batch_size, random_size=True) + image_size = (target_img_width, target_img_height) + result = preprocess_into(gradient_img_batch, image_size) + bboxes, probabilities = inference(result) + batch_size += 1 + print("Into operator Random Image Size Test Complete") + + +def test_random_batch_size_into(): + target_img_width = 960 + target_img_height = 544 + image_size = (target_img_width, target_img_height) + batch_size = 1 + device_id = 0 + backend = "tensorflow" + output_dir = "" + if backend == "tensorflow": + inference = ObjectDetectionTensorflow( + output_dir, + batch_size, + image_size, + device_id, + ) + + elif backend == "tensorrt": + inference = ObjectDetectionTensorRT( + output_dir, + batch_size, + image_size, + device_id, + ) + else: + raise ValueError("Unknown backend: %s" % backend) + duration = time_of_test_in_min * 60 # 5 minutes + start_time = time.time() + + while time.time() - start_time < duration: + gradient_img_batch = generate_images(N=batch_size, random_size=True) + image_size = (target_img_width, target_img_height) + result = preprocess_into(gradient_img_batch, image_size) + bboxes, probabilities = inference(result) + batch_size = random.randint(1, 80) + print("Into Operator Random Batch Size Test Complete") + + +def main(): + print(torch.cuda.get_device_properties(0)) + test_random_image_size() + test_random_batch_size() + test_random_image_size_into() + test_random_batch_size_into() + + # test_increasing_batch_size() + # test_increasing_batch_size_into() + + +if __name__ == "__main__": + main() diff --git a/tests/cvcuda/stressTest/stress_test_mt_prep.py b/tests/cvcuda/stressTest/stress_test_mt_prep.py new file mode 100644 index 000000000..32bffb053 --- /dev/null +++ 
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Multi-threaded stress test for the CV-CUDA pre-processing operators.

A pool of worker threads repeatedly runs a resize / convert / reformat
pipeline on randomly generated uint8 image batches for a fixed wall-clock
duration. A producer loop in the calling thread feeds the workers.
"""

import numpy as np
import cvcuda
import torch
import random
import threading
import queue
import time


def preprocess(input, out_size):
    """Resize a batch, scale it by 1/255 into float32 and reformat it to NCHW.

    Args:
        input: Image batch accepted by ``torch.as_tensor``; it is wrapped with
            the "NHWC" layout after being moved to ``cuda:0``.
        out_size: ``(width, height)`` of the resized images.

    Returns:
        The tensor produced by ``cvcuda.reformat(..., "NCHW")``.
    """
    frame_nhwc = cvcuda.as_tensor(
        torch.as_tensor(input).to(device="cuda:0", non_blocking=True),
        "NHWC",
    )
    resized = cvcuda.resize(
        frame_nhwc,
        (
            frame_nhwc.shape[0],
            out_size[1],  # height
            out_size[0],  # width
            frame_nhwc.shape[3],
        ),
        cvcuda.Interp.LINEAR,
    )
    # Convert to floating point range 0-1.
    normalized = cvcuda.convertto(resized, np.float32, scale=1 / 255)
    # Convert it to NCHW layout and return it.
    return cvcuda.reformat(normalized, "NCHW")


def preprocess_into(input, out_size):
    """Same resize + NCHW reformat, using the ``*_into`` operator variants.

    The destination buffers are allocated with ``torch.empty`` and wrapped as
    CV-CUDA tensors, so the operators write straight into torch-owned memory.
    No float conversion is performed: the result stays uint8.

    Args:
        input: torch tensor wrapped with the "NHWC" layout after ``.cuda()``.
        out_size: ``(width, height)`` of the resized images.

    Returns:
        The uint8 torch tensor of shape ``(N, 3, height, width)`` on ``cuda:0``
        that ``cvcuda.reformat_into`` was asked to fill.
    """
    torch.cuda.synchronize()
    cvcuda_RGBtensor = cvcuda.as_tensor(input.cuda(), "NHWC")
    torch.cuda.synchronize()

    # Destination of the resize, still in NHWC.
    torch_RGBtensor_resized = torch.empty(
        (
            cvcuda_RGBtensor.shape[0],
            out_size[1],
            out_size[0],
            cvcuda_RGBtensor.shape[3],
        ),
        dtype=torch.uint8,
        device="cuda:0",
    )
    cvcuda_RGBtensor_resized = cvcuda.as_tensor(
        torch_RGBtensor_resized.cuda(),
        "NHWC",
    )
    cvcuda.resize_into(
        cvcuda_RGBtensor_resized,
        cvcuda_RGBtensor,
        cvcuda.Interp.LINEAR,
    )

    # Destination of the layout change; this is what the caller receives.
    torch_nchw = torch.empty(
        (input.shape[0], 3, out_size[1], out_size[0]),
        dtype=torch.uint8,
        device="cuda:0",
    )
    cvcuda_nchw = cvcuda.as_tensor(torch_nchw.cuda(0), "NCHW")
    cvcuda.reformat_into(cvcuda_nchw, cvcuda_RGBtensor_resized)
    return torch_nchw


def generate_images(N, width=None, height=None, random_size=False):
    """Create a batch of N random uint8 images with shape ``(N, h, w, 3)``.

    Args:
        N: Number of images in the batch.
        width: Image width; required unless ``random_size`` is true.
        height: Image height; required unless ``random_size`` is true.
        random_size: When true, ignore ``width``/``height`` and draw each
            dimension uniformly from ``[1, 10]``.

    Returns:
        A CPU ``torch.uint8`` tensor with values in ``[0, 255]``.

    Raises:
        ValueError: If ``random_size`` is false and a dimension is missing.
    """
    if random_size:
        w = random.randint(1, 10)
        h = random.randint(1, 10)
    else:
        if width is None or height is None:
            raise ValueError(
                "width and height are required when random_size is False"
            )
        w = width
        h = height
    # torch.rand() yields floats in [0, 1); casting those to uint8 truncates
    # every pixel to 0. Sample the integer range directly instead.
    return torch.randint(0, 256, (N, h, w, 3), dtype=torch.uint8)


def _worker_loop(process, task_queue, result_queue):
    """Consume ``(batch, out_size)`` tasks until a ``None`` sentinel arrives.

    Each result is put on ``result_queue``. An exception raised by ``process``
    is put there instead of killing the thread, so the producer can report it.
    Every dequeued item, including the sentinel, is marked done.
    """
    while True:
        task = task_queue.get()
        try:
            if task is None:
                return
            image_batch, out_size = task
            try:
                outcome = process(image_batch, out_size)
            except Exception as exc:
                outcome = exc
            result_queue.put(outcome)
        finally:
            task_queue.task_done()


def worker(device_id, task_queue, result_queue):
    """Thread body running ``preprocess`` on queued tasks.

    ``device_id`` is accepted for signature parity but is not used; the
    pipeline itself is pinned to ``cuda:0``.
    """
    _worker_loop(preprocess, task_queue, result_queue)


def worker_into(device_id, task_queue, result_queue):
    """Thread body running ``preprocess_into`` on queued tasks.

    ``device_id`` is accepted for signature parity but is not used.
    """
    _worker_loop(preprocess_into, task_queue, result_queue)


def _drain_results(result_queue, failures):
    """Empty ``result_queue``, keeping only the exceptions found in it.

    Dropping the successful outputs releases the references to them, so the
    memory used by the test does not grow with the number of processed tasks.
    """
    while True:
        try:
            item = result_queue.get_nowait()
        except queue.Empty:
            return
        if isinstance(item, Exception):
            failures.append(item)


def _make_task(batch_size_range, width_range, height_range):
    """Build one ``(batch, (width, height))`` task with randomized parameters.

    Each ``*_range`` is an inclusive ``(low, high)`` pair.
    """
    batch_size = random.randint(*batch_size_range)
    out_size = (random.randint(*width_range), random.randint(*height_range))
    return generate_images(N=batch_size, random_size=True), out_size


def _run_threaded_stress(worker_fn, num_threads, make_task, duration=10, device_id=0):
    """Feed ``num_threads`` workers with tasks for ``duration`` seconds.

    The task queue is bounded so the producer cannot run arbitrarily far ahead
    of the workers, and results are drained while producing.

    Raises:
        RuntimeError: If any task failed inside a worker thread.
    """
    task_queue = queue.Queue(maxsize=2 * num_threads)
    result_queue = queue.Queue()

    threads = [
        threading.Thread(
            target=worker_fn, args=(device_id, task_queue, result_queue)
        )
        for _ in range(num_threads)
    ]
    for t in threads:
        t.start()

    failures = []
    try:
        deadline = time.time() + duration
        while time.time() < deadline and not failures:
            task_queue.put(make_task())
            _drain_results(result_queue, failures)
    finally:
        # Signal the threads to stop, one sentinel per worker, then wait.
        for _ in threads:
            task_queue.put(None)
        for t in threads:
            t.join()

    _drain_results(result_queue, failures)
    if failures:
        raise RuntimeError(
            "%d pre-processing task(s) failed in worker threads" % len(failures)
        ) from failures[0]


def test_random_image_size():
    """15 threads, fixed batch of 10, output size drawn from 220-230 px."""
    _run_threaded_stress(
        worker,
        num_threads=15,
        make_task=lambda: _make_task((10, 10), (220, 230), (220, 230)),
    )
    print("Random Output Image Size test complete")


def test_random_batch_size():
    """10 threads, batch of 5-10, output 110-115 px wide and 220-230 px high."""
    _run_threaded_stress(
        worker,
        num_threads=10,
        make_task=lambda: _make_task((5, 10), (110, 115), (220, 230)),
    )
    print("Random Batch Size test complete")


def test_random_image_size_into():
    """``*_into`` variant of ``test_random_image_size``."""
    _run_threaded_stress(
        worker_into,
        num_threads=15,
        make_task=lambda: _make_task((10, 10), (220, 230), (220, 230)),
    )
    print("Into Random Output Image Size test complete")


def test_random_batch_size_into():
    """``*_into`` variant of ``test_random_batch_size``."""
    _run_threaded_stress(
        worker_into,
        num_threads=10,
        make_task=lambda: _make_task((5, 10), (110, 115), (220, 230)),
    )
    print("Into Random Batch Size test complete")


def main():
    """Run the batch-size tests back to back, pausing 10 s between them.

    ``test_random_image_size`` and ``test_random_image_size_into`` are not
    part of the default run.
    """
    test_random_batch_size_into()
    time.sleep(10)
    test_random_batch_size()


if __name__ == "__main__":
    main()
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Single-threaded stress test for the CV-CUDA pre-processing operators.

Each test runs one pre-processing pipeline in a tight loop for
``time_of_test_in_min`` minutes while varying image size or batch size.
"""

import numpy as np
import cvcuda
import torch
import random
import time

# Wall-clock budget of every individual test, in minutes (0.1 min = 6 s).
time_of_test_in_min = 0.1

# Output size requested from the pipelines by every test, as (width, height).
_TARGET_SIZE = (224, 224)


def preprocess(input, out_size):
    """Resize a batch, scale it by 1/255 into float32 and reformat it to NCHW.

    Args:
        input: Image batch accepted by ``torch.as_tensor``; it is wrapped with
            the "NHWC" layout after being moved to ``cuda:0``.
        out_size: ``(width, height)`` of the resized images.

    Returns:
        The tensor produced by ``cvcuda.reformat(..., "NCHW")``.
    """
    frame_nhwc = cvcuda.as_tensor(
        torch.as_tensor(input).to(device="cuda:0", non_blocking=True),
        "NHWC",
    )
    resized = cvcuda.resize(
        frame_nhwc,
        (
            frame_nhwc.shape[0],
            out_size[1],  # height
            out_size[0],  # width
            frame_nhwc.shape[3],
        ),
        cvcuda.Interp.LINEAR,
    )
    # Convert to floating point range 0-1.
    normalized = cvcuda.convertto(resized, np.float32, scale=1 / 255)
    # Convert it to NCHW layout and return it.
    return cvcuda.reformat(normalized, "NCHW")


def preprocess_into(input, out_size):
    """Same resize + NCHW reformat, using the ``*_into`` operator variants.

    The destination buffers are allocated with ``torch.empty`` and wrapped as
    CV-CUDA tensors. Unlike ``preprocess`` there is no float conversion: the
    result stays uint8.

    Args:
        input: torch tensor wrapped with the "NHWC" layout after ``.cuda()``.
        out_size: ``(width, height)`` of the resized images.

    Returns:
        The uint8 torch tensor of shape ``(N, 3, height, width)`` on ``cuda:0``
        that ``cvcuda.reformat_into`` was asked to fill.
    """
    cvcuda_RGBtensor = cvcuda.as_tensor(input.cuda(), "NHWC")

    # Destination of the resize, still in NHWC.
    torch_RGBtensor_resized = torch.empty(
        (
            cvcuda_RGBtensor.shape[0],
            out_size[1],
            out_size[0],
            cvcuda_RGBtensor.shape[3],
        ),
        dtype=torch.uint8,
        device="cuda:0",
    )
    cvcuda_RGBtensor_resized = cvcuda.as_tensor(
        torch_RGBtensor_resized.cuda(),
        "NHWC",
    )
    cvcuda.resize_into(
        cvcuda_RGBtensor_resized,
        cvcuda_RGBtensor,
        cvcuda.Interp.LINEAR,
    )

    # Destination of the layout change; this is what the caller receives.
    torch_nchw = torch.empty(
        (input.shape[0], 3, out_size[1], out_size[0]),
        dtype=torch.uint8,
        device="cuda:0",
    )
    cvcuda_nchw = cvcuda.as_tensor(torch_nchw.cuda(0), "NCHW")
    cvcuda.reformat_into(cvcuda_nchw, cvcuda_RGBtensor_resized)
    return torch_nchw


def generate_images(N, width=None, height=None, random_size=False):
    """Create a batch of N random uint8 images with shape ``(N, h, w, 3)``.

    Args:
        N: Number of images in the batch.
        width: Image width; required unless ``random_size`` is true.
        height: Image height; required unless ``random_size`` is true.
        random_size: When true, ignore ``width``/``height`` and draw each
            dimension uniformly from ``[100, 500]``.

    Returns:
        A CPU ``torch.uint8`` tensor with values in ``[0, 255]``.

    Raises:
        ValueError: If ``random_size`` is false and a dimension is missing.
    """
    if random_size:
        w = random.randint(100, 500)
        h = random.randint(100, 500)
    else:
        if width is None or height is None:
            raise ValueError(
                "width and height are required when random_size is False"
            )
        w = width
        h = height
    # torch.rand() yields floats in [0, 1); casting those to uint8 truncates
    # every pixel to 0. Sample the integer range directly instead.
    return torch.randint(0, 256, (N, h, w, 3), dtype=torch.uint8)


def _random_size_batch(batch_size):
    """Batch of ``batch_size`` images with a randomly drawn size."""
    return generate_images(N=batch_size, random_size=True)


def _full_hd_portrait_batch(batch_size):
    """Batch of ``batch_size`` images, 1080 px wide and 1920 px high."""
    return generate_images(width=1080, height=1920, N=batch_size)


def _keep(batch_size):
    """Batch-size schedule: constant."""
    return batch_size


def _grow(batch_size):
    """Batch-size schedule: one more image per iteration."""
    return batch_size + 1


def _randomize(batch_size):
    """Batch-size schedule: uniformly random in ``[1, 80]``."""
    return random.randint(1, 80)


def _run_timed(process, make_batch, first_batch_size, next_batch_size):
    """Call ``process`` on fresh batches until the time budget is used up.

    Args:
        process: ``preprocess`` or ``preprocess_into``.
        make_batch: Callable mapping a batch size to an image batch.
        first_batch_size: Batch size of the first iteration.
        next_batch_size: Callable mapping the current batch size to the next.
    """
    duration = time_of_test_in_min * 60  # seconds
    batch_size = first_batch_size
    start_time = time.time()

    while time.time() - start_time < duration:
        image_batch = make_batch(batch_size)
        # Bound to a name so the previous output stays referenced until the
        # next one has been produced.
        result = process(image_batch, _TARGET_SIZE)  # noqa: F841
        batch_size = next_batch_size(batch_size)


def test_random_image_size():
    """Fixed batch of 20, random input size, allocating operators."""
    _run_timed(preprocess, _random_size_batch, 20, _keep)
    print("Random Image Size Test Complete")


def test_increasing_batch_size():
    """Batch size 1, 2, 3, ... with random input size, allocating operators."""
    _run_timed(preprocess, _random_size_batch, 1, _grow)
    print("Increasing Batch Size Test Complete")


def test_random_batch_size():
    """Random batch size on 1080x1920 inputs, allocating operators."""
    _run_timed(preprocess, _full_hd_portrait_batch, 1, _randomize)
    print("Random Batch Size Test Complete")


def test_random_image_size_into():
    """Fixed batch of 20, random input size, ``*_into`` operators."""
    _run_timed(preprocess_into, _random_size_batch, 20, _keep)
    print("Into operator Random Image Size Test Complete")


def test_increasing_batch_size_into():
    """Batch size 1, 2, 3, ... with random input size, ``*_into`` operators."""
    _run_timed(preprocess_into, _random_size_batch, 1, _grow)
    print("Into operator Increasing Batch Size Test Complete")


def test_random_batch_size_into():
    """Random batch size with random input size, ``*_into`` operators."""
    _run_timed(preprocess_into, _random_size_batch, 1, _randomize)
    print("Into Operator Random Batch Size Test Complete")


def main():
    """Print the properties of GPU 0, then run every test once."""
    print(torch.cuda.get_device_properties(0))
    test_random_image_size()
    test_random_batch_size()
    test_random_image_size_into()
    test_random_batch_size_into()

    test_increasing_batch_size()
    test_increasing_batch_size_into()


if __name__ == "__main__":
    main()
#!/bin/bash -e

# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Smoke-test the cvcuda wheels found below a directory: for each wheel and
# each CPython version named in its file name, create a throw-away virtual
# environment, install the wheel and import the packages it provides.
#
# Usage: test_wheels.sh <wheels_dir>

# Keep -e even when the script is started as "bash script.sh", and make a
# failed download in "curl | python" abort instead of being masked.
set -eo pipefail

# globstar enables the recursive ** pattern. nullglob makes a pattern without
# matches expand to nothing; otherwise the literal pattern would become the
# single array element and the "no wheels" check below could never trigger.
shopt -s globstar nullglob

if [ -z "$1" ]; then
    echo "Usage: $0 <wheels_dir>"
    exit 1
fi

WHEEL_LIST=("$1"/**/cvcuda*.whl)
PACKAGES=("cvcuda" "nvcv")

# Check for GPU and driver
if command -v nvidia-smi &> /dev/null; then
    if ! nvidia-smi > /dev/null 2>&1; then
        echo "Warning: No GPU detected or driver not working with nvidia-smi. Skipping wheel testing..."
        exit 0
    fi
    echo "GPU detected and driver is working (via nvidia-smi)."
elif command -v tegrastats &> /dev/null; then
    # tegrastats keeps sampling until it is stopped, so bound the probe with
    # timeout. Exit status 124 means it was still running when the limit
    # expired, which is the healthy case here.
    probe_status=0
    timeout 2 tegrastats > /dev/null 2>&1 || probe_status=$?
    if [ "${probe_status}" -ne 0 ] && [ "${probe_status}" -ne 124 ]; then
        echo "Warning: tegrastats could not verify GPU. Skipping wheel testing..."
        exit 0
    fi
    echo "GPU detected and driver is working (via tegrastats)."
else
    echo "Warning: Neither nvidia-smi nor tegrastats found. Skipping wheel testing..."
    exit 0
fi

# Check if there are any wheels to test
if [ ${#WHEEL_LIST[@]} -eq 0 ]; then
    echo "No wheels found in the specified directory."
    exit 0
fi

# Print the CPython versions a wheel is tagged for, e.g. "310 311".
#   $1: path of the wheel; the third dash-separated field of its file name
#       is read as the python tag, which may hold several dot-separated tags.
get_compatible_pythons() {
    local wheel_file="$1"
    local compatible_versions=()
    local python_tag tag
    local -a tags

    python_tag=$(basename "${wheel_file}" | cut -d'-' -f3)
    IFS='.' read -ra tags <<< "${python_tag}"

    for tag in "${tags[@]}"; do
        if [[ "${tag}" =~ cp(3[0-9]{1,2}) ]]; then
            compatible_versions+=("${BASH_REMATCH[1]}")
        fi
    done

    echo "${compatible_versions[@]}"
}

# Remove the current temporary environment on every exit path, including an
# abort triggered by -e in the middle of an installation.
test_env_dir=""
cleanup() {
    if [ -n "${test_env_dir}" ]; then
        rm -rf "${test_env_dir}"
    fi
}
trap cleanup EXIT

# Test each wheel
for whl in "${WHEEL_LIST[@]}"; do
    echo "Testing wheel: $(basename "${whl}")"

    # Determine compatible Python versions
    compatible_pythons=($(get_compatible_pythons "${whl}"))
    if [ "${#compatible_pythons[@]}" -eq 0 ]; then
        echo "Error: No compatible Python versions found for $(basename "${whl}"). Skipping."
        continue
    fi

    for py_ver in "${compatible_pythons[@]}"; do
        # "310" -> "python3.10": drop the leading major digit.
        py_exec="python3.${py_ver:1}"
        if ! command -v "${py_exec}" &> /dev/null; then
            echo "Skipping Python ${py_ver}: ${py_exec} not found."
            continue
        fi

        echo "Testing with ${py_exec}..."

        # Create a temporary virtual environment
        test_env_dir=$(mktemp -d)
        ${py_exec} -m venv --without-pip "${test_env_dir}/test_env"
        source "${test_env_dir}/test_env/bin/activate"

        # Manually install pip using get-pip.py
        echo "Manually installing pip..."
        curl -sS https://bootstrap.pypa.io/get-pip.py | ${py_exec}

        # Ensure pip is up to date
        echo "Upgrading pip..."
        ${py_exec} -m pip install --upgrade pip

        # Install and test the wheel
        echo "Installing ${whl}..."
        ${py_exec} -m pip install "${whl}"

        for package in "${PACKAGES[@]}"; do
            echo "Testing import for package ${package}..."
            # The interpreter's output is left visible so that a failing
            # import shows its traceback.
            if ! ${py_exec} -c "import ${package}"; then
                echo " Error: Failed to import ${package} with ${py_exec}."
                deactivate
                exit 1
            fi
            echo " ${package} imported successfully with ${py_exec}."
        done

        deactivate
        rm -rf "${test_env_dir}"
        test_env_dir=""
        echo "Testing with ${py_exec} completed successfully."
    done
done

echo "All wheels tested successfully."