From 3b4f714de500c165e5d37ced8d08ae7c24544f6d Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches <61422851+Beanavil@users.noreply.github.com> Date: Sat, 3 Aug 2024 03:31:25 +0200 Subject: [PATCH] Develop Stream 2024-07-01 (#377) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Using .lint:clang-format * Added the hipcub thread test to the cmakelist * fix: config.hpp is included in every header * add half and bfloat16 cases to warp reduce and scan tests * fix format * Enabled half and bfloat16 tests for nvidia gpus * Fixed that bfloat16 and half thread tests work and added them * Moved thrust headers to different file and added iterator_facade_category * Added IteratorCategory for arg_index, constant, counting and transform * Added iterator_wrapper to reduce amount of duplicate code and use rocprim as much as possible * Use keyword Using in iterators and put helper function in detail space * Make use of iterator_wrapper for arg_index_input * Changes names and adds some documentation to iterator_wrapper * Removed unnecessary constructor and added explicit tag * Moved more to code to IteratorWrapper based on coe Lorinc * Revert "Moved more to code to IteratorWrapper based on coe Lorinc" This reverts commit 894f60cae177096bcf1000e5dd321174faf2476b. * Some iterators should have the device_system_tag as is the case in cub * Implemented minor comments nara * Add large indices test for device segmented reduce * Fix hipCUB's device segmented reduce for large indices * Update CHANGELOG * Bumped CCCL version * Moved get_large_sizes to common test utils file * Changed TwiddleIn/Out implementation to make use of rocprim::radix_key_codec * clang-format: trick clang-format into always breaking after c-style function attributes * Fixed formatting * Bumped project version to 3.3.0 --------- Co-authored-by: Lőrinc Serfőző Co-authored-by: Nick Breed Co-authored-by: Bence Parajdi Co-authored-by: Robin Voetter --- .clang-format | 34 ++- .gitlab-ci.yml | 16 +- CHANGELOG.md | 14 +- CMakeLists.txt | 2 +- README.md | 2 +- cmake/Dependencies.cmake | 2 +- .../backend/rocprim/block/block_histogram.hpp | 4 +- .../backend/rocprim/block/block_load.hpp | 6 +- .../rocprim/block/block_merge_sort.hpp | 4 +- .../rocprim/block/block_radix_rank.hpp | 6 +- .../rocprim/block/block_raking_layout.hpp | 4 +- .../backend/rocprim/block/block_reduce.hpp | 4 +- .../backend/rocprim/block/block_scan.hpp | 6 +- .../backend/rocprim/block/block_shuffle.hpp | 6 +- .../backend/rocprim/block/block_store.hpp | 6 +- .../block/radix_rank_sort_operations.hpp | 12 +- .../backend/rocprim/device/device_reduce.hpp | 13 +- .../device/device_segmented_reduce.hpp | 13 +- .../backend/rocprim/grid/grid_barrier.hpp | 6 +- .../backend/rocprim/grid/grid_even_share.hpp | 6 +- .../backend/rocprim/grid/grid_mapping.hpp | 4 +- .../backend/rocprim/grid/grid_queue.hpp | 6 +- .../iterator/arg_index_input_iterator.hpp | 52 +++-- .../cache_modified_input_iterator.hpp | 27 ++- .../cache_modified_output_iterator.hpp | 26 ++- .../iterator/constant_input_iterator.hpp | 44 ++-- .../iterator/counting_input_iterator.hpp | 45 ++-- .../iterator/discard_output_iterator.hpp | 37 ++-- .../rocprim/iterator/iterator_category.hpp | 74 +++++++ .../rocprim/iterator/iterator_wrapper.hpp | 203 ++++++++++++++++++ .../iterator/tex_obj_input_iterator.hpp | 36 ++-- .../iterator/tex_ref_input_iterator.hpp | 29 +-- .../iterator/transform_input_iterator.hpp | 58 +++-- .../backend/rocprim/thread/thread_load.hpp | 7 +- .../backend/rocprim/thread/thread_reduce.hpp | 4 +- .../backend/rocprim/thread/thread_search.hpp | 12 +- .../backend/rocprim/thread/thread_store.hpp | 5 +- .../hipcub/backend/rocprim/util_math.hpp | 4 +- .../hipcub/backend/rocprim/util_ptx.hpp | 8 +- .../hipcub/backend/rocprim/util_type.hpp | 40 ++-- .../block/block_adjacent_difference.hpp | 5 +- .../cache_modified_input_iterator.hpp | 3 +- .../cache_modified_output_iterator.hpp | 3 +- hipcub/include/hipcub/thread/thread_load.hpp | 3 +- .../include/hipcub/thread/thread_reduce.hpp | 3 +- hipcub/include/hipcub/thread/thread_store.hpp | 3 +- test/extra/CMakeLists.txt | 2 +- test/hipcub/CMakeLists.txt | 1 + .../test_hipcub_block_adjacent_difference.cpp | 2 - ...test_hipcub_device_adjacent_difference.cpp | 30 +-- test/hipcub/test_hipcub_device_histogram.cpp | 8 - .../test_hipcub_device_reduce_by_key.cpp | 9 +- test/hipcub/test_hipcub_device_scan.cpp | 8 +- .../test_hipcub_device_segmented_reduce.cpp | 167 ++++++++++++-- test/hipcub/test_hipcub_device_select.cpp | 8 +- test/hipcub/test_hipcub_thread.cpp | 100 ++++++--- test/hipcub/test_hipcub_warp_reduce.cpp | 29 ++- test/hipcub/test_hipcub_warp_scan.cpp | 55 +++-- test/hipcub/test_utils.hpp | 14 -- test/hipcub/test_utils_data_generation.hpp | 20 +- 60 files changed, 963 insertions(+), 397 deletions(-) create mode 100644 hipcub/include/hipcub/backend/rocprim/iterator/iterator_category.hpp create mode 100644 hipcub/include/hipcub/backend/rocprim/iterator/iterator_wrapper.hpp diff --git a/.clang-format b/.clang-format index bf4a9927..b181be48 100644 --- a/.clang-format +++ b/.clang-format @@ -41,7 +41,6 @@ AllowShortLoopsOnASingleLine: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: Yes -AttributeMacros: ['HIPCUB_DEVICE', 'HIPCUB_HOST', 'HIPCUB_HOST_DEVICE', 'HIPCUB_SHARED_MEMORY', 'HIPCUB_RUNTIME_FUNCTION'] BinPackArguments: false BinPackParameters: false BitFieldColonSpacing: Both @@ -133,4 +132,37 @@ SpacesInConditionalStatement: false SpacesInContainerLiterals: true SpacesInParentheses: false SpacesInSquareBrackets: false + +AttributeMacros: + - __host__ + - __device__ + - __global__ + - __forceinline__ + - __shared__ + - __launch_bounds__ + - HIPCUB_DEVICE + - HIPCUB_HOST + - HIPCUB_HOST_DEVICE + - HIPCUB_SHARED_MEMORY + - HIPCUB_RUNTIME_FUNCTION + - HIPCUB_DETAIL_DEPRECATED_DEBUG_SYNCHRONOUS + +# Trick clang into thinking that our C-style attributes are C++-style attributes +# Make sure that the sizes line up for linebreaks etc +Macros: + - __host__=[[host]] + - __device__=[[device]] + - __global__=[[global]] + - __forceinline__=[[forceinline]] + - __shared__=[[shared]] + - __launch_bounds__(x)=[[launch_bounds(x)]] + - __attribute__(x)=[[attribute(x)]] + - HIPCUB_DEVICE=[[DEVICE___]] + - HIPCUB_HOST=[[HOST___]] + - HIPCUB_HOST_DEVICE=[[HOST_DEVICE___]] + - HIPCUB_SHARED_MEMORY=[[SHARED_MEMORY___]] + - HIPCUB_RUNTIME_FUNCTION=[[RUNTIME_FUNCTION___]] + - HIPCUB_DETAIL_DEPRECATED_DEBUG_SYNCHRONOUS=[[DETAIL_DEPRECATED_DEBUG_SYNCHRONOUS___]] +BreakAfterAttributes: Always + --- diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b46dff11..35c5e529 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -27,6 +27,7 @@ include: - /defaults.yaml - /deps-cmake.yaml - /deps-docs.yaml + - /deps-format.yaml - /deps-rocm.yaml - /deps-nvcc.yaml - /gpus-rocm.yaml @@ -41,20 +42,7 @@ stages: clang-format: extends: - - .deps:rocm - stage: lint - needs: [] - tags: - - build - variables: - CLANG_FORMAT: "/opt/rocm/llvm/bin/clang-format" - GIT_CLANG_FORMAT: "/opt/rocm/llvm/bin/git-clang-format" - rules: - - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' - script: - - cd $CI_PROJECT_DIR - - git config --global --add safe.directory $CI_PROJECT_DIR - - scripts/code-format/check-format.sh $CI_MERGE_REQUEST_DIFF_BASE_SHA --binary "$CLANG_FORMAT" + - .lint:clang-format copyright-date: extends: diff --git a/CHANGELOG.md b/CHANGELOG.md index 43e746d8..04852253 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ Documentation for hipCUB is available at [https://rocm.docs.amd.com/projects/hipCUB/en/latest/](https://rocm.docs.amd.com/projects/hipCUB/en/latest/). +## (Unreleased) hipCUB-3.3.0 for ROCm 6.3.0 + +### Fixed + +* Not all headers in hipCUB included `config.hpp` which could have resulted in build errors. + +### Added +* Add support for large indices in `hipcub::DeviceSegmentedReduce::*`. rocPRIM's backend provides support for all reduce variants, but CUB's does not have support yet for `DeviceSegmentedReduce::Arg*`, so large indices support has been excluded for these as well in hipCUB. + +### Changed +* The NVIDIA backend now requires CUB, Thrust and libcu++ 2.3.2. If it is not found it will be downloaded from the NVIDIA CCCL repository. + ## (Unreleased) hipCUB-3.2.0 for ROCm 6.2.0 ### Added @@ -38,7 +50,7 @@ Documentation for hipCUB is available at by setting the `CUB_DEBUG_SYNC` (or higher debug level) or the `HIPCUB_DEBUG_SYNC` preprocessor definition. * The compile time deprecation warnings can be disabled by defining the `HIPCUB_IGNORE_DEPRECATED_API` preprocessor definition. -## (Unreleased) hipCUB-3.1.0 for ROCm 6.1.0 +## hipCUB-3.1.0 for ROCm 6.1.0 ### Changes diff --git a/CMakeLists.txt b/CMakeLists.txt index f5506c21..8c0ca193 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,7 +106,7 @@ if(BUILD_ADDRESS_SANITIZER) endif() # Setup VERSION -set(VERSION_STRING "3.2.0") +set(VERSION_STRING "3.3.0") rocm_setup_version(VERSION ${VERSION_STRING}) # Print configuration summary diff --git a/README.md b/README.md index adb777c4..26f902df 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ python3 -m http.server * Requires CMake 3.16.9 or later * For NVIDIA GPUs: * CUDA Toolkit - * CUB library + * CCCL library (>= 2.3.2) * Automatically downloaded and built by the CMake script * Requires CMake 3.15.0 or later * Python 3.6 or higher (for HIP on Windows only; this is only required for install scripts) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index edcfe835..5fce12d7 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -139,7 +139,7 @@ endif(USER_BUILD_BENCHMARK) # CUB (only for CUDA platform) if(HIP_COMPILER STREQUAL "nvcc") - set(CCCL_MINIMUM_VERSION 2.2.0) + set(CCCL_MINIMUM_VERSION 2.3.2) if(NOT DOWNLOAD_CUB) find_package(CUB ${CCCL_MINIMUM_VERSION} CONFIG) find_package(Thrust ${CCCL_MINIMUM_VERSION} CONFIG) diff --git a/hipcub/include/hipcub/backend/rocprim/block/block_histogram.hpp b/hipcub/include/hipcub/backend/rocprim/block/block_histogram.hpp index 5f2a701b..4a84cbc6 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/block_histogram.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/block_histogram.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2022, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,8 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_ #define HIPCUB_ROCPRIM_BLOCK_BLOCK_HISTOGRAM_HPP_ +#include "../../../config.hpp" + #include "../util_ptx.hpp" #include diff --git a/hipcub/include/hipcub/backend/rocprim/block/block_load.hpp b/hipcub/include/hipcub/backend/rocprim/block/block_load.hpp index fab287c9..eff64c41 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/block_load.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/block_load.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,14 +30,14 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_LOAD_HPP_ #define HIPCUB_ROCPRIM_BLOCK_BLOCK_LOAD_HPP_ -#include - #include "../../../config.hpp" #include #include "block_load_func.hpp" +#include + BEGIN_HIPCUB_NAMESPACE namespace detail diff --git a/hipcub/include/hipcub/backend/rocprim/block/block_merge_sort.hpp b/hipcub/include/hipcub/backend/rocprim/block/block_merge_sort.hpp index 87663162..f5a0fbac 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/block_merge_sort.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/block_merge_sort.hpp @@ -1,6 +1,6 @@ /****************************************************************************** * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. -* Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. +* Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,6 +29,8 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_MERGE_SORT_HPP_ #define HIPCUB_ROCPRIM_BLOCK_BLOCK_MERGE_SORT_HPP_ +#include "../../../config.hpp" + #include "../thread/thread_sort.hpp" #include "../util_math.hpp" #include "../util_type.hpp" diff --git a/hipcub/include/hipcub/backend/rocprim/block/block_radix_rank.hpp b/hipcub/include/hipcub/backend/rocprim/block/block_radix_rank.hpp index 204ab6e7..2ba3cc9d 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/block_radix_rank.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/block_radix_rank.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021-2022, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,8 +35,6 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_RADIX_RANK_HPP_ #define HIPCUB_ROCPRIM_BLOCK_BLOCK_RADIX_RANK_HPP_ -#include - #include "../../../config.hpp" #include "../../../util_type.hpp" #include "../../../util_ptx.hpp" @@ -48,6 +46,8 @@ #include +#include + BEGIN_HIPCUB_NAMESPACE namespace detail diff --git a/hipcub/include/hipcub/backend/rocprim/block/block_raking_layout.hpp b/hipcub/include/hipcub/backend/rocprim/block/block_raking_layout.hpp index 489e60b9..3f36f747 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/block_raking_layout.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/block_raking_layout.hpp @@ -35,13 +35,13 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_RAKING_LAYOUT_HPP_ #define HIPCUB_ROCPRIM_BLOCK_BLOCK_RAKING_LAYOUT_HPP_ -#include - #include "../../../config.hpp" #include #include +#include + BEGIN_HIPCUB_NAMESPACE /** diff --git a/hipcub/include/hipcub/backend/rocprim/block/block_reduce.hpp b/hipcub/include/hipcub/backend/rocprim/block/block_reduce.hpp index fff843b3..e93a2598 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/block_reduce.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/block_reduce.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,8 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_REDUCE_HPP_ #define HIPCUB_ROCPRIM_BLOCK_BLOCK_REDUCE_HPP_ +#include "../../../config.hpp" + #include #include diff --git a/hipcub/include/hipcub/backend/rocprim/block/block_scan.hpp b/hipcub/include/hipcub/backend/rocprim/block/block_scan.hpp index 57e4d1bc..6b7525f4 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/block_scan.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/block_scan.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,14 +30,14 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_SCAN_HPP_ #define HIPCUB_ROCPRIM_BLOCK_BLOCK_SCAN_HPP_ -#include - #include "../../../config.hpp" #include "../thread/thread_operators.hpp" #include +#include + BEGIN_HIPCUB_NAMESPACE namespace detail diff --git a/hipcub/include/hipcub/backend/rocprim/block/block_shuffle.hpp b/hipcub/include/hipcub/backend/rocprim/block/block_shuffle.hpp index 835f4c63..43f7441c 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/block_shuffle.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/block_shuffle.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,14 +30,14 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_SHUFFLE_HPP_ #define HIPCUB_ROCPRIM_BLOCK_BLOCK_SHUFFLE_HPP_ -#include - #include "../../../config.hpp" #include "../thread/thread_operators.hpp" #include +#include + BEGIN_HIPCUB_NAMESPACE diff --git a/hipcub/include/hipcub/backend/rocprim/block/block_store.hpp b/hipcub/include/hipcub/backend/rocprim/block/block_store.hpp index ae896b56..1c60e777 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/block_store.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/block_store.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,14 +30,14 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_BLOCK_STORE_HPP_ #define HIPCUB_ROCPRIM_BLOCK_BLOCK_STORE_HPP_ -#include - #include "../../../config.hpp" #include "block_store_func.hpp" #include +#include + BEGIN_HIPCUB_NAMESPACE namespace detail diff --git a/hipcub/include/hipcub/backend/rocprim/block/radix_rank_sort_operations.hpp b/hipcub/include/hipcub/backend/rocprim/block/radix_rank_sort_operations.hpp index b736a75e..23c7c963 100644 --- a/hipcub/include/hipcub/backend/rocprim/block/radix_rank_sort_operations.hpp +++ b/hipcub/include/hipcub/backend/rocprim/block/radix_rank_sort_operations.hpp @@ -1,6 +1,6 @@ /****************************************************************************** * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,13 +35,13 @@ #ifndef HIPCUB_ROCPRIM_BLOCK_RADIX_RANK_SORT_OPERATIONS_HPP_ #define HIPCUB_ROCPRIM_BLOCK_RADIX_RANK_SORT_OPERATIONS_HPP_ -#include - #include "../../../config.hpp" - #include - #include - #include +#include +#include +#include + +#include BEGIN_HIPCUB_NAMESPACE diff --git a/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp b/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp index 47755b68..3f550d6f 100644 --- a/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp +++ b/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp @@ -30,13 +30,8 @@ #ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_ #define HIPCUB_ROCPRIM_DEVICE_DEVICE_REDUCE_HPP_ -#include -#include - -#include // hip_bfloat16 -#include // __half - #include "../../../config.hpp" + #include "../../../util_deprecated.hpp" #include "../iterator/arg_index_input_iterator.hpp" #include "../thread/thread_operators.hpp" @@ -44,6 +39,12 @@ #include #include +#include // hip_bfloat16 +#include // __half + +#include +#include + BEGIN_HIPCUB_NAMESPACE namespace detail { diff --git a/hipcub/include/hipcub/backend/rocprim/device/device_segmented_reduce.hpp b/hipcub/include/hipcub/backend/rocprim/device/device_segmented_reduce.hpp index 63e5b9ec..f4e71afd 100644 --- a/hipcub/include/hipcub/backend/rocprim/device/device_segmented_reduce.hpp +++ b/hipcub/include/hipcub/backend/rocprim/device/device_segmented_reduce.hpp @@ -30,9 +30,6 @@ #ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_ #define HIPCUB_ROCPRIM_DEVICE_DEVICE_SEGMENTED_REDUCE_HPP_ -#include -#include - #include "../../../config.hpp" #include "../../../util_deprecated.hpp" @@ -44,6 +41,9 @@ #include +#include +#include + BEGIN_HIPCUB_NAMESPACE namespace detail @@ -77,8 +77,11 @@ __global__ __launch_bounds__( const unsigned int flat_id = ::rocprim::detail::block_thread_id<0>(); const unsigned int segment_id = ::rocprim::detail::block_id<0>(); - const unsigned int begin_offset = begin_offsets[segment_id]; - const unsigned int end_offset = end_offsets[segment_id]; + // Large indices need bigger offset type than unsigned int + using offset_type = typename std::iterator_traits::value_type; + + const offset_type begin_offset = begin_offsets[segment_id]; + const offset_type end_offset = end_offsets[segment_id]; // transform the segment output if(flat_id == 0) diff --git a/hipcub/include/hipcub/backend/rocprim/grid/grid_barrier.hpp b/hipcub/include/hipcub/backend/rocprim/grid/grid_barrier.hpp index 36ad189a..2f7d2062 100644 --- a/hipcub/include/hipcub/backend/rocprim/grid/grid_barrier.hpp +++ b/hipcub/include/hipcub/backend/rocprim/grid/grid_barrier.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,12 +30,12 @@ #ifndef HIPCUB_ROCPRIM_GRID_GRID_BARRIER_HPP_ #define HIPCUB_ROCPRIM_GRID_GRID_BARRIER_HPP_ -#include - #include "../../../config.hpp" #include "../../../thread/thread_load.hpp" +#include + BEGIN_HIPCUB_NAMESPACE /** diff --git a/hipcub/include/hipcub/backend/rocprim/grid/grid_even_share.hpp b/hipcub/include/hipcub/backend/rocprim/grid/grid_even_share.hpp index 91993b55..c6fd19b7 100644 --- a/hipcub/include/hipcub/backend/rocprim/grid/grid_even_share.hpp +++ b/hipcub/include/hipcub/backend/rocprim/grid/grid_even_share.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,12 +30,12 @@ #ifndef HIPCUB_ROCPRIM_GRID_GRID_EVEN_SHARE_HPP_ #define HIPCUB_ROCPRIM_GRID_GRID_EVEN_SHARE_HPP_ -#include - #include "../../../config.hpp" #include "../../../grid/grid_mapping.hpp" #include "../util_type.hpp" +#include + BEGIN_HIPCUB_NAMESPACE /** diff --git a/hipcub/include/hipcub/backend/rocprim/grid/grid_mapping.hpp b/hipcub/include/hipcub/backend/rocprim/grid/grid_mapping.hpp index ee99b248..b19f222a 100644 --- a/hipcub/include/hipcub/backend/rocprim/grid/grid_mapping.hpp +++ b/hipcub/include/hipcub/backend/rocprim/grid/grid_mapping.hpp @@ -29,11 +29,11 @@ #ifndef HIPCUB_ROCPRIM_GRID_GRID_MAPPING_HPP_ #define HIPCUB_ROCPRIM_GRID_GRID_MAPPING_HPP_ -#include - #include "../../../config.hpp" #include "../../../thread/thread_load.hpp" +#include + BEGIN_HIPCUB_NAMESPACE diff --git a/hipcub/include/hipcub/backend/rocprim/grid/grid_queue.hpp b/hipcub/include/hipcub/backend/rocprim/grid/grid_queue.hpp index ef8decf2..59b19495 100644 --- a/hipcub/include/hipcub/backend/rocprim/grid/grid_queue.hpp +++ b/hipcub/include/hipcub/backend/rocprim/grid/grid_queue.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021-2023, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,10 +30,10 @@ #ifndef HIPCUB_ROCPRIM_GRID_GRID_QUEUE_HPP_ #define HIPCUB_ROCPRIM_GRID_GRID_QUEUE_HPP_ -#include - #include "../../../config.hpp" +#include + BEGIN_HIPCUB_NAMESPACE /** diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/arg_index_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/arg_index_input_iterator.hpp index 06105f8d..3d068923 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/arg_index_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/arg_index_input_iterator.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,31 +30,49 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_ARG_INDEX_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_ARG_INDEX_INPUT_ITERATOR_HPP_ -#include -#include - #include "../../../config.hpp" +#include "iterator_category.hpp" +#include "iterator_wrapper.hpp" + #include -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION +#include BEGIN_HIPCUB_NAMESPACE -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template::value_type> +class ArgIndexInputIterator + : public detail::IteratorWrapper< + rocprim::arg_index_iterator, + ArgIndexInputIterator> +{ + using Iterator = rocprim::arg_index_iterator; + using Base + = detail::IteratorWrapper>; + +public: + using iterator_category = typename detail::IteratorCategory::type; + using self_type = typename Iterator::self_type; + + __host__ __device__ __forceinline__ ArgIndexInputIterator( + InputIterator iterator, typename Iterator::difference_type offset = 0) + : Base(Iterator(iterator, offset)) + {} -template< - typename InputIterator, - typename Difference = std::ptrdiff_t, - typename Value = typename std::iterator_traits::value_type -> -using ArgIndexInputIterator = ::rocprim::arg_index_iterator; + // Cast from wrapped iterator to class itself + __host__ __device__ __forceinline__ explicit ArgIndexInputIterator(Iterator iterator) + : Base(iterator) + {} +}; -#endif +#endif // DOXYGEN_SHOULD_SKIP_THIS END_HIPCUB_NAMESPACE diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_input_iterator.hpp index 673db63b..d1bcbb92 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_input_iterator.hpp @@ -30,17 +30,14 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_CACHE_MODIFIED_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_CACHE_MODIFIED_INPUT_ITERATOR_HPP_ -#include -#include +#include "../../../config.hpp" #include "../thread/thread_load.hpp" #include "../util_type.hpp" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION +#include "iterator_category.hpp" + +#include BEGIN_HIPCUB_NAMESPACE @@ -51,14 +48,16 @@ template < class CacheModifiedInputIterator { public: - // Required iterator traits - typedef CacheModifiedInputIterator self_type; ///< My own type - typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another - typedef ValueType value_type; ///< The type of the element the iterator can point to - typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to - typedef ValueType reference; ///< The type of a reference to an element the iterator can point to - typedef std::random_access_iterator_tag iterator_category; ///< The iterator category + using self_type = CacheModifiedInputIterator; ///< My own type + using difference_type + = OffsetT; ///< Type to express the result of subtracting one iterator from another + using value_type = ValueType; ///< The type of the element the iterator can point to + using pointer = ValueType*; ///< The type of a pointer to an element the iterator can point to + using reference + = ValueType; ///< The type of a reference to an element the iterator can point to + using iterator_category = typename detail::IteratorCategory:: + type; ///< The iterator category public: diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_output_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_output_iterator.hpp index c9e7ebff..4798035c 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_output_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_output_iterator.hpp @@ -30,19 +30,15 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_CACHE_MODIFIED_OUTPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_CACHE_MODIFIED_OUTPUT_ITERATOR_HPP_ -#include -#include +#include "../../../config.hpp" #include "../thread/thread_load.hpp" #include "../thread/thread_store.hpp" #include "../util_type.hpp" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION +#include "iterator_category.hpp" +#include BEGIN_HIPCUB_NAMESPACE @@ -71,14 +67,16 @@ class CacheModifiedOutputIterator }; public: - // Required iterator traits - typedef CacheModifiedOutputIterator self_type; ///< My own type - typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another - typedef void value_type; ///< The type of the element the iterator can point to - typedef void pointer; ///< The type of a pointer to an element the iterator can point to - typedef Reference reference; ///< The type of a reference to an element the iterator can point to - typedef std::random_access_iterator_tag iterator_category; ///< The iterator category + using self_type = CacheModifiedOutputIterator; ///< My own type + using difference_type + = OffsetT; ///< Type to express the result of subtracting one iterator from another + using value_type = void; ///< The type of the element the iterator can point to + using pointer = void; ///< The type of a pointer to an element the iterator can point to + using reference + = Reference; ///< The type of a reference to an element the iterator can point to + using iterator_category = typename detail::IteratorCategory:: + type; ///< The iterator category private: diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/constant_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/constant_input_iterator.hpp index b428102c..7781b2f1 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/constant_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/constant_input_iterator.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,28 +30,42 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_CONSTANT_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_CONSTANT_INPUT_ITERATOR_HPP_ -#include -#include - #include "../../../config.hpp" +#include "iterator_category.hpp" +#include "iterator_wrapper.hpp" + #include -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION +#include BEGIN_HIPCUB_NAMESPACE -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template +class ConstantInputIterator + : public detail::IteratorWrapper, + ConstantInputIterator> +{ + using Iterator = rocprim::constant_iterator; + using Base = detail::IteratorWrapper>; + +public: + using iterator_category = typename detail::IteratorCategory::type; + using self_type = typename Iterator::self_type; + + __host__ __device__ __forceinline__ ConstantInputIterator( + const typename Iterator::value_type value, const size_t index = 0) + : Base(Iterator(value, index)) + {} -template< - typename ValueType, - typename OffsetT = std::ptrdiff_t -> -using ConstantInputIterator = ::rocprim::constant_iterator; + // Cast from wrapped iterator to class itself + __host__ __device__ __forceinline__ explicit ConstantInputIterator(Iterator iterator) + : Base(iterator) + {} +}; #endif diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/counting_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/counting_input_iterator.hpp index 6eb62a78..007e2a02 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/counting_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/counting_input_iterator.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,28 +30,43 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_COUNTING_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_COUNTING_INPUT_ITERATOR_HPP_ -#include -#include - #include "../../../config.hpp" +#include "iterator_category.hpp" +#include "iterator_wrapper.hpp" + #include -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION +#include BEGIN_HIPCUB_NAMESPACE -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template +class CountingInputIterator + : public detail::IteratorWrapper, + CountingInputIterator> +{ + using Iterator = rocprim::counting_iterator; + using Base + = detail::IteratorWrapper>; + +public: + using iterator_category = typename detail::IteratorCategory::type; + using self_type = typename Iterator::self_type; + + __host__ __device__ __forceinline__ CountingInputIterator( + const typename Iterator::value_type value) + : Base(Iterator(value)) + {} -template< - typename ValueType, - typename OffsetT = std::ptrdiff_t -> -using CountingInputIterator = ::rocprim::counting_iterator; + // Cast from wrapped iterator to class itself + __host__ __device__ __forceinline__ explicit CountingInputIterator(Iterator iterator) + : Base(iterator) + {} +}; #endif diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/discard_output_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/discard_output_iterator.hpp index 3254c4b1..4fbb5851 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/discard_output_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/discard_output_iterator.hpp @@ -30,17 +30,14 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_DISCARD_OUTPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_DISCARD_OUTPUT_ITERATOR_HPP_ +#include "../../../config.hpp" + +#include "iterator_category.hpp" + #include #include -#include "../../../config.hpp" - BEGIN_HIPCUB_NAMESPACE -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION /** * \addtogroup UtilIterator @@ -55,25 +52,15 @@ template class DiscardOutputIterator { public: - // Required iterator traits - typedef DiscardOutputIterator self_type; ///< My own type - typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another - typedef void value_type; ///< The type of the element the iterator can point to - typedef void pointer; ///< The type of a pointer to an element the iterator can point to - typedef void reference; ///< The type of a reference to an element the iterator can point to - -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename thrust::detail::iterator_facade_category< - thrust::any_system_tag, - thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else - typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION + using self_type = DiscardOutputIterator; ///< My own type + using difference_type + = OffsetT; ///< Type to express the result of subtracting one iterator from another + using value_type = void; ///< The type of the element the iterator can point to + using pointer = void; ///< The type of a pointer to an element the iterator can point to + using reference = void; ///< The type of a reference to an element the iterator can point to + using iterator_category = + typename detail::IteratorCategory::type; ///< The iterator category private: diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/iterator_category.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/iterator_category.hpp new file mode 100644 index 00000000..8ec573b3 --- /dev/null +++ b/hipcub/include/hipcub/backend/rocprim/iterator/iterator_category.hpp @@ -0,0 +1,74 @@ +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef HIPCUB_ROCPRIM_ITERATOR_CATEGORY_HPP +#define HIPCUB_ROCPRIM_ITERATOR_CATEGORY_HPP + +#include "../../../config.hpp" + +#if(THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include + +BEGIN_HIPCUB_NAMESPACE + +namespace detail +{ + +// Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods +template +struct IteratorCategory +{ + using system_tag + = std::conditional::type; + using type = + typename thrust::detail::iterator_facade_category::type; +}; + +} // namespace detail + +END_HIPCUB_NAMESPACE + +#else + + #include + +BEGIN_HIPCUB_NAMESPACE + +namespace detail +{ + +template +struct IteratorCategory +{ + using type = typename std::random_access_iterator_tag; +}; + +} // namespace detail + +END_HIPCUB_NAMESPACE + +#endif // THRUST_VERSION + +#endif // HIPCUB_ROCPRIM_ITERATOR_CATEGORY_HPP diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/iterator_wrapper.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/iterator_wrapper.hpp new file mode 100644 index 00000000..98fe3fd3 --- /dev/null +++ b/hipcub/include/hipcub/backend/rocprim/iterator/iterator_wrapper.hpp @@ -0,0 +1,203 @@ +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef HIPCUB_ROCPRIM_WRAPPER_ITERATOR_HPP_ +#define HIPCUB_ROCPRIM_WRAPPER_ITERATOR_HPP_ + +#include "../../../config.hpp" + +#include + +BEGIN_HIPCUB_NAMESPACE + +namespace detail +{ + +/// \class IteratorWrapper +/// \brief A wrapper for iterators to be able to make iterator_traits overwritable +/// +/// \tparam WrappedIterator - the iterator that is wrapped +/// \tparam DerivedIterator - the iterator that this base class inherits +template +class IteratorWrapper +{ +public: + using value_type = typename WrappedIterator::value_type; + using reference = typename WrappedIterator::reference; + using pointer = typename WrappedIterator::pointer; + using difference_type = typename WrappedIterator::difference_type; + using iterator_category = typename WrappedIterator::iterator_category; + + WrappedIterator iterator_; + + __host__ __device__ __forceinline__ IteratorWrapper(WrappedIterator iterator) + : iterator_(iterator) + {} + +private: + __host__ __device__ __forceinline__ + DerivedIterator& derived() + { + return static_cast(*this); + } + +public: + __host__ __device__ __forceinline__ + DerivedIterator& + operator++() + { + iterator_++; + return derived(); + } + + __host__ __device__ __forceinline__ + DerivedIterator + operator++(int) + { + DerivedIterator old_ci = derived(); + iterator_++; + return old_ci; + } + + __host__ __device__ __forceinline__ + DerivedIterator& + operator--() + { + iterator_--; + return derived(); + } + + __host__ __device__ __forceinline__ + DerivedIterator + operator--(int) + { + DerivedIterator old_ci = derived(); + iterator_--; + return old_ci; + } + + __host__ __device__ __forceinline__ + value_type + operator*() const + { + return iterator_.operator*(); + } + + __host__ __device__ __forceinline__ + pointer + operator->() const + { + return iterator_.operator->(); + } + + __host__ __device__ __forceinline__ + value_type + operator[](difference_type distance) const + { + return iterator_[distance]; + } + + __host__ __device__ __forceinline__ + DerivedIterator + operator+(difference_type distance) const + { + return DerivedIterator(iterator_ + distance); + } + + __host__ __device__ __forceinline__ + DerivedIterator& + operator+=(difference_type distance) + { + iterator_ += distance; + return derived(); + } + + __host__ __device__ __forceinline__ + DerivedIterator + operator-(difference_type distance) const + { + return DerivedIterator(iterator_ - distance); + } + + __host__ __device__ __forceinline__ + DerivedIterator& + operator-=(difference_type distance) + { + iterator_ -= distance; + return derived(); + } + + __host__ __device__ __forceinline__ + difference_type + operator-(DerivedIterator other) const + { + return iterator_.operator-(other.iterator_); + } + + __host__ __device__ __forceinline__ + bool operator==(DerivedIterator other) const + { + return iterator_ == other.iterator_; + } + + __host__ __device__ __forceinline__ + bool operator!=(DerivedIterator other) const + { + return iterator_ != other.iterator_; + } + + __host__ __device__ __forceinline__ + bool operator<(DerivedIterator other) const + { + return iterator_ < other.iterator_; + } + + __host__ __device__ __forceinline__ + bool operator<=(DerivedIterator other) const + { + return iterator_ <= other.iterator_; + } + + __host__ __device__ __forceinline__ + bool operator>(DerivedIterator other) const + { + return iterator_ > other.iterator_; + } + + __host__ __device__ __forceinline__ + bool operator>=(DerivedIterator other) const + { + return iterator_ >= other.iterator_; + } + + [[deprecated]] + friend std::ostream& + operator<<(std::ostream& os, const DerivedIterator& iter) + { + os << iter.iterator_; + return os; + } +}; + +} // namespace detail + +END_HIPCUB_NAMESPACE + +#endif // HIPCUB_ROCPRIM_WRAPPER_ITERATOR_HPP_ diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp index 7a2ea482..7a4a21c3 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,20 +30,14 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_TEX_OBJ_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_TEX_OBJ_INPUT_ITERATOR_HPP_ -#include -#include - #include "../../../config.hpp" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION - +#include "iterator_category.hpp" #include +#include + BEGIN_HIPCUB_NAMESPACE template< @@ -52,7 +46,12 @@ template< > class TexObjInputIterator : public ::rocprim::texture_cache_iterator { - public: +public: + using iterator_category = typename detail::IteratorCategory< + typename rocprim::texture_cache_iterator::value_type, + typename rocprim::texture_cache_iterator::reference, + false>::type; ///< The iterator category + template inline hipError_t BindTexture(Qualified* ptr, @@ -70,17 +69,14 @@ class TexObjInputIterator : public ::rocprim::texture_cache_iterator HIPCUB_HOST_DEVICE inline ~TexObjInputIterator() = default; - HIPCUB_HOST_DEVICE inline - TexObjInputIterator() : ::rocprim::texture_cache_iterator() - { - } + HIPCUB_HOST_DEVICE inline TexObjInputIterator() + : ::rocprim::texture_cache_iterator() + {} - HIPCUB_HOST_DEVICE inline - TexObjInputIterator(const ::rocprim::texture_cache_iterator other) + HIPCUB_HOST_DEVICE inline TexObjInputIterator( + const ::rocprim::texture_cache_iterator other) : ::rocprim::texture_cache_iterator(other) - { - } - + {} }; END_HIPCUB_NAMESPACE diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp index 016d866f..256efe5e 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,18 +30,14 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_TEX_REF_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_TEX_REF_INPUT_ITERATOR_HPP_ -#include -#include - #include "../../../config.hpp" -#if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION +#include "iterator_category.hpp" #include +#include + BEGIN_HIPCUB_NAMESPACE template< @@ -51,14 +47,19 @@ template< > class TexRefInputIterator : public ::rocprim::texture_cache_iterator { - public: +public: + using iterator_category = typename detail::IteratorCategory< + typename rocprim::texture_cache_iterator::value_type, + typename rocprim::texture_cache_iterator::reference, + false>::type; ///< The iterator category + template - inline - hipError_t BindTexture(Qualified* ptr, - size_t bytes = size_t(-1), - size_t texture_offset = 0) + inline hipError_t + BindTexture(Qualified* ptr, size_t bytes = size_t(-1), size_t texture_offset = 0) { - return ::rocprim::texture_cache_iterator::bind_texture(ptr, bytes, texture_offset); + return ::rocprim::texture_cache_iterator::bind_texture(ptr, + bytes, + texture_offset); } inline hipError_t UnbindTexture() diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/transform_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/transform_input_iterator.hpp index 0800be78..6be09de4 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/transform_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/transform_input_iterator.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,31 +30,53 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_TRANSFORM_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_TRANSFORM_INPUT_ITERATOR_HPP_ -#include -#include - #include "../../../config.hpp" -#include +#include "iterator_category.hpp" +#include "iterator_wrapper.hpp" -#if (THRUST_VERSION >= 100700) - // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION +#include +#include +#include +#include +#include BEGIN_HIPCUB_NAMESPACE -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template +class TransformInputIterator + : public detail::IteratorWrapper< + rocprim::transform_iterator, + TransformInputIterator> +{ + using Iterator = rocprim::transform_iterator; + using Base = detail::IteratorWrapper< + Iterator, + TransformInputIterator>; + +public: + using iterator_category = typename detail::IteratorCategory::type; + using self_type = typename Iterator::self_type; + using unary_function = typename Iterator::unary_function; + + __host__ __device__ __forceinline__ TransformInputIterator(InputIteratorT iterator, + ConversionOp transform) + : Base(Iterator(iterator, transform)) + {} -template< - typename ValueType, - typename ConversionOp, - typename InputIteratorT, - typename OffsetT = std::ptrdiff_t // ignored -> -using TransformInputIterator = ::rocprim::transform_iterator; + // Cast from wrapped iterator to class itself + __host__ __device__ __forceinline__ explicit TransformInputIterator(Iterator iterator) + : Base(iterator) + {} +}; #endif diff --git a/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp b/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp index cba30173..1e86606d 100644 --- a/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp +++ b/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,6 +29,9 @@ #ifndef HIPCUB_ROCPRIM_THREAD_THREAD_LOAD_HPP_ #define HIPCUB_ROCPRIM_THREAD_THREAD_LOAD_HPP_ + +#include "../../../config.hpp" + BEGIN_HIPCUB_NAMESPACE enum CacheLoadModifier : int32_t @@ -45,7 +48,7 @@ enum CacheLoadModifier : int32_t template HIPCUB_DEVICE __forceinline__ T AsmThreadLoad(void * ptr) { - T retval = 0; + T retval; __builtin_memcpy(&retval, ptr, sizeof(T)); return retval; } diff --git a/hipcub/include/hipcub/backend/rocprim/thread/thread_reduce.hpp b/hipcub/include/hipcub/backend/rocprim/thread/thread_reduce.hpp index 7d3674f5..04debc43 100644 --- a/hipcub/include/hipcub/backend/rocprim/thread/thread_reduce.hpp +++ b/hipcub/include/hipcub/backend/rocprim/thread/thread_reduce.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,8 @@ #ifndef HIPCUB_ROCPRIM_THREAD_THREAD_REDUCE_HPP_ #define HIPCUB_ROCPRIM_THREAD_THREAD_REDUCE_HPP_ +#include "../../../config.hpp" + BEGIN_HIPCUB_NAMESPACE /// Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations) diff --git a/hipcub/include/hipcub/backend/rocprim/thread/thread_search.hpp b/hipcub/include/hipcub/backend/rocprim/thread/thread_search.hpp index ea3a9460..f383d0b4 100644 --- a/hipcub/include/hipcub/backend/rocprim/thread/thread_search.hpp +++ b/hipcub/include/hipcub/backend/rocprim/thread/thread_search.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,12 +27,14 @@ * ******************************************************************************/ - #ifndef HIBCUB_ROCPRIM_THREAD_THREAD_SEARCH_HPP_ - #define HIBCUB_ROCPRIM_THREAD_THREAD_SEARCH_HPP_ +#ifndef HIBCUB_ROCPRIM_THREAD_THREAD_SEARCH_HPP_ +#define HIBCUB_ROCPRIM_THREAD_THREAD_SEARCH_HPP_ - #include +#include "../../../config.hpp" - BEGIN_HIPCUB_NAMESPACE +#include + +BEGIN_HIPCUB_NAMESPACE #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document diff --git a/hipcub/include/hipcub/backend/rocprim/thread/thread_store.hpp b/hipcub/include/hipcub/backend/rocprim/thread/thread_store.hpp index 3108feee..f59e70b9 100644 --- a/hipcub/include/hipcub/backend/rocprim/thread/thread_store.hpp +++ b/hipcub/include/hipcub/backend/rocprim/thread/thread_store.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,6 +29,9 @@ #ifndef HIPCUB_ROCPRIM_THREAD_THREAD_STORE_HPP_ #define HIPCUB_ROCPRIM_THREAD_THREAD_STORE_HPP_ + +#include "../../../config.hpp" + BEGIN_HIPCUB_NAMESPACE enum CacheStoreModifier diff --git a/hipcub/include/hipcub/backend/rocprim/util_math.hpp b/hipcub/include/hipcub/backend/rocprim/util_math.hpp index 1d466cd8..cafeb2fa 100644 --- a/hipcub/include/hipcub/backend/rocprim/util_math.hpp +++ b/hipcub/include/hipcub/backend/rocprim/util_math.hpp @@ -1,6 +1,6 @@ /****************************************************************************** * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,6 +29,8 @@ #ifndef HIPCUB_ROCPRIM_UTIL_MATH_HPP_ #define HIPCUB_ROCPRIM_UTIL_MATH_HPP_ +#include "../../config.hpp" + /** * \file * Define helper math functions. diff --git a/hipcub/include/hipcub/backend/rocprim/util_ptx.hpp b/hipcub/include/hipcub/backend/rocprim/util_ptx.hpp index 7f66cf80..2fb74f2d 100644 --- a/hipcub/include/hipcub/backend/rocprim/util_ptx.hpp +++ b/hipcub/include/hipcub/backend/rocprim/util_ptx.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2023, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,14 +30,14 @@ #ifndef HIPCUB_ROCPRIM_UTIL_PTX_HPP_ #define HIPCUB_ROCPRIM_UTIL_PTX_HPP_ -#include -#include - #include "../../config.hpp" #include "util_type.hpp" #include +#include +#include + BEGIN_HIPCUB_NAMESPACE // Missing compared to CUB: diff --git a/hipcub/include/hipcub/backend/rocprim/util_type.hpp b/hipcub/include/hipcub/backend/rocprim/util_type.hpp index 494994aa..b3f89a06 100644 --- a/hipcub/include/hipcub/backend/rocprim/util_type.hpp +++ b/hipcub/include/hipcub/backend/rocprim/util_type.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021-2023, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,17 +30,18 @@ #ifndef HIPCUB_ROCPRIM_UTIL_TYPE_HPP_ #define HIPCUB_ROCPRIM_UTIL_TYPE_HPP_ -#include -#include - #include "../../config.hpp" #include +#include #include #include #include +#include +#include + BEGIN_HIPCUB_NAMESPACE #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document @@ -454,15 +455,16 @@ struct BaseTraits NULL_TYPE = false, }; + using key_codec = rocprim::radix_key_codec; static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key; + return key_codec::encode(rocprim::detail::bit_cast(key)); } static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key; + return key_codec::decode(rocprim::detail::bit_cast(key)); } static HIPCUB_HOST_DEVICE __forceinline__ T Max() @@ -502,14 +504,16 @@ struct BaseTraits NULL_TYPE = false, }; + using key_codec = rocprim::radix_key_codec; + static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key ^ HIGH_BIT; + return key_codec::encode(rocprim::detail::bit_cast(key)); }; static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key ^ HIGH_BIT; + return key_codec::decode(rocprim::detail::bit_cast(key)); }; static HIPCUB_HOST_DEVICE __forceinline__ T Max() @@ -593,6 +597,8 @@ struct BaseTraits static const UnsignedBits LOWEST_KEY = UnsignedBits(-1); static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; + using key_codec = rocprim::radix_key_codec; + enum { PRIMITIVE = true, @@ -601,14 +607,12 @@ struct BaseTraits static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT; - return key ^ mask; + return key_codec::encode(rocprim::detail::bit_cast(key)); }; static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1); - return key ^ mask; + return key_codec::decode(rocprim::detail::bit_cast(key)); }; static HIPCUB_HOST_DEVICE __forceinline__ T Max() { @@ -655,14 +659,16 @@ struct NumericTraits<__uint128_t> static constexpr bool PRIMITIVE = false; static constexpr bool NULL_TYPE = false; + using key_codec = rocprim::radix_key_codec; + static __host__ __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key; + return key_codec::encode(rocprim::detail::bit_cast(key)); } static __host__ __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key; + return key_codec::decode(rocprim::detail::bit_cast(key)); } static __host__ __device__ __forceinline__ T Max() @@ -690,14 +696,16 @@ struct NumericTraits<__int128_t> static constexpr bool PRIMITIVE = false; static constexpr bool NULL_TYPE = false; + using key_codec = rocprim::radix_key_codec; + static __host__ __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key ^ HIGH_BIT; + return key_codec::encode(rocprim::detail::bit_cast(key)); }; static __host__ __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key ^ HIGH_BIT; + return key_codec::decode(rocprim::detail::bit_cast(key)); }; static __host__ __device__ __forceinline__ T Max() diff --git a/hipcub/include/hipcub/block/block_adjacent_difference.hpp b/hipcub/include/hipcub/block/block_adjacent_difference.hpp index e038d030..122b9f8f 100644 --- a/hipcub/include/hipcub/block/block_adjacent_difference.hpp +++ b/hipcub/include/hipcub/block/block_adjacent_difference.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,8 @@ #ifndef HIPCUB_BLOCK_BLOCK_AJACENT_DIFFERENCE_HPP_ #define HIPCUB_BLOCK_BLOCK_AJACENT_DIFFERENCE_HPP_ +#include "../config.hpp" + BEGIN_HIPCUB_NAMESPACE namespace detail @@ -58,7 +60,6 @@ END_HIPCUB_NAMESPACE #ifdef __HIP_PLATFORM_AMD__ #include "../backend/rocprim/block/block_adjacent_difference.hpp" #elif defined(__HIP_PLATFORM_NVIDIA__) - #include "../config.hpp" #include #endif diff --git a/hipcub/include/hipcub/iterator/cache_modified_input_iterator.hpp b/hipcub/include/hipcub/iterator/cache_modified_input_iterator.hpp index b18a0053..debdce3d 100644 --- a/hipcub/include/hipcub/iterator/cache_modified_input_iterator.hpp +++ b/hipcub/include/hipcub/iterator/cache_modified_input_iterator.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,6 @@ #define HIPCUB_ITERATOR_CACHE_MODIFIED_INPUT_HPP_ #ifdef __HIP_PLATFORM_AMD__ - #include "../config.hpp" #include "../backend/rocprim/iterator/cache_modified_input_iterator.hpp" #elif defined(__HIP_PLATFORM_NVIDIA__) #include "../config.hpp" diff --git a/hipcub/include/hipcub/iterator/cache_modified_output_iterator.hpp b/hipcub/include/hipcub/iterator/cache_modified_output_iterator.hpp index bd4160b9..c13d8379 100644 --- a/hipcub/include/hipcub/iterator/cache_modified_output_iterator.hpp +++ b/hipcub/include/hipcub/iterator/cache_modified_output_iterator.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,6 @@ #define HIPCUB_ITERATOR_CACHE_MODIFIED_OUTPUT_HPP_ #ifdef __HIP_PLATFORM_AMD__ - #include "../config.hpp" #include "../backend/rocprim/iterator/cache_modified_output_iterator.hpp" #elif defined(__HIP_PLATFORM_NVIDIA__) #include "../config.hpp" diff --git a/hipcub/include/hipcub/thread/thread_load.hpp b/hipcub/include/hipcub/thread/thread_load.hpp index fb6a6ce7..89b4b8b3 100644 --- a/hipcub/include/hipcub/thread/thread_load.hpp +++ b/hipcub/include/hipcub/thread/thread_load.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,6 @@ #define HIPCUB_THREAD_LOAD_USE_CACHE_MODIFIERS 1 #endif - #include "../config.hpp" #include "../backend/rocprim/thread/thread_load.hpp" #elif defined(__HIP_PLATFORM_NVIDIA__) #include "../config.hpp" diff --git a/hipcub/include/hipcub/thread/thread_reduce.hpp b/hipcub/include/hipcub/thread/thread_reduce.hpp index 92c0016d..a5d155a3 100644 --- a/hipcub/include/hipcub/thread/thread_reduce.hpp +++ b/hipcub/include/hipcub/thread/thread_reduce.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,6 @@ #define HIPCUB_THREAD_THREAD_REDUCE_HPP_ #ifdef __HIP_PLATFORM_AMD__ - #include "../config.hpp" #include "../backend/rocprim/thread/thread_reduce.hpp" #elif defined(__HIP_PLATFORM_NVIDIA__) #include "../config.hpp" diff --git a/hipcub/include/hipcub/thread/thread_store.hpp b/hipcub/include/hipcub/thread/thread_store.hpp index b7b92dd1..df61aaa6 100644 --- a/hipcub/include/hipcub/thread/thread_store.hpp +++ b/hipcub/include/hipcub/thread/thread_store.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,6 @@ #define HIPCUB_THREAD_STORE_USE_CACHE_MODIFIERS 1 #endif - #include "../config.hpp" #include "../backend/rocprim/thread/thread_store.hpp" #elif defined(__HIP_PLATFORM_NVIDIA__) #include "../config.hpp" diff --git a/test/extra/CMakeLists.txt b/test/extra/CMakeLists.txt index 5602171e..6440a20a 100644 --- a/test/extra/CMakeLists.txt +++ b/test/extra/CMakeLists.txt @@ -42,7 +42,7 @@ include(VerifyCompiler) # CUB (only for CUDA platform) if(HIP_COMPILER STREQUAL "nvcc") - set(CCCL_MINIMUM_VERSION 2.2.0) + set(CCCL_MINIMUM_VERSION 2.3.2) if(NOT DOWNLOAD_CUB) find_package(CUB ${CCCL_MINIMUM_VERSION} CONFIG) find_package(Thrust ${CCCL_MINIMUM_VERSION} CONFIG) diff --git a/test/hipcub/CMakeLists.txt b/test/hipcub/CMakeLists.txt index 1afe40a7..e0c5fa55 100644 --- a/test/hipcub/CMakeLists.txt +++ b/test/hipcub/CMakeLists.txt @@ -228,5 +228,6 @@ add_hipcub_test("hipcub.WarpReduce" test_hipcub_warp_reduce.cpp) add_hipcub_test("hipcub.WarpScan" test_hipcub_warp_scan.cpp) add_hipcub_test("hipcub.WarpStore" test_hipcub_warp_store.cpp) add_hipcub_test("hipcub.Iterators" test_hipcub_iterators.cpp) +add_hipcub_test("hipcub.Thread" test_hipcub_thread.cpp) add_hipcub_test("hipcub.ThreadOperators" test_hipcub_thread_operators.cpp) add_hipcub_test("hipcub.ThreadSort" test_hipcub_thread_sort.cpp) diff --git a/test/hipcub/test_hipcub_block_adjacent_difference.cpp b/test/hipcub/test_hipcub_block_adjacent_difference.cpp index d845e4c6..c4d71284 100644 --- a/test/hipcub/test_hipcub_block_adjacent_difference.cpp +++ b/test/hipcub/test_hipcub_block_adjacent_difference.cpp @@ -300,10 +300,8 @@ struct custom_op2 typedef ::testing::Types, params_subtract, params_subtract, -#ifndef __HIP_PLATFORM_NVIDIA__ params_subtract, params_subtract, -#endif params_subtract, params_subtract, diff --git a/test/hipcub/test_hipcub_device_adjacent_difference.cpp b/test/hipcub/test_hipcub_device_adjacent_difference.cpp index 35162ae7..90c12aea 100644 --- a/test/hipcub/test_hipcub_device_adjacent_difference.cpp +++ b/test/hipcub/test_hipcub_device_adjacent_difference.cpp @@ -27,6 +27,7 @@ #include "hipcub/iterator/counting_input_iterator.hpp" #include "hipcub/iterator/discard_output_iterator.hpp" #include "hipcub/iterator/transform_input_iterator.hpp" +#include "test_utils.hpp" #include "test_utils_data_generation.hpp" template @@ -134,14 +135,9 @@ typedef ::testing::Types, params, params, params, - params -#ifndef __HIP_PLATFORM_NVIDIA__ - , - // Kernel doesn't work on NVidia. + params, params, - params -#endif - > + params> Params; std::vector get_sizes() @@ -152,24 +148,6 @@ std::vector get_sizes() return sizes; } -std::vector get_large_sizes(int seed_value) -{ - // clang-format off - std::vector sizes = { - (size_t{1} << 32) - 1, size_t{1} << 32, - (size_t{1} << 35) - 1, size_t{1} << 35 - }; - // clang-format on - const std::vector random_sizes - = test_utils::get_random_data(2, - (size_t{1} << 30) + 1, - (size_t{1} << 35) - 2, - seed_value); - sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); - std::sort(sizes.begin(), sizes.end()); - return sizes; -} - TYPED_TEST_SUITE(HipcubDeviceAdjacentDifference, Params); TYPED_TEST(HipcubDeviceAdjacentDifference, SubtractLeftCopy) @@ -386,7 +364,7 @@ TYPED_TEST(HipcubDeviceAdjacentDifferenceLargeTests, LargeIndicesAndOpOnce) = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - const std::vector sizes = get_large_sizes(seed_value); + const std::vector sizes = test_utils::get_large_sizes(seed_value); for(const auto size : sizes) { diff --git a/test/hipcub/test_hipcub_device_histogram.cpp b/test/hipcub/test_hipcub_device_histogram.cpp index 07d7b4d8..508cffd4 100644 --- a/test/hipcub/test_hipcub_device_histogram.cpp +++ b/test/hipcub/test_hipcub_device_histogram.cpp @@ -135,9 +135,7 @@ typedef ::testing::Types, params1, params1, params1, -#ifndef __HIP_PLATFORM_NVIDIA__ params1, -#endif params1, params1, params1> @@ -347,9 +345,7 @@ typedef ::testing::Types< params2, params2, params2, -#ifndef __HIP_PLATFORM_NVIDIA__ params2, -#endif params2, params2> Params2; @@ -574,9 +570,7 @@ typedef ::testing::Types, params3, params3, params3, -#ifndef __HIP_PLATFORM_NVIDIA__ params3, -#endif params3, params3, params3> @@ -871,9 +865,7 @@ typedef ::testing::Types< params4, params4, params4, -#ifndef __HIP_PLATFORM_NVIDIA__ params4, -#endif params4, params4> Params4; diff --git a/test/hipcub/test_hipcub_device_reduce_by_key.cpp b/test/hipcub/test_hipcub_device_reduce_by_key.cpp index 57a6537c..43042e56 100644 --- a/test/hipcub/test_hipcub_device_reduce_by_key.cpp +++ b/test/hipcub/test_hipcub_device_reduce_by_key.cpp @@ -62,15 +62,10 @@ typedef ::testing::Types< params, params, params, - params -#ifdef __HIP_PLATFORM_AMD__ - , - // Kernel doesn't work on NVidia. + params, // Sum for half and bfloat will result in values too big due to limited range. params, - params -#endif - > + params> Params; TYPED_TEST_SUITE(HipcubDeviceReduceByKey, Params); diff --git a/test/hipcub/test_hipcub_device_scan.cpp b/test/hipcub/test_hipcub_device_scan.cpp index 6bdbac38..0fbe5c0c 100644 --- a/test/hipcub/test_hipcub_device_scan.cpp +++ b/test/hipcub/test_hipcub_device_scan.cpp @@ -66,13 +66,9 @@ typedef ::testing::Types, DeviceScanParams, DeviceScanParams, DeviceScanParams, - DeviceScanParams -#ifdef __HIP_PLATFORM_AMD__ - , + DeviceScanParams, DeviceScanParams, - DeviceScanParams -#endif - > + DeviceScanParams> HipcubDeviceScanTestsParams; std::vector get_sizes() diff --git a/test/hipcub/test_hipcub_device_segmented_reduce.cpp b/test/hipcub/test_hipcub_device_segmented_reduce.cpp index 3cf7c293..ab6a58a5 100644 --- a/test/hipcub/test_hipcub_device_segmented_reduce.cpp +++ b/test/hipcub/test_hipcub_device_segmented_reduce.cpp @@ -23,11 +23,12 @@ #include "common_test_header.hpp" // Thread operators fixes for extended float types +#include "test_utils_data_generation.hpp" #include "test_utils_thread_operators.hpp" // hipcub API #include "hipcub/device/device_segmented_reduce.hpp" -#include "test_utils_data_generation.hpp" +#include "hipcub/iterator/counting_input_iterator.hpp" std::vector get_sizes() { @@ -67,19 +68,16 @@ class HipcubDeviceSegmentedReduceOp : public ::testing::Test { using params = Params; }; -typedef ::testing::Types, - params1, - params1, - params1, - params1, - params1, - params1 -#ifdef __HIP_PLATFORM_AMD__ - , - params1, - params1 -#endif - > +typedef ::testing::Types< + params1, + params1, + params1, + params1, + params1, + params1, + params1, + params1, + params1> Params1; TYPED_TEST_SUITE(HipcubDeviceSegmentedReduceOp, Params1); @@ -783,13 +781,13 @@ TYPED_TEST(HipcubDeviceSegmentedReduce, ArgMax) } template -class HipcubDeviceReduceArgMinMaxSpecialTests : public testing::Test +class HipcubDeviceSegmentedReduceArgMinMaxSpecialTests : public testing::Test {}; -using HipcubDeviceReduceArgMinMaxSpecialTestsParams +using HipcubDeviceSegmentedReduceArgMinMaxSpecialTestsParams = ::testing::Types; -TYPED_TEST_SUITE(HipcubDeviceReduceArgMinMaxSpecialTests, - HipcubDeviceReduceArgMinMaxSpecialTestsParams); +TYPED_TEST_SUITE(HipcubDeviceSegmentedReduceArgMinMaxSpecialTests, + HipcubDeviceSegmentedReduceArgMinMaxSpecialTestsParams); template void test_argminmax_allinf(TypeParam value, TypeParam empty_value) @@ -912,7 +910,7 @@ void test_argminmax_allinf(TypeParam value, TypeParam empty_value) // TODO: enable for NVIDIA platform once CUB backend incorporates fix #ifdef __HIP_PLATFORM_AMD__ /// ArgMin with all +Inf should result in +Inf. -TYPED_TEST(HipcubDeviceReduceArgMinMaxSpecialTests, ReduceArgMinInf) +TYPED_TEST(HipcubDeviceSegmentedReduceArgMinMaxSpecialTests, ReduceArgMinInf) { test_argminmax_allinf( test_utils::numeric_limits::infinity(), @@ -920,10 +918,139 @@ TYPED_TEST(HipcubDeviceReduceArgMinMaxSpecialTests, ReduceArgMinInf) } /// ArgMax with all -Inf should result in -Inf. -TYPED_TEST(HipcubDeviceReduceArgMinMaxSpecialTests, ReduceArgMaxInf) +TYPED_TEST(HipcubDeviceSegmentedReduceArgMinMaxSpecialTests, ReduceArgMaxInf) { test_argminmax_allinf( test_utils::numeric_limits::infinity_neg(), test_utils::numeric_limits::lowest()); } #endif // __HIP_PLATFORM_AMD__ + +// --------------------------------------------------------- +// Test for large indices +// --------------------------------------------------------- + +TEST(HipcubDeviceSegmentedReduceLargeIndicesTests, LargeIndices) +{ + int device_id = test_common_utils::obtain_device_from_ctest(); + SCOPED_TRACE(testing::Message() << "with device_id= " << device_id); + HIP_CHECK(hipSetDevice(device_id)); + + using T = size_t; + using input_type = T; + using output_type = T; + using IteratorType = hipcub::CountingInputIterator; + using reduce_op_type = typename hipcub::Sum; + using offset_type = T; + + const input_type init = input_type(0); + reduce_op_type reduce_op; + + static constexpr hipStream_t stream = 0; // default + + for(size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) + { + unsigned int seed_value + = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = test_utils::get_large_sizes(seed_value); + + for(const auto size : sizes) + { + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + const size_t min_segment_length = size_t{1} << 31; + const size_t max_segment_length = std::max(min_segment_length, size); + + std::random_device rd; + std::default_random_engine gen(seed_value); + std::uniform_int_distribution segment_length_dis(min_segment_length, + max_segment_length); + + const auto gauss_sum = [&](output_type n) + { return (n % 2 == 0) ? (n / 2) * (n - 1) : n * ((n - 1) / 2); }; + + std::vector aggregates_expected; + std::vector offsets; + + unsigned int num_segments = 0; + offset_type offset = 0; + while(offset < size) + { + const size_t segment_length = segment_length_dis(gen); + offsets.push_back(offset); + + const offset_type end = std::min(size, offset + segment_length); + output_type aggregate = init; + aggregate = reduce_op(aggregate, gauss_sum(end) - gauss_sum(offset)); + aggregates_expected.push_back(aggregate); + + num_segments++; + offset += segment_length; + } + offsets.push_back(size); + + // Device inputs + IteratorType d_input(input_type{0}); + offset_type* d_offsets = nullptr; + HIP_CHECK(test_common_utils::hipMallocHelper(&d_offsets, + sizeof(offset_type) * (num_segments + 1))); + HIP_CHECK(hipMemcpy(d_offsets, + offsets.data(), + sizeof(offset_type) * (num_segments + 1), + hipMemcpyHostToDevice)); + + // Device outputs + output_type* d_aggregates_output; + HIP_CHECK(test_common_utils::hipMallocHelper(&d_aggregates_output, + sizeof(output_type) * num_segments)); + HIP_CHECK(hipDeviceSynchronize()); + + // Temp storage + size_t temp_storage_size_bytes; + void* d_temp_storage = nullptr; + + // Get size of d_temp_storage + HIP_CHECK(hipcub::DeviceSegmentedReduce::Sum(d_temp_storage, + temp_storage_size_bytes, + d_input, + d_aggregates_output, + num_segments, + d_offsets, + d_offsets + 1, + stream)); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0U); + + // Allocate temporary storage + HIP_CHECK(test_common_utils::hipMallocHelper(&d_temp_storage, temp_storage_size_bytes)); + + // Run + HIP_CHECK(hipcub::DeviceSegmentedReduce::Sum(d_temp_storage, + temp_storage_size_bytes, + d_input, + d_aggregates_output, + num_segments, + d_offsets, + d_offsets + 1, + stream)); + + // Copy output to host + std::vector aggregates_output(num_segments); + HIP_CHECK(hipMemcpy(aggregates_output.data(), + d_aggregates_output, + sizeof(output_type) * num_segments, + hipMemcpyDeviceToHost)); + + HIP_CHECK(hipFree(d_offsets)); + HIP_CHECK(hipFree(d_temp_storage)); + HIP_CHECK(hipFree(d_aggregates_output)); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(aggregates_output, aggregates_expected)); + } + } +} diff --git a/test/hipcub/test_hipcub_device_select.cpp b/test/hipcub/test_hipcub_device_select.cpp index 57a66158..33a914ae 100644 --- a/test/hipcub/test_hipcub_device_select.cpp +++ b/test/hipcub/test_hipcub_device_select.cpp @@ -53,13 +53,9 @@ class HipcubDeviceSelectTests : public ::testing::Test }; typedef ::testing::Types, - DeviceSelectParams -#ifdef __HIP_PLATFORM_AMD__ - , + DeviceSelectParams, DeviceSelectParams, - DeviceSelectParams -#endif - > + DeviceSelectParams> HipcubDeviceSelectTestsParams; std::vector get_sizes() diff --git a/test/hipcub/test_hipcub_thread.cpp b/test/hipcub/test_hipcub_thread.cpp index 7cd4fd63..bdc25a95 100644 --- a/test/hipcub/test_hipcub_thread.cpp +++ b/test/hipcub/test_hipcub_thread.cpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2021, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2024, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,14 +27,17 @@ * ******************************************************************************/ - #include "hipcub/thread/thread_load.hpp" #include "hipcub/thread/thread_store.hpp" #include "hipcub/thread/thread_reduce.hpp" #include "hipcub/thread/thread_scan.hpp" #include "hipcub/thread/thread_search.hpp" +#include "test_utils_bfloat16.hpp" +#include "test_utils_half.hpp" + #include "common_test_header.hpp" +#include template< class T, @@ -78,9 +81,11 @@ typedef ::testing::Types< params, params, params, + params, + params, params, hipcub::LOAD_CV, hipcub::STORE_WB>, - params, hipcub::LOAD_CV, hipcub::STORE_WB> -> ThreadOperationTestParams; + params, hipcub::LOAD_CV, hipcub::STORE_WB>> + ThreadOperationTestParams; TYPED_TEST_SUITE(HipcubThreadOperationTests, ThreadOperationTestParams); @@ -98,7 +103,9 @@ TYPED_TEST(HipcubThreadOperationTests, Load) SCOPED_TRACE(testing::Message() << "with device_id= " << device_id); HIP_CHECK(hipSetDevice(device_id)); - using T = typename TestFixture::type; + using T = typename TestFixture::type; + using native_T = test_utils::convert_to_native_t; + constexpr hipcub::CacheLoadModifier Modifier = TestFixture::load_modifier; constexpr uint32_t block_size = 256; @@ -111,7 +118,15 @@ TYPED_TEST(HipcubThreadOperationTests, Load) SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data - std::vector input = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector input_native + = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector input(size); + + for(size_t i = 0; i < size; i++) + { + input[i] = test_utils::convert_to_device(input_native[i]); + } + std::vector output(size); // Calculate expected results on host @@ -145,7 +160,7 @@ TYPED_TEST(HipcubThreadOperationTests, Load) // Verifying results for(size_t i = 0; i < output.size(); i++) { - ASSERT_EQ(output[i], expected[i]); + ASSERT_EQ(static_cast(output[i]), static_cast(expected[i])); } HIP_CHECK(hipFree(device_input)); @@ -167,7 +182,9 @@ TYPED_TEST(HipcubThreadOperationTests, Store) SCOPED_TRACE(testing::Message() << "with device_id= " << device_id); HIP_CHECK(hipSetDevice(device_id)); - using T = typename TestFixture::type; + using T = typename TestFixture::type; + using native_T = test_utils::convert_to_native_t; + constexpr hipcub::CacheStoreModifier Modifier = TestFixture::store_modifier; constexpr uint32_t block_size = 256; constexpr uint32_t grid_size = 128; @@ -179,7 +196,15 @@ TYPED_TEST(HipcubThreadOperationTests, Store) SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data - std::vector input = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector input_native + = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector input(size); + + for(size_t i = 0; i < size; i++) + { + input[i] = test_utils::convert_to_device(input_native[i]); + } + std::vector output(size); // Calculate expected results on host @@ -213,7 +238,7 @@ TYPED_TEST(HipcubThreadOperationTests, Store) // Verifying results for(size_t i = 0; i < output.size(); i++) { - ASSERT_EQ(output[i], expected[i]); + ASSERT_EQ(static_cast(output[i]), static_cast(expected[i])); } HIP_CHECK(hipFree(device_input)); @@ -246,7 +271,9 @@ TYPED_TEST(HipcubThreadOperationTests, Reduction) SCOPED_TRACE(testing::Message() << "with device_id= " << device_id); HIP_CHECK(hipSetDevice(device_id)); - using T = typename TestFixture::type; + using T = typename TestFixture::type; + using native_T = test_utils::convert_to_native_t; + constexpr uint32_t length = 4; constexpr uint32_t block_size = 128 / length; constexpr uint32_t grid_size = 128; @@ -259,7 +286,15 @@ TYPED_TEST(HipcubThreadOperationTests, Reduction) SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data - std::vector input = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector input_native + = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector input(size); + + for(size_t i = 0; i < size; i++) + { + input[i] = test_utils::convert_to_device(input_native[i]); + } + std::vector output(size); std::vector expected(size); @@ -277,7 +312,6 @@ TYPED_TEST(HipcubThreadOperationTests, Reduction) expected[offset] = result; } } - //std::vector expected = input; // Preparing device T* device_input; @@ -307,8 +341,7 @@ TYPED_TEST(HipcubThreadOperationTests, Reduction) // Verifying results for(size_t i = 0; i < output.size(); i+=length) { - //std::cout << "i: " << i << " " << expected[i] << " - " << output[i] << std::endl; - ASSERT_EQ(output[i], expected[i]); + ASSERT_EQ(static_cast(output[i]), static_cast(expected[i])); } HIP_CHECK(hipFree(device_input)); @@ -334,7 +367,9 @@ TYPED_TEST(HipcubThreadOperationTests, Scan) SCOPED_TRACE(testing::Message() << "with device_id= " << device_id); HIP_CHECK(hipSetDevice(device_id)); - using T = typename TestFixture::type; + using T = typename TestFixture::type; + using native_T = test_utils::convert_to_native_t; + constexpr uint32_t length = 4; constexpr uint32_t block_size = 128 / length; constexpr uint32_t grid_size = 128; @@ -347,7 +382,15 @@ TYPED_TEST(HipcubThreadOperationTests, Scan) SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data - std::vector input = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector input_native + = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector input(size); + + for(size_t i = 0; i < size; i++) + { + input[i] = test_utils::convert_to_device(input_native[i]); + } + std::vector output(size); std::vector expected(size); @@ -395,8 +438,7 @@ TYPED_TEST(HipcubThreadOperationTests, Scan) // Verifying results for(size_t i = 0; i < output.size(); i++) { - //std::cout << "i: " << i << " " << input[i] << " - " << expected[i] << " - " << output[i] << std::endl; - ASSERT_EQ(output[i], expected[i]); + ASSERT_EQ(static_cast(output[i]), static_cast(expected[i])); } HIP_CHECK(hipFree(device_input)); @@ -429,8 +471,10 @@ TYPED_TEST(HipcubThreadOperationTests, Bounds) SCOPED_TRACE(testing::Message() << "with device_id= " << device_id); HIP_CHECK(hipSetDevice(device_id)); - using T = typename TestFixture::type; - using OffsetT = uint32_t; + using T = typename TestFixture::type; + using native_T = test_utils::convert_to_native_t; + using OffsetT = uint32_t; + constexpr uint32_t block_size = 256; constexpr uint32_t grid_size = 1; @@ -440,7 +484,7 @@ TYPED_TEST(HipcubThreadOperationTests, Bounds) SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); uint32_t num_items = test_utils::get_random_value(1, 12, seed_value); - T val = test_utils::get_random_value(2, 100, seed_value); + T val = test_utils::convert_to_device(test_utils::get_random_value(2, 100, seed_value)); uint32_t size = block_size * grid_size * num_items; @@ -467,7 +511,8 @@ TYPED_TEST(HipcubThreadOperationTests, Bounds) while (local_num_items > 0) { OffsetT half = local_num_items >> 1; - if (input[input_offset + retval + half] < val) + if(static_cast(input[input_offset + retval + half]) + < static_cast(val)) { retval = retval + (half + 1); local_num_items = local_num_items - (half + 1); @@ -485,7 +530,8 @@ TYPED_TEST(HipcubThreadOperationTests, Bounds) while (local_num_items > 0) { OffsetT half = local_num_items >> 1; - if (val < input[input_offset + retval + half]) + if(static_cast(val) + < static_cast(input[input_offset + retval + half])) { local_num_items = half; } @@ -542,8 +588,10 @@ TYPED_TEST(HipcubThreadOperationTests, Bounds) // Verifying results for(size_t i = 0; i < output_lower_bound.size(); i++) { - ASSERT_EQ(output_lower_bound[i], expected_lower_bound[i]); - ASSERT_EQ(output_upper_bound[i], expected_upper_bound[i]); + ASSERT_EQ(static_cast(output_lower_bound[i]), + static_cast(expected_lower_bound[i])); + ASSERT_EQ(static_cast(output_upper_bound[i]), + static_cast(expected_upper_bound[i])); } HIP_CHECK(hipFree(device_input)); diff --git a/test/hipcub/test_hipcub_warp_reduce.cpp b/test/hipcub/test_hipcub_warp_reduce.cpp index d5949664..819cf1ed 100644 --- a/test/hipcub/test_hipcub_warp_reduce.cpp +++ b/test/hipcub/test_hipcub_warp_reduce.cpp @@ -63,6 +63,27 @@ typedef ::testing::Types< #ifdef __HIP_PLATFORM_AMD__ params, #endif + // half + params, + params, + params, + params, + params, + params, +#ifdef __HIP_PLATFORM_AMD__ + params, +#endif + // bfloat16 + params, + params, + params, + params, + params, + params, +#ifdef __HIP_PLATFORM_AMD__ + params, +#endif + // shared memory reduce // Integer params, @@ -168,8 +189,8 @@ TYPED_TEST(HipcubWarpReduceTests, Reduce) // Generate data std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); - std::vector output(size / logical_warp_size, 0); - std::vector expected(output.size(), 1); + std::vector output(size / logical_warp_size); + std::vector expected(output.size()); // Calculate expected results on host for(size_t i = 0; i < output.size(); i++) @@ -318,8 +339,8 @@ TYPED_TEST(HipcubWarpReduceTests, ReduceValid) // Generate data std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); - std::vector output(size / logical_warp_size, 0); - std::vector expected(output.size(), 1); + std::vector output(size / logical_warp_size); + std::vector expected(output.size()); // Calculate expected results on host for(size_t i = 0; i < output.size(); i++) diff --git a/test/hipcub/test_hipcub_warp_scan.cpp b/test/hipcub/test_hipcub_warp_scan.cpp index bc7739bf..ee5e41ee 100644 --- a/test/hipcub/test_hipcub_warp_scan.cpp +++ b/test/hipcub/test_hipcub_warp_scan.cpp @@ -66,6 +66,24 @@ typedef ::testing::Types< params, #ifdef __HIP_PLATFORM_AMD__ params, +#endif + // Half + params, + params, + params, + params, + params, +#ifdef __HIP_PLATFORM_AMD__ + params, +#endif + // Bfloat16 + params, + params, + params, + params, + params, +#ifdef __HIP_PLATFORM_AMD__ + params, #endif // shared memory scan // Integer @@ -81,10 +99,12 @@ typedef ::testing::Types< params, params #ifdef __HIP_PLATFORM_AMD__ - ,params, + , + params, params #endif -> HipcubWarpScanTestParams; + > + HipcubWarpScanTestParams; TYPED_TEST_SUITE(HipcubWarpScanTests, HipcubWarpScanTestParams); @@ -168,7 +188,7 @@ TYPED_TEST(HipcubWarpScanTests, InclusiveScan) // Generate data std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); std::vector output(size); - std::vector expected(output.size(), 0); + std::vector expected(output.size()); // Calculate expected results on host for(size_t i = 0; i < input.size() / logical_warp_size; i++) @@ -332,8 +352,8 @@ TYPED_TEST(HipcubWarpScanTests, InclusiveScanReduce) std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); std::vector output(size); std::vector output_reductions(size / logical_warp_size); - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); + std::vector expected(output.size()); + std::vector expected_reductions(output_reductions.size()); // Calculate expected results on host for(size_t i = 0; i < output.size() / logical_warp_size; i++) @@ -498,8 +518,9 @@ TYPED_TEST(HipcubWarpScanTests, ExclusiveScan) // Generate data std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); std::vector output(size); - std::vector expected(input.size(), 0); - const T init = test_utils::get_random_value(0, 100, seed_value + seed_value_addition); + std::vector expected(input.size()); + const T init = static_cast( + test_utils::get_random_value(0, 100, seed_value + seed_value_addition)); // Calculate expected results on host for(size_t i = 0; i < input.size() / logical_warp_size; i++) @@ -658,9 +679,10 @@ TYPED_TEST(HipcubWarpScanTests, ExclusiveReduceScan) std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); std::vector output(size); std::vector output_reductions(size / logical_warp_size); - std::vector expected(input.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - const T init = test_utils::get_random_value(0, 100, seed_value + seed_value_addition); + std::vector expected(input.size()); + std::vector expected_reductions(output_reductions.size()); + const T init = static_cast( + test_utils::get_random_value(0, 100, seed_value + seed_value_addition)); // Calculate expected results on host for(size_t i = 0; i < input.size() / logical_warp_size; i++) @@ -843,9 +865,10 @@ TYPED_TEST(HipcubWarpScanTests, Scan) std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); std::vector output_inclusive(size); std::vector output_exclusive(size); - std::vector expected_inclusive(output_inclusive.size(), 0); - std::vector expected_exclusive(output_exclusive.size(), 0); - const T init = test_utils::get_random_value(0, 100, seed_value + seed_value_addition); + std::vector expected_inclusive(output_inclusive.size()); + std::vector expected_exclusive(output_exclusive.size()); + const T init = static_cast( + test_utils::get_random_value(0, 100, seed_value + seed_value_addition)); // Calculate expected results on host for(size_t i = 0; i < input.size() / logical_warp_size; i++) @@ -1002,7 +1025,7 @@ TYPED_TEST(HipcubWarpScanTests, InclusiveScanCustomType) // Generate data std::vector input(size); std::vector output(size); - std::vector expected(output.size(), 0); + std::vector expected(output.size()); // Initializing input data { @@ -1018,7 +1041,9 @@ TYPED_TEST(HipcubWarpScanTests, InclusiveScanCustomType) // Calculate expected results on host for(size_t i = 0; i < input.size() / logical_warp_size; i++) { - acc_type accumulator(0); + acc_type accumulator; + accumulator.x = 0; + accumulator.y = 0; for(size_t j = 0; j < logical_warp_size; j++) { auto idx = i * logical_warp_size + j; diff --git a/test/hipcub/test_utils.hpp b/test/hipcub/test_utils.hpp index 8095b608..a4b15886 100644 --- a/test/hipcub/test_utils.hpp +++ b/test/hipcub/test_utils.hpp @@ -139,20 +139,6 @@ struct select_plus_operator_host typedef T acc_type; }; -template<> -struct select_plus_operator_host -{ - typedef test_utils::plus type; - typedef double acc_type; -}; - -template<> -struct select_plus_operator_host -{ - typedef test_utils::plus type; - typedef double acc_type; -}; - template OutputIt host_inclusive_scan_impl( InputIt first, InputIt last, OutputIt d_first, BinaryOperation op, acc_type) diff --git a/test/hipcub/test_utils_data_generation.hpp b/test/hipcub/test_utils_data_generation.hpp index 6cb09803..8fa7402b 100644 --- a/test/hipcub/test_utils_data_generation.hpp +++ b/test/hipcub/test_utils_data_generation.hpp @@ -1,6 +1,6 @@ // MIT License // -// Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -462,6 +462,24 @@ inline std::vector get_random_data01(size_t size, float p, int seed_value) return data; } +inline std::vector get_large_sizes(int seed_value) +{ + // clang-format off + std::vector sizes = { + (size_t{1} << 32) - 1, size_t{1} << 32, + (size_t{1} << 35) - 1, size_t{1} << 35 + }; + // clang-format on + const std::vector random_sizes + = test_utils::get_random_data(2, + (size_t{1} << 30) + 1, + (size_t{1} << 35) - 2, + seed_value); + sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); + std::sort(sizes.begin(), sizes.end()); + return sizes; +} + } // namespace test_utils #endif // HIPCUB_TEST_HIPCUB_TEST_UTILS_DATA_GENERATION_HPP_