diff --git a/composable_kernel/include/utility/tensor_space_filling_curve.hpp b/composable_kernel/include/utility/tensor_space_filling_curve.hpp new file mode 100644 index 0000000000..a8f12cd8e1 --- /dev/null +++ b/composable_kernel/include/utility/tensor_space_filling_curve.hpp @@ -0,0 +1,131 @@ +#include "math.hpp" +#include "sequence.hpp" +#include "tensor_adaptor.hpp" +#include "statically_indexed_array_multi_index.hpp" +#include "tuple_helper.hpp" + +namespace ck { + +template // # of scalars per access in each dimension +struct SpaceFillingCurve +{ + static constexpr index_t nDim = TensorLengths::Size(); + + using Index = MultiIndex; + + static constexpr index_t ScalarPerVector = + reduce_on_sequence(ScalarsPerAccess{}, math::multiplies{}, Number<1>{}); + + static constexpr auto access_lengths = TensorLengths{} / ScalarsPerAccess{}; + static constexpr auto dim_access_order = DimAccessOrder{}; + static constexpr auto ordered_access_lengths = + container_reorder_given_new2old(access_lengths, dim_access_order); + + static constexpr auto to_index_adaptor = make_single_stage_tensor_adaptor( + make_tuple(make_merge_transform(ordered_access_lengths)), + make_tuple(typename arithmetic_sequence_gen<0, nDim, 1>::type{}), + make_tuple(Sequence<0>{})); + + static constexpr auto I0 = Number<0>{}; + static constexpr auto I1 = Number<1>{}; + + __host__ __device__ static constexpr index_t GetNumOfAccess() + { + return reduce_on_sequence(TensorLengths{}, math::multiplies{}, Number<1>{}) / + ScalarPerVector; + } + + template + static __device__ __host__ constexpr auto GetForwardStep(Number) + { + + constexpr auto idx_curr = GetIndex(Number{}); + constexpr auto idx_next = GetIndex(Number{}); + return idx_next - idx_curr; + } + + template + static __device__ __host__ constexpr auto GetBackwardStep(Number) + { + static_assert(AccessIdx1d > 0, "1D index should be larger than 0"); + + constexpr auto idx_curr = GetIndex(Number{}); + constexpr auto idx_prev = GetIndex(Number{}); + return idx_prev - idx_curr; + } + + template + static __device__ __host__ constexpr Index GetIndex(Number) + { +#if 0 + /* + * \todo: TensorAdaptor::CalculateBottomIndex does NOT return constexpr as expected. + */ + constexpr auto ordered_access_idx = to_index_adaptor.CalculateBottomIndex(make_multi_index(Number{})); +#else + + constexpr auto access_strides = container_reverse_exclusive_scan( + ordered_access_lengths, math::multiplies{}, Number<1>{}); + + constexpr auto idx_1d = Number{}; + // Given tensor strides \p access_lengths, and 1D index of space-filling-curve, compute the + // idim-th element of multidimensional index. + // All constexpr variables have to be captured by VALUE. + constexpr auto compute_index = [ idx_1d, access_strides ](auto idim) constexpr + { + constexpr auto compute_index_impl = [ idx_1d, access_strides ](auto jdim) constexpr + { + auto res = idx_1d.value; + auto id = 0; + + static_for<0, jdim.value + 1, 1>{}([&](auto kdim) { + id = res / access_strides[kdim].value; + res -= id * access_strides[kdim].value; + }); + + return id; + }; + + constexpr auto id = compute_index_impl(idim); + return Number{}; + }; + + constexpr auto ordered_access_idx = generate_tuple(compute_index, Number{}); +#endif + constexpr auto forward_sweep = [&]() { + StaticallyIndexedArray forward_sweep_; + + forward_sweep_(I0) = true; + + static_for<1, nDim, 1>{}([&](auto idim) { + index_t tmp = ordered_access_idx[I0]; + + static_for<1, idim, 1>{}( + [&](auto j) { tmp = tmp * ordered_access_lengths[j] + ordered_access_idx[j]; }); + + forward_sweep_(idim) = tmp % 2 == 0; + }); + + return forward_sweep_; + }(); + + // calculate multi-dim tensor index + auto idx_md = [&]() { + Index ordered_idx; + + static_for<0, nDim, 1>{}([&](auto idim) { + ordered_idx(idim) = forward_sweep[idim] ? ordered_access_idx[idim] + : ordered_access_lengths[idim] - 1 - + ordered_access_idx[idim]; + }); + + return container_reorder_given_old2new(ordered_idx, dim_access_order) * + ScalarsPerAccess{}; + }(); + return idx_md; + } +}; + +} // namespace ck diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ff483b8117..45748640dc 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -45,3 +45,8 @@ target_link_libraries(test_reference_conv_fwd PRIVATE host_tensor) set(CONVND_FWD_XDL_SOURCE convnd_fwd_xdl/main.cpp) add_executable(test_convnd_fwd_xdl ${CONVND_FWD_XDL_SOURCE}) target_link_libraries(test_convnd_fwd_xdl PRIVATE host_tensor) + +# test space_filling_curve_ +set(SPACE_FILLING_CURVE_SOURCE space_filling_curve/space_filling_curve.cpp) +add_executable(space_filling_curve ${SPACE_FILLING_CURVE_SOURCE}) +target_link_libraries(space_filling_curve PRIVATE host_tensor) diff --git a/test/space_filling_curve/space_filling_curve.cpp b/test/space_filling_curve/space_filling_curve.cpp new file mode 100644 index 0000000000..64e8044608 --- /dev/null +++ b/test/space_filling_curve/space_filling_curve.cpp @@ -0,0 +1,131 @@ +#include +#include +#include +#include + +#include "tensor_space_filling_curve.hpp" + +using namespace ck; + +void traverse_using_space_filling_curve(); + +int main(int argc, char** argv) +{ + (void)argc; + (void)argv; + + { + traverse_using_space_filling_curve(); + auto err = hipDeviceSynchronize(); + (void)err; + assert(err == hipSuccess); + } + return 0; +} + +void traverse_using_space_filling_curve() +{ + constexpr auto I0 = Number<0>{}; + constexpr auto I1 = Number<1>{}; + constexpr auto I2 = Number<2>{}; + + using TensorLengths = Sequence<4, 10, 9>; + using DimAccessOrder = Sequence<2, 0, 1>; + using ScalarsPerAccess = Sequence<1, 2, 3>; + using SpaceFillingCurve = SpaceFillingCurve; + + constexpr auto expected = make_tuple(make_tuple(0, 0, 0), + make_tuple(0, 2, 0), + make_tuple(0, 4, 0), + make_tuple(0, 6, 0), + make_tuple(0, 8, 0), + make_tuple(1, 8, 0), + make_tuple(1, 6, 0), + make_tuple(1, 4, 0), + make_tuple(1, 2, 0), + make_tuple(1, 0, 0), + make_tuple(2, 0, 0), + make_tuple(2, 2, 0), + make_tuple(2, 4, 0), + make_tuple(2, 6, 0), + make_tuple(2, 8, 0), + make_tuple(3, 8, 0), + make_tuple(3, 6, 0), + make_tuple(3, 4, 0), + make_tuple(3, 2, 0), + make_tuple(3, 0, 0), + make_tuple(3, 0, 3), + make_tuple(3, 2, 3), + make_tuple(3, 4, 3), + make_tuple(3, 6, 3), + make_tuple(3, 8, 3), + make_tuple(2, 8, 3), + make_tuple(2, 6, 3), + make_tuple(2, 4, 3), + make_tuple(2, 2, 3), + make_tuple(2, 0, 3), + make_tuple(1, 0, 3), + make_tuple(1, 2, 3), + make_tuple(1, 4, 3), + make_tuple(1, 6, 3), + make_tuple(1, 8, 3), + make_tuple(0, 8, 3), + make_tuple(0, 6, 3), + make_tuple(0, 4, 3), + make_tuple(0, 2, 3), + make_tuple(0, 0, 3), + make_tuple(0, 0, 6), + make_tuple(0, 2, 6), + make_tuple(0, 4, 6), + make_tuple(0, 6, 6), + make_tuple(0, 8, 6), + make_tuple(1, 8, 6), + make_tuple(1, 6, 6), + make_tuple(1, 4, 6), + make_tuple(1, 2, 6), + make_tuple(1, 0, 6), + make_tuple(2, 0, 6), + make_tuple(2, 2, 6), + make_tuple(2, 4, 6), + make_tuple(2, 6, 6), + make_tuple(2, 8, 6), + make_tuple(3, 8, 6), + make_tuple(3, 6, 6), + make_tuple(3, 4, 6), + make_tuple(3, 2, 6), + make_tuple(3, 0, 6)); + + constexpr index_t num_accesses = SpaceFillingCurve::GetNumOfAccess(); + + static_assert(num_accesses == reduce_on_sequence(TensorLengths{} / ScalarsPerAccess{}, + math::multiplies{}, + Number<1>{})); + + static_for<1, num_accesses, 1>{}([&](auto i) { + constexpr auto idx_curr = SpaceFillingCurve::GetIndex(i); + + static_assert(idx_curr[I0] == expected[i][I0]); + static_assert(idx_curr[I1] == expected[i][I1]); + static_assert(idx_curr[I2] == expected[i][I2]); + + constexpr auto backward_step = SpaceFillingCurve::GetBackwardStep(i); + constexpr auto expected_step = expected[i - I1] - expected[i]; + static_assert(backward_step[I0] == expected_step[I0]); + static_assert(backward_step[I1] == expected_step[I1]); + static_assert(backward_step[I2] == expected_step[I2]); + }); + + static_for<0, num_accesses - 1, 1>{}([&](auto i) { + constexpr auto idx_curr = SpaceFillingCurve::GetIndex(i); + + static_assert(idx_curr[I0] == expected[i][I0]); + static_assert(idx_curr[I1] == expected[i][I1]); + static_assert(idx_curr[I2] == expected[i][I2]); + + constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(i); + constexpr auto expected_step = expected[i + I1] - expected[i]; + static_assert(forward_step[I0] == expected_step[I0]); + static_assert(forward_step[I1] == expected_step[I1]); + static_assert(forward_step[I2] == expected_step[I2]); + }); +}