-
Notifications
You must be signed in to change notification settings - Fork 139
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add space_filling_curve
* clean up and move space_filling_curve into test
* add functions for backward and forward steps; hard-coded results in unit test
* minor changes
- Loading branch information
Showing
3 changed files
with
267 additions
and
0 deletions.
There are no files selected for viewing
131 changes: 131 additions & 0 deletions
131
composable_kernel/include/utility/tensor_space_filling_curve.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
#include "math.hpp" | ||
#include "sequence.hpp" | ||
#include "tensor_adaptor.hpp" | ||
#include "statically_indexed_array_multi_index.hpp" | ||
#include "tuple_helper.hpp" | ||
|
||
namespace ck {

// Enumerates the accesses of an nDim tensor in a back-and-forth ("snake")
// order: each access moves ScalarsPerAccess scalars, and the direction along a
// dimension alternates with the parity of the enclosing dimensions' indices,
// so consecutive accesses differ by a small step (see GetForwardStep).
//
// TensorLengths    - lengths of each tensor dimension
// DimAccessOrder   - traversal order of the dimensions (last entry varies fastest)
// ScalarsPerAccess - # of scalars per access in each dimension
template <typename TensorLengths,
          typename DimAccessOrder,
          typename ScalarsPerAccess>
struct SpaceFillingCurve
{
    static constexpr index_t nDim = TensorLengths::Size();

    using Index = MultiIndex<nDim>;

    // Total number of scalars moved by one vectorized access.
    static constexpr index_t ScalarPerVector =
        reduce_on_sequence(ScalarsPerAccess{}, math::multiplies{}, Number<1>{});

    // Number of accesses along each dimension (length / per-dim vector width).
    static constexpr auto access_lengths   = TensorLengths{} / ScalarsPerAccess{};
    static constexpr auto dim_access_order = DimAccessOrder{};
    static constexpr auto ordered_access_lengths =
        container_reorder_given_new2old(access_lengths, dim_access_order);

    // NOTE(review): only referenced by the disabled (#if 0) path in GetIndex;
    // kept until TensorAdaptor::CalculateBottomIndex works in constexpr context.
    static constexpr auto to_index_adaptor = make_single_stage_tensor_adaptor(
        make_tuple(make_merge_transform(ordered_access_lengths)),
        make_tuple(typename arithmetic_sequence_gen<0, nDim, 1>::type{}),
        make_tuple(Sequence<0>{}));

    static constexpr auto I0 = Number<0>{};
    static constexpr auto I1 = Number<1>{};

    // Total number of vectorized accesses needed to cover the whole tensor.
    __host__ __device__ static constexpr index_t GetNumOfAccess()
    {
        return reduce_on_sequence(TensorLengths{}, math::multiplies{}, Number<1>{}) /
               ScalarPerVector;
    }

    // Step (multi-index difference) from access AccessIdx1d to AccessIdx1d + 1.
    template <index_t AccessIdx1d>
    static __device__ __host__ constexpr auto GetForwardStep(Number<AccessIdx1d>)
    {
        // Fix: this bound check was missing (GetBackwardStep already has the
        // symmetric lower-bound check); AccessIdx1d + 1 must itself be a valid
        // 1D access index, otherwise GetIndex() is evaluated out of range.
        static_assert(AccessIdx1d + 1 < GetNumOfAccess(),
                      "1D index + 1 should be smaller than GetNumOfAccess()");

        constexpr auto idx_curr = GetIndex(Number<AccessIdx1d>{});
        constexpr auto idx_next = GetIndex(Number<AccessIdx1d + 1>{});
        return idx_next - idx_curr;
    }

    // Step (multi-index difference) from access AccessIdx1d to AccessIdx1d - 1.
    template <index_t AccessIdx1d>
    static __device__ __host__ constexpr auto GetBackwardStep(Number<AccessIdx1d>)
    {
        static_assert(AccessIdx1d > 0, "1D index should be larger than 0");

        constexpr auto idx_curr = GetIndex(Number<AccessIdx1d>{});
        constexpr auto idx_prev = GetIndex(Number<AccessIdx1d - 1>{});
        return idx_prev - idx_curr;
    }

    // Multi-dimensional tensor index (in scalar units) of the AccessIdx1d-th
    // access along the curve.
    template <index_t AccessIdx1d>
    static __device__ __host__ constexpr Index GetIndex(Number<AccessIdx1d>)
    {
#if 0
        /*
         * \todo: TensorAdaptor::CalculateBottomIndex does NOT return constexpr as expected.
         */
        constexpr auto ordered_access_idx = to_index_adaptor.CalculateBottomIndex(make_multi_index(Number<AccessIdx1d>{}));
#else
        // Row-major strides of the reordered access grid, used to decompose
        // the 1D index into per-dimension access indices.
        constexpr auto access_strides = container_reverse_exclusive_scan(
            ordered_access_lengths, math::multiplies{}, Number<1>{});

        constexpr auto idx_1d = Number<AccessIdx1d>{};
        // Given tensor strides \p access_lengths, and 1D index of space-filling-curve,
        // compute the idim-th element of multidimensional index.
        // All constexpr variables have to be captured by VALUE.
        constexpr auto compute_index = [ idx_1d, access_strides ](auto idim) constexpr
        {
            constexpr auto compute_index_impl = [ idx_1d, access_strides ](auto jdim) constexpr
            {
                auto res = idx_1d.value;
                auto id  = 0;

                // Successive divide-by-stride / keep-remainder, up to and
                // including dimension jdim; `id` ends as the jdim-th digit.
                static_for<0, jdim.value + 1, 1>{}([&](auto kdim) {
                    id = res / access_strides[kdim].value;
                    res -= id * access_strides[kdim].value;
                });

                return id;
            };

            constexpr auto id = compute_index_impl(idim);
            return Number<id>{};
        };

        constexpr auto ordered_access_idx = generate_tuple(compute_index, Number<nDim>{});
#endif
        // forward_sweep[idim] is true when dimension idim is traversed in
        // increasing order for this access; the parity of the flattened index
        // of the enclosing dimensions decides the direction (snake pattern).
        constexpr auto forward_sweep = [&]() {
            StaticallyIndexedArray<bool, nDim> forward_sweep_;

            forward_sweep_(I0) = true;

            static_for<1, nDim, 1>{}([&](auto idim) {
                index_t tmp = ordered_access_idx[I0];

                static_for<1, idim, 1>{}(
                    [&](auto j) { tmp = tmp * ordered_access_lengths[j] + ordered_access_idx[j]; });

                forward_sweep_(idim) = tmp % 2 == 0;
            });

            return forward_sweep_;
        }();

        // Calculate the multi-dim tensor index: mirror the dimensions that are
        // traversed backwards, undo the dimension reordering, then scale from
        // access units back to scalar units.
        auto idx_md = [&]() {
            Index ordered_idx;

            static_for<0, nDim, 1>{}([&](auto idim) {
                ordered_idx(idim) = forward_sweep[idim] ? ordered_access_idx[idim]
                                                        : ordered_access_lengths[idim] - 1 -
                                                              ordered_access_idx[idim];
            });

            return container_reorder_given_old2new(ordered_idx, dim_access_order) *
                   ScalarsPerAccess{};
        }();
        return idx_md;
    }
};

} // namespace ck
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
#include <vector> | ||
#include <iostream> | ||
#include <numeric> | ||
#include <cassert> | ||
|
||
#include "tensor_space_filling_curve.hpp" | ||
|
||
using namespace ck; | ||
|
||
void traverse_using_space_filling_curve(); | ||
|
||
// Test driver: run the compile-time space-filling-curve checks, then make sure
// the device is in a clean state.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    {
        traverse_using_space_filling_curve();

        // Fix: assert() compiles out under NDEBUG, which would silently
        // swallow a device error; check explicitly and fail via exit code.
        const auto err = hipDeviceSynchronize();
        if(err != hipSuccess)
        {
            std::cerr << "hipDeviceSynchronize failed with error code "
                      << static_cast<int>(err) << std::endl;
            return 1;
        }
    }
    return 0;
}
|
||
// Compile-time unit test: walk a 4x10x9 tensor with a SpaceFillingCurve using
// dimension order {2, 0, 1} and per-dimension vector widths {1, 2, 3}, and
// compare every visited multi-index plus every forward/backward step against a
// hard-coded reference table. All checks are static_asserts, so successfully
// compiling this function IS the test.
void traverse_using_space_filling_curve()
{
    constexpr auto I0 = Number<0>{};
    constexpr auto I1 = Number<1>{};
    constexpr auto I2 = Number<2>{};

    using TensorLengths    = Sequence<4, 10, 9>;
    using DimAccessOrder   = Sequence<2, 0, 1>;
    using ScalarsPerAccess = Sequence<1, 2, 3>;
    using Curve            = SpaceFillingCurve<TensorLengths, DimAccessOrder, ScalarsPerAccess>;

    // Reference traversal order, one row per access.
    constexpr auto expected = make_tuple(make_tuple(0, 0, 0),
                                         make_tuple(0, 2, 0),
                                         make_tuple(0, 4, 0),
                                         make_tuple(0, 6, 0),
                                         make_tuple(0, 8, 0),
                                         make_tuple(1, 8, 0),
                                         make_tuple(1, 6, 0),
                                         make_tuple(1, 4, 0),
                                         make_tuple(1, 2, 0),
                                         make_tuple(1, 0, 0),
                                         make_tuple(2, 0, 0),
                                         make_tuple(2, 2, 0),
                                         make_tuple(2, 4, 0),
                                         make_tuple(2, 6, 0),
                                         make_tuple(2, 8, 0),
                                         make_tuple(3, 8, 0),
                                         make_tuple(3, 6, 0),
                                         make_tuple(3, 4, 0),
                                         make_tuple(3, 2, 0),
                                         make_tuple(3, 0, 0),
                                         make_tuple(3, 0, 3),
                                         make_tuple(3, 2, 3),
                                         make_tuple(3, 4, 3),
                                         make_tuple(3, 6, 3),
                                         make_tuple(3, 8, 3),
                                         make_tuple(2, 8, 3),
                                         make_tuple(2, 6, 3),
                                         make_tuple(2, 4, 3),
                                         make_tuple(2, 2, 3),
                                         make_tuple(2, 0, 3),
                                         make_tuple(1, 0, 3),
                                         make_tuple(1, 2, 3),
                                         make_tuple(1, 4, 3),
                                         make_tuple(1, 6, 3),
                                         make_tuple(1, 8, 3),
                                         make_tuple(0, 8, 3),
                                         make_tuple(0, 6, 3),
                                         make_tuple(0, 4, 3),
                                         make_tuple(0, 2, 3),
                                         make_tuple(0, 0, 3),
                                         make_tuple(0, 0, 6),
                                         make_tuple(0, 2, 6),
                                         make_tuple(0, 4, 6),
                                         make_tuple(0, 6, 6),
                                         make_tuple(0, 8, 6),
                                         make_tuple(1, 8, 6),
                                         make_tuple(1, 6, 6),
                                         make_tuple(1, 4, 6),
                                         make_tuple(1, 2, 6),
                                         make_tuple(1, 0, 6),
                                         make_tuple(2, 0, 6),
                                         make_tuple(2, 2, 6),
                                         make_tuple(2, 4, 6),
                                         make_tuple(2, 6, 6),
                                         make_tuple(2, 8, 6),
                                         make_tuple(3, 8, 6),
                                         make_tuple(3, 6, 6),
                                         make_tuple(3, 4, 6),
                                         make_tuple(3, 2, 6),
                                         make_tuple(3, 0, 6));

    constexpr index_t num_accesses = Curve::GetNumOfAccess();

    // Sanity: total accesses == product of per-dimension access counts.
    static_assert(num_accesses == reduce_on_sequence(TensorLengths{} / ScalarsPerAccess{},
                                                     math::multiplies{},
                                                     Number<1>{}));

    // Every index, plus the step to its successor.
    static_for<0, num_accesses - 1, 1>{}([&](auto i) {
        constexpr auto idx = Curve::GetIndex(i);

        static_assert(idx[I0] == expected[i][I0]);
        static_assert(idx[I1] == expected[i][I1]);
        static_assert(idx[I2] == expected[i][I2]);

        constexpr auto step      = Curve::GetForwardStep(i);
        constexpr auto step_want = expected[i + I1] - expected[i];
        static_assert(step[I0] == step_want[I0]);
        static_assert(step[I1] == step_want[I1]);
        static_assert(step[I2] == step_want[I2]);
    });

    // Every index, plus the step to its predecessor.
    static_for<1, num_accesses, 1>{}([&](auto i) {
        constexpr auto idx = Curve::GetIndex(i);

        static_assert(idx[I0] == expected[i][I0]);
        static_assert(idx[I1] == expected[i][I1]);
        static_assert(idx[I2] == expected[i][I2]);

        constexpr auto step      = Curve::GetBackwardStep(i);
        constexpr auto step_want = expected[i - I1] - expected[i];
        static_assert(step[I0] == step_want[I0]);
        static_assert(step[I1] == step_want[I1]);
        static_assert(step[I2] == step_want[I2]);
    });
}