From d3af41be65d30eb46fa1a755fd846ca09c39f923 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Thu, 17 Jun 2021 11:41:20 -0400 Subject: [PATCH 1/3] Add neighbor_parallel functor test --- core/unit_test/neighbor_unit_test.hpp | 92 +++++++++++++++++++++++++++ core/unit_test/tstNeighborList.hpp | 5 ++ 2 files changed, 97 insertions(+) diff --git a/core/unit_test/neighbor_unit_test.hpp b/core/unit_test/neighbor_unit_test.hpp index 395bdbbd7..6029318ed 100644 --- a/core/unit_test/neighbor_unit_test.hpp +++ b/core/unit_test/neighbor_unit_test.hpp @@ -598,6 +598,98 @@ void checkSecondNeighborParallelReduce( const ListType& nlist, EXPECT_FLOAT_EQ( N2_sum, vector_sum ); } +//---------------------------------------------------------------------------// +// Functor work tag for only assigning half the value. +class DoubleValueWorkTag +{ +}; + +template +struct NeighParallelOp +{ + ViewType _result; + + NeighParallelOp( const int num_particle ) + { + _result = ViewType( "result", num_particle ); + } + + // tagged version that assigns double the value. + KOKKOS_INLINE_FUNCTION void operator()( const DoubleValueWorkTag&, + const int i, const int n ) const + { + Kokkos::atomic_add( &_result( i ), 2 * n ); + } + KOKKOS_INLINE_FUNCTION void operator()( const int i, const int n ) const + { + Kokkos::atomic_add( &_result( i ), n ); + } +}; + +template +void checkFirstNeighborParallelForFunctor( const ListType& nlist, + const TestListType& N2_list_copy, + const int num_particle, + const bool use_tag ) +{ + if ( use_tag ) + { + Kokkos::RangePolicy policy( + 0, num_particle ); + checkFirstNeighborParallelForFunctor( nlist, N2_list_copy, num_particle, + policy, 2 ); + } + else + { + Kokkos::RangePolicy policy( 0, num_particle ); + checkFirstNeighborParallelForFunctor( nlist, N2_list_copy, num_particle, + policy, 1 ); + } +} + +template +void checkFirstNeighborParallelForFunctor( const ListType& nlist, + const TestListType& N2_list_copy, + const int num_particle, + const PolicyType policy, + const int multiplier ) +{ + // Create Kokkos views for the write operation. + using memory_space = typename TEST_MEMSPACE::memory_space; + using view_type = Kokkos::View; + Kokkos::View N2_result( "N2_result", + num_particle ); + + // Test the list parallel operation by adding a value from each neighbor + // to the particle and compare to counts using a functor. + NeighParallelOp serial_functor( num_particle ); + NeighParallelOp team_functor( num_particle ); + + Cabana::neighbor_parallel_for( policy, serial_functor, nlist, + Cabana::FirstNeighborsTag(), + Cabana::SerialOpTag(), "test_1st_serial" ); + Cabana::neighbor_parallel_for( policy, team_functor, nlist, + Cabana::FirstNeighborsTag(), + Cabana::TeamOpTag(), "test_1st_team" ); + Kokkos::fence(); + + // Use a full N^2 neighbor list to check against. + for ( int p = 0; p < num_particle; ++p ) + for ( int n = 0; n < N2_list_copy.counts( p ); ++n ) + N2_result( p ) += N2_list_copy.neighbors( p, n ); + + // Check the result. + auto serial_mirror = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), serial_functor._result ); + auto team_mirror = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), team_functor._result ); + for ( int p = 0; p < num_particle; ++p ) + { + EXPECT_EQ( N2_result( p ) * multiplier, serial_mirror( p ) ); + EXPECT_EQ( N2_result( p ) * multiplier, team_mirror( p ) ); + } +} + //---------------------------------------------------------------------------// // Default test settings. struct NeighborListTestData diff --git a/core/unit_test/tstNeighborList.hpp b/core/unit_test/tstNeighborList.hpp index 512554177..cb402d797 100644 --- a/core/unit_test/tstNeighborList.hpp +++ b/core/unit_test/tstNeighborList.hpp @@ -242,6 +242,11 @@ void testNeighborParallelFor() checkSplitFirstNeighborParallelFor( nlist, test_data.N2_list_copy, test_data.num_particle ); + + checkFirstNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, true ); + checkFirstNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, false ); } //---------------------------------------------------------------------------// From 8814e31f7af0b2aef90874003736883fe72cc074 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Fri, 18 Jun 2021 15:59:58 -0400 Subject: [PATCH 2/3] Hack policy without work tag for serial neighbor loops --- core/src/Cabana_Parallel.hpp | 38 ++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/core/src/Cabana_Parallel.hpp b/core/src/Cabana_Parallel.hpp index 83136c084..56c9f1901 100644 --- a/core/src/Cabana_Parallel.hpp +++ b/core/src/Cabana_Parallel.hpp @@ -237,6 +237,11 @@ inline void neighbor_parallel_for( using memory_space = typename neighbor_list_traits::memory_space; + auto begin = exec_policy.begin(); + auto end = exec_policy.end(); + using linear_policy_type = Kokkos::RangePolicy; + linear_policy_type linear_exec_policy( begin, end ); + static_assert( is_accessible_from{}, "" ); auto neigh_func = KOKKOS_LAMBDA( const index_type i ) @@ -249,9 +254,9 @@ inline void neighbor_parallel_for( neighbor_list_traits::getNeighbor( list, i, n ) ) ); }; if ( str.empty() ) - Kokkos::parallel_for( exec_policy, neigh_func ); + Kokkos::parallel_for( linear_exec_policy, neigh_func ); else - Kokkos::parallel_for( str, exec_policy, neigh_func ); + Kokkos::parallel_for( str, linear_exec_policy, neigh_func ); } //---------------------------------------------------------------------------// @@ -292,6 +297,11 @@ inline void neighbor_parallel_for( using memory_space = typename neighbor_list_traits::memory_space; + auto begin = exec_policy.begin(); + auto end = exec_policy.end(); + using linear_policy_type = Kokkos::RangePolicy; + linear_policy_type linear_exec_policy( begin, end ); + static_assert( is_accessible_from{}, "" ); auto neigh_func = KOKKOS_LAMBDA( const index_type i ) @@ -312,9 +322,9 @@ inline void neighbor_parallel_for( } }; if ( str.empty() ) - Kokkos::parallel_for( exec_policy, neigh_func ); + Kokkos::parallel_for( linear_exec_policy, neigh_func ); else - Kokkos::parallel_for( str, exec_policy, neigh_func ); + Kokkos::parallel_for( str, linear_exec_policy, neigh_func ); } //---------------------------------------------------------------------------// @@ -590,6 +600,11 @@ inline void neighbor_parallel_reduce( using memory_space = typename neighbor_list_traits::memory_space; + auto begin = exec_policy.begin(); + auto end = exec_policy.end(); + using linear_policy_type = Kokkos::RangePolicy; + linear_policy_type linear_exec_policy( begin, end ); + static_assert( is_accessible_from{}, "" ); auto neigh_reduce = KOKKOS_LAMBDA( const index_type i, ReduceType& ival ) @@ -603,9 +618,10 @@ inline void neighbor_parallel_reduce( ival ); }; if ( str.empty() ) - Kokkos::parallel_reduce( exec_policy, neigh_reduce, reduce_val ); + Kokkos::parallel_reduce( linear_exec_policy, neigh_reduce, reduce_val ); else - Kokkos::parallel_reduce( str, exec_policy, neigh_reduce, reduce_val ); + Kokkos::parallel_reduce( str, linear_exec_policy, neigh_reduce, + reduce_val ); } //---------------------------------------------------------------------------// @@ -650,6 +666,11 @@ inline void neighbor_parallel_reduce( using memory_space = typename neighbor_list_traits::memory_space; + auto begin = exec_policy.begin(); + auto end = exec_policy.end(); + using linear_policy_type = Kokkos::RangePolicy; + linear_policy_type linear_exec_policy( begin, end ); + static_assert( is_accessible_from{}, "" ); auto neigh_reduce = KOKKOS_LAMBDA( const index_type i, ReduceType& ival ) @@ -670,9 +691,10 @@ inline void neighbor_parallel_reduce( } }; if ( str.empty() ) - Kokkos::parallel_reduce( exec_policy, neigh_reduce, reduce_val ); + Kokkos::parallel_reduce( linear_exec_policy, neigh_reduce, reduce_val ); else - Kokkos::parallel_reduce( str, exec_policy, neigh_reduce, reduce_val ); + Kokkos::parallel_reduce( str, linear_exec_policy, neigh_reduce, + reduce_val ); } //---------------------------------------------------------------------------// From 3f47c0d539435ca8dc4cc8a0b831438a6f23fbb8 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Mon, 21 Jun 2021 15:27:21 -0400 Subject: [PATCH 3/3] Add functor parallel checks for reduce and second neighbors --- core/unit_test/neighbor_unit_test.hpp | 482 ++++++++++++++++++----- core/unit_test/tstNeighborList.hpp | 31 +- core/unit_test/tstNeighborListArborX.hpp | 75 +++- 3 files changed, 464 insertions(+), 124 deletions(-) diff --git a/core/unit_test/neighbor_unit_test.hpp b/core/unit_test/neighbor_unit_test.hpp index 6029318ed..ae27264c8 100644 --- a/core/unit_test/neighbor_unit_test.hpp +++ b/core/unit_test/neighbor_unit_test.hpp @@ -302,11 +302,127 @@ void checkFullNeighborListPartialRange( const ListType& nlist, } } +//---------------------------------------------------------------------------// +template +void checkFirstNeighborParallelFor( const TestListType& N2_list_copy, + const ViewType& serial_result, + const ViewType& team_result, + const int multiplier ) +{ + double num_particle = serial_result.size(); + Kokkos::View N2_result( "N2_result", + num_particle ); + + // Use a full N^2 neighbor list to check against. + for ( int p = 0; p < num_particle; ++p ) + for ( int n = 0; n < N2_list_copy.counts( p ); ++n ) + N2_result( p ) += N2_list_copy.neighbors( p, n ); + + // Check the result. + auto serial_mirror = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), serial_result ); + auto team_mirror = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), team_result ); + for ( int p = 0; p < num_particle; ++p ) + { + EXPECT_EQ( N2_result( p ) * multiplier, serial_mirror( p ) ); + EXPECT_EQ( N2_result( p ) * multiplier, team_mirror( p ) ); + } +} + +template +void checkSecondNeighborParallelFor( const TestListType& N2_list_copy, + const ViewType& serial_result, + const ViewType& team_result, + const ViewType& vector_result, + const int multiplier ) +{ + double num_particle = serial_result.size(); + Kokkos::View N2_result( "N2_result", + num_particle ); + + // Use a full N^2 neighbor list to check against. + for ( int p = 0; p < num_particle; ++p ) + for ( int n = 0; n < N2_list_copy.counts( p ) - 1; ++n ) + for ( int a = n + 1; a < N2_list_copy.counts( p ); ++a ) + { + N2_result( p ) += N2_list_copy.neighbors( p, n ); + N2_result( p ) += N2_list_copy.neighbors( p, a ); + } + + // Check the result. + auto serial_mirror = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), serial_result ); + auto team_mirror = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), team_result ); + auto vector_mirror = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), vector_result ); + for ( int p = 0; p < num_particle; ++p ) + { + EXPECT_EQ( N2_result( p ) * multiplier, serial_mirror( p ) ); + EXPECT_EQ( N2_result( p ) * multiplier, team_mirror( p ) ); + EXPECT_EQ( N2_result( p ) * multiplier, vector_mirror( p ) ); + } +} + +template +void checkFirstNeighborParallelReduce( const TestListType& N2_list_copy, + const AoSoAType& aosoa, + const double serial_sum, + const double team_sum, + const int multiplier ) +{ + double num_particle = aosoa.size(); + + // Get the expected result from N^2 list in serial. + auto aosoa_mirror = + Cabana::create_mirror_view_and_copy( Kokkos::HostSpace(), aosoa ); + auto positions_mirror = Cabana::slice<0>( aosoa_mirror ); + double N2_sum = 0; + for ( int p = 0; p < num_particle; ++p ) + for ( int n = 0; n < N2_list_copy.counts( p ); ++n ) + N2_sum += positions_mirror( p, 0 ) + + positions_mirror( N2_list_copy.neighbors( p, n ), 0 ); + + // Check the result. + EXPECT_FLOAT_EQ( N2_sum * multiplier, serial_sum ); + EXPECT_FLOAT_EQ( N2_sum * multiplier, team_sum ); +} + +template +void checkSecondNeighborParallelReduce( const TestListType& N2_list_copy, + const AoSoAType& aosoa, + const double serial_sum, + const double team_sum, + const double vector_sum, + const int multiplier ) +{ + double num_particle = aosoa.size(); + + // Get the expected result from N^2 list in serial. + auto aosoa_mirror = + Cabana::create_mirror_view_and_copy( Kokkos::HostSpace(), aosoa ); + auto positions_mirror = Cabana::slice<0>( aosoa_mirror ); + double N2_sum = 0; + for ( int p = 0; p < num_particle; ++p ) + for ( int n = 0; n < N2_list_copy.counts( p ); ++n ) + for ( int a = n + 1; a < N2_list_copy.counts( p ); ++a ) + N2_sum += + positions_mirror( p, 0 ) + + positions_mirror( N2_list_copy.neighbors( p, n ), 0 ) + + positions_mirror( N2_list_copy.neighbors( p, a ), 0 ); + + // Check the result. + EXPECT_FLOAT_EQ( N2_sum * multiplier, serial_sum ); + EXPECT_FLOAT_EQ( N2_sum * multiplier, team_sum ); + EXPECT_FLOAT_EQ( N2_sum * multiplier, vector_sum ); +} + //---------------------------------------------------------------------------// template -void checkFirstNeighborParallelFor( const ListType& nlist, - const TestListType& N2_list_copy, - const int num_particle ) +void checkFirstNeighborParallelForLambda( const ListType& nlist, + const TestListType& N2_list_copy, + const int num_particle ) { // Create Kokkos views for the write operation. using memory_space = typename TEST_MEMSPACE::memory_space; @@ -335,33 +451,18 @@ void checkFirstNeighborParallelFor( const ListType& nlist, Cabana::TeamOpTag(), "test_1st_team" ); Kokkos::fence(); - // Use a full N^2 neighbor list to check against. - for ( int p = 0; p < num_particle; ++p ) - for ( int n = 0; n < N2_list_copy.counts( p ); ++n ) - N2_result( p ) += N2_list_copy.neighbors( p, n ); - - // Check the result. - auto serial_mirror = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), serial_result ); - auto team_mirror = - Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), team_result ); - for ( int p = 0; p < num_particle; ++p ) - { - EXPECT_EQ( N2_result( p ), serial_mirror( p ) ); - EXPECT_EQ( N2_result( p ), team_mirror( p ) ); - } + checkFirstNeighborParallelFor( N2_list_copy, serial_result, team_result, + 1 ); } //---------------------------------------------------------------------------// template -void checkSecondNeighborParallelFor( const ListType& nlist, - const TestListType& N2_list_copy, - const int num_particle ) +void checkSecondNeighborParallelForLambda( const ListType& nlist, + const TestListType& N2_list_copy, + const int num_particle ) { // Create Kokkos views for the write operation. using memory_space = typename TEST_MEMSPACE::memory_space; - Kokkos::View N2_result( "N2_result", - num_particle ); Kokkos::View serial_result( "serial_result", num_particle ); Kokkos::View team_result( "team_result", num_particle ); @@ -399,28 +500,8 @@ void checkSecondNeighborParallelFor( const ListType& nlist, Cabana::TeamVectorOpTag(), "test_2nd_vector" ); Kokkos::fence(); - // Use a full N^2 neighbor list to check against. - for ( int p = 0; p < num_particle; ++p ) - for ( int n = 0; n < N2_list_copy.counts( p ) - 1; ++n ) - for ( int a = n + 1; a < N2_list_copy.counts( p ); ++a ) - { - N2_result( p ) += N2_list_copy.neighbors( p, n ); - N2_result( p ) += N2_list_copy.neighbors( p, a ); - } - - // Check the result. - auto serial_mirror = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), serial_result ); - auto team_mirror = - Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), team_result ); - auto vector_mirror = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), vector_result ); - for ( int p = 0; p < num_particle; ++p ) - { - EXPECT_EQ( N2_result( p ), serial_mirror( p ) ); - EXPECT_EQ( N2_result( p ), team_mirror( p ) ); - EXPECT_EQ( N2_result( p ), vector_mirror( p ) ); - } + checkSecondNeighborParallelFor( N2_list_copy, serial_result, team_result, + vector_result, 1 ); } //---------------------------------------------------------------------------// @@ -505,9 +586,9 @@ void checkSplitFirstNeighborParallelFor( const ListType& nlist, //---------------------------------------------------------------------------// template -void checkFirstNeighborParallelReduce( const ListType& nlist, - const TestListType& N2_list_copy, - const AoSoAType& aosoa ) +void checkFirstNeighborParallelReduceLambda( const ListType& nlist, + const TestListType& N2_list_copy, + const AoSoAType& aosoa ) { // Test the list parallel operation by adding a value from each neighbor // to the particle and compare to counts. @@ -531,26 +612,15 @@ void checkFirstNeighborParallelReduce( const ListType& nlist, team_sum, "test_reduce_team" ); Kokkos::fence(); - // Get the expected result from N^2 list in serial. - auto aosoa_mirror = - Cabana::create_mirror_view_and_copy( Kokkos::HostSpace(), aosoa ); - auto positions_mirror = Cabana::slice<0>( aosoa_mirror ); - double N2_sum = 0; - for ( int p = 0; p < num_particle; ++p ) - for ( int n = 0; n < N2_list_copy.counts( p ); ++n ) - N2_sum += positions_mirror( p, 0 ) + - positions_mirror( N2_list_copy.neighbors( p, n ), 0 ); - - // Check the result. - EXPECT_FLOAT_EQ( N2_sum, serial_sum ); - EXPECT_FLOAT_EQ( N2_sum, team_sum ); + checkFirstNeighborParallelReduce( N2_list_copy, aosoa, serial_sum, team_sum, + 1 ); } //---------------------------------------------------------------------------// template -void checkSecondNeighborParallelReduce( const ListType& nlist, - const TestListType& N2_list_copy, - const AoSoAType& aosoa ) +void checkSecondNeighborParallelReduceLambda( const ListType& nlist, + const TestListType& N2_list_copy, + const AoSoAType& aosoa ) { // Test the list parallel operation by adding a value from each neighbor // to the particle and compare to counts. @@ -579,37 +649,24 @@ void checkSecondNeighborParallelReduce( const ListType& nlist, Cabana::TeamVectorOpTag(), vector_sum, "test_reduce_vector" ); Kokkos::fence(); - // Get the expected result from N^2 list in serial. - auto aosoa_mirror = - Cabana::create_mirror_view_and_copy( Kokkos::HostSpace(), aosoa ); - auto positions_mirror = Cabana::slice<0>( aosoa_mirror ); - double N2_sum = 0; - for ( int p = 0; p < num_particle; ++p ) - for ( int n = 0; n < N2_list_copy.counts( p ); ++n ) - for ( int a = n + 1; a < N2_list_copy.counts( p ); ++a ) - N2_sum += - positions_mirror( p, 0 ) + - positions_mirror( N2_list_copy.neighbors( p, n ), 0 ) + - positions_mirror( N2_list_copy.neighbors( p, a ), 0 ); - - // Check the result. - EXPECT_FLOAT_EQ( N2_sum, serial_sum ); - EXPECT_FLOAT_EQ( N2_sum, team_sum ); - EXPECT_FLOAT_EQ( N2_sum, vector_sum ); + checkSecondNeighborParallelReduce( N2_list_copy, aosoa, serial_sum, + team_sum, vector_sum, 1 ); } //---------------------------------------------------------------------------// -// Functor work tag for only assigning half the value. +// Check parallel with functor (with and without work tag) + +// Functor work tag for assigning double the value. class DoubleValueWorkTag { }; template -struct NeighParallelOp +struct FirstNeighForOp { ViewType _result; - NeighParallelOp( const int num_particle ) + FirstNeighForOp( const int num_particle ) { _result = ViewType( "result", num_particle ); } @@ -657,13 +714,11 @@ void checkFirstNeighborParallelForFunctor( const ListType& nlist, // Create Kokkos views for the write operation. using memory_space = typename TEST_MEMSPACE::memory_space; using view_type = Kokkos::View; - Kokkos::View N2_result( "N2_result", - num_particle ); // Test the list parallel operation by adding a value from each neighbor // to the particle and compare to counts using a functor. - NeighParallelOp serial_functor( num_particle ); - NeighParallelOp team_functor( num_particle ); + FirstNeighForOp serial_functor( num_particle ); + FirstNeighForOp team_functor( num_particle ); Cabana::neighbor_parallel_for( policy, serial_functor, nlist, Cabana::FirstNeighborsTag(), @@ -673,21 +728,248 @@ void checkFirstNeighborParallelForFunctor( const ListType& nlist, Cabana::TeamOpTag(), "test_1st_team" ); Kokkos::fence(); - // Use a full N^2 neighbor list to check against. - for ( int p = 0; p < num_particle; ++p ) - for ( int n = 0; n < N2_list_copy.counts( p ); ++n ) - N2_result( p ) += N2_list_copy.neighbors( p, n ); + checkFirstNeighborParallelFor( N2_list_copy, serial_functor._result, + team_functor._result, multiplier ); +} - // Check the result. - auto serial_mirror = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), serial_functor._result ); - auto team_mirror = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), team_functor._result ); - for ( int p = 0; p < num_particle; ++p ) +//---------------------------------------------------------------------------// +template +struct SecondNeighForOp +{ + ViewType _result; + + SecondNeighForOp( const int num_particle ) { - EXPECT_EQ( N2_result( p ) * multiplier, serial_mirror( p ) ); - EXPECT_EQ( N2_result( p ) * multiplier, team_mirror( p ) ); + _result = ViewType( "result", num_particle ); + } + + // tagged version that assigns double the value. + KOKKOS_INLINE_FUNCTION void operator()( const DoubleValueWorkTag&, + const int i, const int n, + const int a ) const + { + Kokkos::atomic_add( &_result( i ), 2 * ( n + a ) ); + } + KOKKOS_INLINE_FUNCTION void operator()( const int i, const int n, + const int a ) const + { + Kokkos::atomic_add( &_result( i ), n + a ); } +}; + +template +void checkSecondNeighborParallelForFunctor( const ListType& nlist, + const TestListType& N2_list_copy, + const int num_particle, + const bool use_tag ) +{ + if ( use_tag ) + { + Kokkos::RangePolicy policy( + 0, num_particle ); + checkSecondNeighborParallelForFunctor( nlist, N2_list_copy, + num_particle, policy, 2 ); + } + else + { + Kokkos::RangePolicy policy( 0, num_particle ); + checkSecondNeighborParallelForFunctor( nlist, N2_list_copy, + num_particle, policy, 1 ); + } +} + +template +void checkSecondNeighborParallelForFunctor( const ListType& nlist, + const TestListType& N2_list_copy, + const int num_particle, + const PolicyType policy, + const int multiplier ) +{ + // Create Kokkos views for the write operation. + using memory_space = typename TEST_MEMSPACE::memory_space; + using view_type = Kokkos::View; + + // Test the list parallel operation by adding a value from each neighbor + // to the particle and compare to counts using a functor. + SecondNeighForOp serial_functor( num_particle ); + SecondNeighForOp team_functor( num_particle ); + SecondNeighForOp vector_functor( num_particle ); + + Cabana::neighbor_parallel_for( policy, serial_functor, nlist, + Cabana::SecondNeighborsTag(), + Cabana::SerialOpTag(), "test_2nd_serial" ); + Cabana::neighbor_parallel_for( policy, team_functor, nlist, + Cabana::SecondNeighborsTag(), + Cabana::TeamOpTag(), "test_2nd_team" ); + Cabana::neighbor_parallel_for( + policy, vector_functor, nlist, Cabana::SecondNeighborsTag(), + Cabana::TeamVectorOpTag(), "test_2nd_vector" ); + + Kokkos::fence(); + + checkSecondNeighborParallelFor( N2_list_copy, serial_functor._result, + team_functor._result, + vector_functor._result, multiplier ); +} + +//---------------------------------------------------------------------------// +// Check parallel reductions with functor (with and without work tag) +template +struct FirstNeighReduceOp +{ + PositionSlice _position; + + FirstNeighReduceOp( const PositionSlice position ) + : _position( position ) + { + } + + // tagged version that assigns double the value. + KOKKOS_INLINE_FUNCTION void operator()( const DoubleValueWorkTag&, + const int i, const int n, + double& sum ) const + { + sum += ( _position( i, 0 ) + _position( n, 0 ) ) * 2; + } + KOKKOS_INLINE_FUNCTION void operator()( const int i, const int n, + double& sum ) const + { + sum += _position( i, 0 ) + _position( n, 0 ); + } +}; + +template +void checkFirstNeighborParallelReduceFunctor( const ListType& nlist, + const TestListType& N2_list_copy, + const AoSoAType aosoa, + const bool use_tag ) +{ + double num_particle = aosoa.size(); + if ( use_tag ) + { + Kokkos::RangePolicy policy( + 0, num_particle ); + checkFirstNeighborParallelReduceFunctor( nlist, N2_list_copy, aosoa, + policy, 2 ); + } + else + { + Kokkos::RangePolicy policy( 0, num_particle ); + checkFirstNeighborParallelReduceFunctor( nlist, N2_list_copy, aosoa, + policy, 1 ); + } +} + +template +void checkFirstNeighborParallelReduceFunctor( const ListType& nlist, + const TestListType& N2_list_copy, + const AoSoAType& aosoa, + const PolicyType policy, + const int multiplier ) +{ + // Test the list parallel operation by adding a value from each neighbor + // to the particle and compare to counts. + auto position = Cabana::slice<0>( aosoa ); + using slice_type = typename AoSoAType::template member_slice_type<0>; + + FirstNeighReduceOp serial_sum_functor( position ); + FirstNeighReduceOp team_sum_functor( position ); + + // Do the reductions. + double serial_sum = 0; + Cabana::neighbor_parallel_reduce( + policy, serial_sum_functor, nlist, Cabana::FirstNeighborsTag(), + Cabana::SerialOpTag(), serial_sum, "test_reduce_serial" ); + double team_sum = 0; + Cabana::neighbor_parallel_reduce( + policy, team_sum_functor, nlist, Cabana::FirstNeighborsTag(), + Cabana::TeamOpTag(), team_sum, "test_reduce_team" ); + Kokkos::fence(); + + checkFirstNeighborParallelReduce( N2_list_copy, aosoa, serial_sum, team_sum, + multiplier ); +} + +template +struct SecondNeighReduceOp +{ + PositionSlice _position; + + SecondNeighReduceOp( const PositionSlice position ) + : _position( position ) + { + } + + // tagged version that assigns double the value. + KOKKOS_INLINE_FUNCTION void operator()( const DoubleValueWorkTag&, + const int i, const int n, + const int a, double& sum ) const + { + sum += + ( _position( i, 0 ) + _position( n, 0 ) + _position( a, 0 ) ) * 2; + } + KOKKOS_INLINE_FUNCTION void operator()( const int i, const int n, + const int a, double& sum ) const + { + sum += _position( i, 0 ) + _position( n, 0 ) + _position( a, 0 ); + } +}; + +template +void checkSecondNeighborParallelReduceFunctor( const ListType& nlist, + const TestListType& N2_list_copy, + const AoSoAType aosoa, + const bool use_tag ) +{ + double num_particle = aosoa.size(); + if ( use_tag ) + { + Kokkos::RangePolicy policy( + 0, num_particle ); + checkSecondNeighborParallelReduceFunctor( nlist, N2_list_copy, aosoa, + policy, 2 ); + } + else + { + Kokkos::RangePolicy policy( 0, num_particle ); + checkSecondNeighborParallelReduceFunctor( nlist, N2_list_copy, aosoa, + policy, 1 ); + } +} + +template +void checkSecondNeighborParallelReduceFunctor( const ListType& nlist, + const TestListType& N2_list_copy, + const AoSoAType& aosoa, + const PolicyType policy, + const int multiplier ) +{ + // Test the list parallel operation by adding a value from each neighbor + // to the particle and compare to counts. + auto position = Cabana::slice<0>( aosoa ); + using slice_type = typename AoSoAType::template member_slice_type<0>; + + SecondNeighReduceOp serial_sum_functor( position ); + SecondNeighReduceOp team_sum_functor( position ); + SecondNeighReduceOp vector_sum_functor( position ); + + // Do the reductions. + double serial_sum = 0; + Cabana::neighbor_parallel_reduce( + policy, serial_sum_functor, nlist, Cabana::SecondNeighborsTag(), + Cabana::SerialOpTag(), serial_sum, "test_reduce_serial" ); + double team_sum = 0; + Cabana::neighbor_parallel_reduce( + policy, team_sum_functor, nlist, Cabana::SecondNeighborsTag(), + Cabana::TeamOpTag(), team_sum, "test_reduce_team" ); + double vector_sum = 0; + Cabana::neighbor_parallel_reduce( + policy, vector_sum_functor, nlist, Cabana::SecondNeighborsTag(), + Cabana::TeamVectorOpTag(), vector_sum, "test_reduce_vector" ); + Kokkos::fence(); + + checkSecondNeighborParallelReduce( N2_list_copy, aosoa, serial_sum, + team_sum, vector_sum, multiplier ); } //---------------------------------------------------------------------------// diff --git a/core/unit_test/tstNeighborList.hpp b/core/unit_test/tstNeighborList.hpp index cb402d797..69ec8c9e8 100644 --- a/core/unit_test/tstNeighborList.hpp +++ b/core/unit_test/tstNeighborList.hpp @@ -234,11 +234,11 @@ void testNeighborParallelFor() test_data.cell_size_ratio, test_data.grid_min, test_data.grid_max ); - checkFirstNeighborParallelFor( nlist, test_data.N2_list_copy, - test_data.num_particle ); + checkFirstNeighborParallelForLambda( nlist, test_data.N2_list_copy, + test_data.num_particle ); - checkSecondNeighborParallelFor( nlist, test_data.N2_list_copy, - test_data.num_particle ); + checkSecondNeighborParallelForLambda( nlist, test_data.N2_list_copy, + test_data.num_particle ); checkSplitFirstNeighborParallelFor( nlist, test_data.N2_list_copy, test_data.num_particle ); @@ -247,6 +247,11 @@ void testNeighborParallelFor() test_data.num_particle, true ); checkFirstNeighborParallelForFunctor( nlist, test_data.N2_list_copy, test_data.num_particle, false ); + + checkSecondNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, true ); + checkSecondNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, false ); } //---------------------------------------------------------------------------// @@ -264,11 +269,21 @@ void testNeighborParallelReduce() test_data.cell_size_ratio, test_data.grid_min, test_data.grid_max ); - checkFirstNeighborParallelReduce( nlist, test_data.N2_list_copy, - test_data.aosoa ); + checkFirstNeighborParallelReduceLambda( nlist, test_data.N2_list_copy, + test_data.aosoa ); + + checkSecondNeighborParallelReduceLambda( nlist, test_data.N2_list_copy, + test_data.aosoa ); + + checkFirstNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, true ); + checkFirstNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, false ); - checkSecondNeighborParallelReduce( nlist, test_data.N2_list_copy, - test_data.aosoa ); + checkSecondNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, true ); + checkSecondNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, false ); } //---------------------------------------------------------------------------// diff --git a/core/unit_test/tstNeighborListArborX.hpp b/core/unit_test/tstNeighborListArborX.hpp index 460f04e80..262f4d025 100644 --- a/core/unit_test/tstNeighborListArborX.hpp +++ b/core/unit_test/tstNeighborListArborX.hpp @@ -201,14 +201,24 @@ void testNeighborArborXParallelFor() Cabana::FullNeighborTag{}, position, 0, position.size(), test_data.test_radius ); - checkFirstNeighborParallelFor( nlist, test_data.N2_list_copy, - test_data.num_particle ); + checkFirstNeighborParallelForLambda( nlist, test_data.N2_list_copy, + test_data.num_particle ); - checkSecondNeighborParallelFor( nlist, test_data.N2_list_copy, - test_data.num_particle ); + checkSecondNeighborParallelForLambda( nlist, test_data.N2_list_copy, + test_data.num_particle ); checkSplitFirstNeighborParallelFor( nlist, test_data.N2_list_copy, test_data.num_particle ); + + checkFirstNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, true ); + checkFirstNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, false ); + + checkSecondNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, true ); + checkSecondNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, false ); } { // Create the neighbor list. @@ -218,11 +228,24 @@ void testNeighborArborXParallelFor() Cabana::FullNeighborTag{}, position, 0, position.size(), test_data.test_radius ); - checkFirstNeighborParallelFor( nlist, test_data.N2_list_copy, - test_data.num_particle ); + checkFirstNeighborParallelForLambda( nlist, test_data.N2_list_copy, + test_data.num_particle ); + + checkSecondNeighborParallelForLambda( nlist, test_data.N2_list_copy, + test_data.num_particle ); + + checkSplitFirstNeighborParallelFor( nlist, test_data.N2_list_copy, + test_data.num_particle ); - checkSecondNeighborParallelFor( nlist, test_data.N2_list_copy, - test_data.num_particle ); + checkFirstNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, true ); + checkFirstNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, false ); + + checkSecondNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, true ); + checkSecondNeighborParallelForFunctor( nlist, test_data.N2_list_copy, + test_data.num_particle, false ); } } @@ -240,11 +263,21 @@ void testNeighborArborXParallelReduce() Cabana::FullNeighborTag{}, position, 0, position.size(), test_data.test_radius ); - checkFirstNeighborParallelReduce( nlist, test_data.N2_list_copy, - test_data.aosoa ); + checkFirstNeighborParallelReduceLambda( nlist, test_data.N2_list_copy, + test_data.aosoa ); + + checkSecondNeighborParallelReduceLambda( nlist, test_data.N2_list_copy, + test_data.aosoa ); + + checkFirstNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, true ); + checkFirstNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, false ); - checkSecondNeighborParallelReduce( nlist, test_data.N2_list_copy, - test_data.aosoa ); + checkSecondNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, true ); + checkSecondNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, false ); } { // Create the neighbor list. @@ -254,11 +287,21 @@ void testNeighborArborXParallelReduce() Cabana::FullNeighborTag{}, position, 0, position.size(), test_data.test_radius ); - checkFirstNeighborParallelReduce( nlist, test_data.N2_list_copy, - test_data.aosoa ); + checkFirstNeighborParallelReduceLambda( nlist, test_data.N2_list_copy, + test_data.aosoa ); + + checkSecondNeighborParallelReduceLambda( nlist, test_data.N2_list_copy, + test_data.aosoa ); + + checkFirstNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, true ); + checkFirstNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, false ); - checkSecondNeighborParallelReduce( nlist, test_data.N2_list_copy, - test_data.aosoa ); + checkSecondNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, true ); + checkSecondNeighborParallelReduceFunctor( nlist, test_data.N2_list_copy, + test_data.aosoa, false ); } }