diff --git a/core/distributed/dd_matrix.cpp b/core/distributed/dd_matrix.cpp index 8c8de7075b1..cfb2d65db00 100644 --- a/core/distributed/dd_matrix.cpp +++ b/core/distributed/dd_matrix.cpp @@ -10,10 +10,10 @@ #include #include +#include "core/components/fill_array_kernels.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/distributed/dd_matrix_kernels.hpp" - namespace gko { namespace experimental { namespace distributed { @@ -23,6 +23,7 @@ namespace { GKO_REGISTER_OPERATION(filter_non_owning_idxs, distributed_dd_matrix::filter_non_owning_idxs); +GKO_REGISTER_OPERATION(fill_seq_array, components::fill_seq_array); GKO_REGISTER_OPERATION(prefix_sum_nonnegative, components::prefix_sum_nonnegative); @@ -149,9 +150,23 @@ void DdMatrix::read_distributed( std::shared_ptr> col_partition) { + const auto comm = this->get_communicator(); + GKO_ASSERT_EQ(data.get_size()[0], row_partition->get_size()); + GKO_ASSERT_EQ(data.get_size()[1], col_partition->get_size()); + GKO_ASSERT_EQ(comm.size(), row_partition->get_num_parts()); + GKO_ASSERT_EQ(comm.size(), col_partition->get_num_parts()); auto exec = this->get_executor(); - auto comm = this->get_communicator(); auto local_part = comm.rank(); + auto use_host_buffer = mpi::requires_host_buffer(exec, comm); + auto tmp_row_partition = make_temporary_clone(exec, row_partition); + auto tmp_col_partition = make_temporary_clone(exec, col_partition); + + // set up LinOp sizes + auto global_num_rows = row_partition->get_size(); + auto global_num_cols = col_partition->get_size(); + dim<2> global_dim{global_num_rows, global_num_cols}; + this->set_size(global_dim); + size_type num_parts = comm.size(); array non_owning_row_idxs{exec}; array non_owning_col_idxs{exec}; @@ -185,78 +200,69 @@ void DdMatrix::read_distributed( dim<2>{static_cast(local_num_rows), static_cast(local_num_cols)}, local_row_idxs, local_col_idxs, arrays.values}; + local_data.sort_row_major(); as>(this->local_mtx_) ->read(std::move(local_data)); - // // 
Gather local sizes from all ranks and build the partition in the - // enriched - // // space. - // array range_bounds{exec, num_parts + 1}; - // comm.all_gather(exec, &local_num_rows, 1, range_bounds.get_data(), 1); - // exec->run(dd_matrix::make_prefix_sum_nonnegative(range_bounds.get_data(), - // num_parts + 1)); - // auto large_partition = - // share(Partition::build_from_contiguous( - // exec, range_bounds)); - - // // Build the restricion and prolongation operators. - // array remote_idxs{exec, 0}; - // auto enriched_map = - // gko::experimental::distributed::index_map( - // exec, large_partition, local_part, remote_idxs); - // auto restrict_col_idxs = - // make_const_array_view( - // exec, static_cast(local_num_cols), - // col_map.get_remote_global_idxs().get_const_flat_data()) - // .copy_to_array(); - // auto restrict_row_idxs = - // make_const_array_view( - // exec, static_cast(local_num_rows), - // enriched_map.get_remote_global_idxs().get_const_flat_data()) - // .copy_to_array(); - // array restrict_values{exec, - // static_cast(local_num_rows)}; - // restrict_values.fill(one()); - // device_matrix_data restrict_data{ - // exec, dim<2>{large_partition->get_size(), col_partition->get_size()}, - // std::move(restrict_row_idxs), std::move(restrict_col_idxs), - // std::move(restrict_values)}; - // restriction_ = - // Matrix::create(exec, - // comm); - // restriction_->read_distributed(restrict_data, large_partition, - // col_partition); - // auto prolongate_col_idxs = - // make_const_array_view( - // exec, static_cast(local_num_rows), - // enriched_map.get_remote_global_idxs().get_const_flat_data()) - // .copy_to_array(); - // auto prolongate_row_idxs = - // make_const_array_view( - // exec, static_cast(local_num_cols), - // row_map.get_remote_global_idxs().get_const_flat_data()) - // .copy_to_array(); - // array prolongate_values{exec, - // static_cast(local_num_rows)}; - // prolongate_values.fill(one()); - // device_matrix_data prolongate_data{ - // exec, 
dim<2>{large_partition->get_size(), col_partition->get_size()}, - // std::move(prolongate_row_idxs), std::move(prolongate_col_idxs), - // std::move(prolongate_values)}; - // prolongation_ = - // Matrix::create(exec, - // comm); - // prolongation_->read_distributed(prolongate_data, row_partition, - // large_partition, - // assembly_mode::communicate); - - // dim<2> global_buffer_size{large_partition->get_size(), 1u}; - // dim<2> local_buffer_size{static_cast(local_num_rows), 1u}; - // lhs_buffer_ = Vector::create(exec, comm, global_buffer_size, - // local_buffer_size); rhs_buffer_ = Vector::create(exec, comm, - // global_buffer_size, local_buffer_size); + // Gather local sizes from all ranks and build the partition in the enriched + // space. + array range_bounds{exec, num_parts + 1}; + comm.all_gather(exec, &local_num_rows, 1, range_bounds.get_data(), 1); + exec->run(dd_matrix::make_prefix_sum_nonnegative(range_bounds.get_data(), + num_parts + 1)); + auto large_partition = + share(Partition::build_from_contiguous( + exec, range_bounds)); + + // Build the restriction and prolongation operators. 
+ array remote_idxs{exec, 0}; + auto enriched_map = + gko::experimental::distributed::index_map( + exec, large_partition, local_part, remote_idxs); + array local_idxs{exec, + static_cast(local_num_rows)}; + exec->run(dd_matrix::make_fill_seq_array( + local_idxs.get_data(), static_cast(local_num_rows))); + auto restrict_col_idxs = + col_map.map_to_global(local_idxs, index_space::combined); + auto restrict_row_idxs = + enriched_map.map_to_global(local_idxs, index_space::combined); + array restrict_values{exec, + static_cast(local_num_rows)}; + restrict_values.fill(one()); + device_matrix_data restrict_data{ + exec, dim<2>{large_partition->get_size(), col_partition->get_size()}, + std::move(restrict_row_idxs), std::move(restrict_col_idxs), + std::move(restrict_values)}; + restriction_ = + Matrix::create(exec, comm); + restriction_->read_distributed(restrict_data, large_partition, + col_partition); + auto prolongate_col_idxs = + enriched_map.map_to_global(local_idxs, index_space::combined); + auto prolongate_row_idxs = + row_map.map_to_global(local_idxs, index_space::combined); + array prolongate_values{exec, + static_cast(local_num_rows)}; + prolongate_values.fill(one()); + device_matrix_data prolongate_data{ + exec, dim<2>{row_partition->get_size(), large_partition->get_size()}, + std::move(prolongate_row_idxs), std::move(prolongate_col_idxs), + std::move(prolongate_values)}; + prolongation_ = + Matrix::create(exec, comm); + prolongation_->read_distributed(prolongate_data, row_partition, + large_partition, + assembly_mode::communicate); + + // Create buffers for SpMV + dim<2> global_buffer_size{large_partition->get_size(), 1u}; + dim<2> local_buffer_size{static_cast(local_num_rows), 1u}; + lhs_buffer_ = Vector::create(exec, comm, global_buffer_size, + local_buffer_size); + rhs_buffer_ = Vector::create(exec, comm, global_buffer_size, + local_buffer_size); } @@ -301,13 +307,91 @@ void DdMatrix::read_distributed( template void DdMatrix::apply_impl( const LinOp* b, 
LinOp* x) const -{} +{ + auto exec = this->get_executor(); + auto comm = this->get_communicator(); + const auto nrhs = x->get_size()[1]; + if (nrhs != rhs_buffer_->get_size()[1]) { + dim<2> local_buffer_size{rhs_buffer_->get_local_vector()->get_size()[0], + nrhs}; + dim<2> global_buffer_size{rhs_buffer_->get_size()[0], nrhs}; + lhs_buffer_ = Vector::create(exec, comm, global_buffer_size, + local_buffer_size); + rhs_buffer_ = Vector::create(exec, comm, global_buffer_size, + local_buffer_size); + } + distributed::precision_dispatch_real_complex( + [this](const auto dense_b, auto dense_x) { + auto exec = this->get_executor(); + this->restriction_->apply(dense_b, lhs_buffer_); + + auto local_b = gko::matrix::Dense::create( + exec, lhs_buffer_->get_local_vector()->get_size(), + gko::make_array_view( + exec, + lhs_buffer_->get_local_vector()->get_num_stored_elements(), + lhs_buffer_->get_local_values()), + lhs_buffer_->get_local_vector()->get_stride()); + auto local_x = gko::matrix::Dense::create( + exec, rhs_buffer_->get_local_vector()->get_size(), + gko::make_array_view( + exec, + rhs_buffer_->get_local_vector()->get_num_stored_elements(), + rhs_buffer_->get_local_values()), + rhs_buffer_->get_local_vector()->get_stride()); + + local_mtx_->apply(local_b, local_x); + + this->prolongation_->apply(rhs_buffer_, dense_x); + }, + b, x); +} template void DdMatrix::apply_impl( const LinOp* alpha, const LinOp* b, const LinOp* beta, LinOp* x) const -{} +{ + auto exec = this->get_executor(); + auto comm = this->get_communicator(); + const auto nrhs = x->get_size()[1]; + if (nrhs != rhs_buffer_->get_size()[1]) { + dim<2> local_buffer_size{rhs_buffer_->get_local_vector()->get_size()[0], + nrhs}; + dim<2> global_buffer_size{rhs_buffer_->get_size()[0], nrhs}; + lhs_buffer_ = Vector::create(exec, comm, global_buffer_size, + local_buffer_size); + rhs_buffer_ = Vector::create(exec, comm, global_buffer_size, + local_buffer_size); + } + distributed::precision_dispatch_real_complex( + 
[this](const auto local_alpha, const auto dense_b, + const auto local_beta, auto dense_x) { + auto exec = this->get_executor(); + this->restriction_->apply(dense_b, lhs_buffer_); + + auto local_b = gko::matrix::Dense::create( + exec, lhs_buffer_->get_local_vector()->get_size(), + gko::make_array_view( + exec, + lhs_buffer_->get_local_vector()->get_num_stored_elements(), + lhs_buffer_->get_local_values()), + lhs_buffer_->get_local_vector()->get_stride()); + auto local_x = gko::matrix::Dense::create( + exec, rhs_buffer_->get_local_vector()->get_size(), + gko::make_array_view( + exec, + rhs_buffer_->get_local_vector()->get_num_stored_elements(), + rhs_buffer_->get_local_values()), + rhs_buffer_->get_local_vector()->get_stride()); + + local_mtx_->apply(local_b, local_x); + + this->prolongation_->apply(local_alpha, rhs_buffer_, local_beta, + dense_x); + }, + alpha, b, beta, x); +} template diff --git a/include/ginkgo/core/distributed/dd_matrix.hpp b/include/ginkgo/core/distributed/dd_matrix.hpp index 1a99c3107ae..361b47c1773 100644 --- a/include/ginkgo/core/distributed/dd_matrix.hpp +++ b/include/ginkgo/core/distributed/dd_matrix.hpp @@ -450,8 +450,8 @@ class DdMatrix std::shared_ptr restriction_; std::shared_ptr local_mtx_; std::shared_ptr prolongation_; - std::shared_ptr lhs_buffer_; - std::shared_ptr rhs_buffer_; + mutable std::shared_ptr lhs_buffer_; + mutable std::shared_ptr rhs_buffer_; }; diff --git a/test/mpi/dd_matrix.cpp b/test/mpi/dd_matrix.cpp index 484edd1e612..1f127ce9156 100644 --- a/test/mpi/dd_matrix.cpp +++ b/test/mpi/dd_matrix.cpp @@ -29,7 +29,7 @@ template -class MatrixCreation : public CommonMpiTestFixture { +class DdMatrix : public CommonMpiTestFixture { protected: using value_type = typename std::tuple_element< 0, decltype(ValueLocalGlobalIndexType())>::type; @@ -50,9 +50,10 @@ class MatrixCreation : public CommonMpiTestFixture { global_index_type>; using matrix_data = gko::matrix_data; using local_matrix_data = gko::matrix_data; + using 
dense_vec_type = gko::matrix::Dense; - MatrixCreation() + DdMatrix() : size{12, 12}, dist_input{ {{size, {{0, 0, 2}, {0, 1, -1}, {0, 3, -1}, {1, 0, -1}, @@ -61,7 +62,7 @@ class MatrixCreation : public CommonMpiTestFixture { {3, 4, -0.5}, {4, 1, -1}, {4, 3, -0.5}, {4, 4, 2}, {4, 5, -0.5}, {5, 2, -1}, {5, 4, -0.5}, {5, 5, 1.5}}}, {size, {{3, 3, 1.5}, {3, 4, -0.5}, {3, 6, -1}, {4, 3, -0.5}, - {4, 4, 2}, {4, 5, -0.4}, {4, 7, -1}, {5, 4, -0.5}, + {4, 4, 2}, {4, 5, -0.5}, {4, 7, -1}, {5, 4, -0.5}, {5, 5, 1.5}, {5, 8, -1}, {6, 3, -1}, {6, 6, 1.5}, {6, 7, -0.5}, {7, 4, -1}, {7, 6, -0.5}, {7, 7, 2}, {7, 8, -0.5}, {8, 5, -1}, {8, 7, -0.5}, {8, 8, 1.5}}}, @@ -80,17 +81,17 @@ class MatrixCreation : public CommonMpiTestFixture { {3, 4, -0.5}, {4, 1, -1}, {4, 3, -0.5}, {4, 4, 2}, {4, 5, -0.5}, {5, 2, -1}, {5, 4, -0.5}, {5, 5, 1.5}}}, {local_size, - {{4, 4, 1.5}, {4, 0, -0.5}, {4, 2, -1}, {0, 4, -0.5}, - {0, 0, 2}, {0, 1, -0.4}, {0, 3, -1}, {1, 0, -0.5}, - {1, 1, 1.5}, {1, 5, -1}, {2, 3, -1}, {2, 2, 1.5}, - {2, 3, -0.5}, {3, 0, -1}, {3, 2, -0.5}, {3, 3, 2}, - {3, 5, -0.5}, {5, 1, -1}, {5, 3, -0.5}, {5, 5, 1.5}}}, + {{0, 0, 2}, {0, 1, -0.5}, {0, 3, -1}, {0, 4, -0.5}, + {1, 0, -0.5}, {1, 1, 1.5}, {1, 5, -1}, {2, 2, 1.5}, + {2, 3, -0.5}, {2, 4, -1}, {3, 0, -1}, {3, 2, -0.5}, + {3, 3, 2}, {3, 5, -0.5}, {4, 0, -0.5}, {4, 2, -1}, + {4, 4, 1.5}, {5, 1, -1}, {5, 3, -0.5}, {5, 5, 1.5}}}, {local_size, - {{4, 4, 1.5}, {4, 5, -0.5}, {4, 1, -1}, {5, 4, -0.5}, - {5, 5, 2}, {5, 0, -0.5}, {5, 2, -1}, {0, 5, -0.5}, - {0, 0, 1.5}, {0, 3, -1}, {1, 4, -1}, {1, 1, 2}, - {1, 2, -1}, {2, 5, -1}, {2, 1, -1}, {2, 2, 3}, - {2, 3, -1}, {3, 0, -1}, {3, 2, -1}, {3, 3, 2}}}}}, + {{0, 0, 1.5}, {0, 3, -1}, {0, 5, -0.5}, {1, 1, 2}, + {1, 2, -1}, {1, 4, -1}, {2, 1, -1}, {2, 2, 3}, + {2, 3, -1}, {2, 5, -1}, {3, 0, -1}, {3, 2, -1}, + {3, 3, 2}, {4, 1, -1}, {4, 4, 1.5}, {4, 5, -0.5}, + {5, 0, -0.5}, {5, 2, -1}, {5, 4, -0.5}, {5, 5, 2}}}}}, engine(42) { row_part = Partition::build_from_contiguous( @@ -98,6 +99,8 @@ 
class MatrixCreation : public CommonMpiTestFixture { exec, I{0, 4, 8, 12})); dist_mat = dd_mtx_type::create(exec, comm); + x = dist_vec_type::create(ref, comm); + y = dist_vec_type::create(ref, comm); } void SetUp() override { ASSERT_EQ(comm.size(), 3); } @@ -114,591 +117,93 @@ class MatrixCreation : public CommonMpiTestFixture { std::unique_ptr dist_mat; + std::unique_ptr x; + std::unique_ptr y; + std::default_random_engine engine; }; -TYPED_TEST_SUITE(MatrixCreation, gko::test::ValueLocalGlobalIndexTypes, +TYPED_TEST_SUITE(DdMatrix, gko::test::ValueLocalGlobalIndexTypes, TupleTypenameNameGenerator); -TYPED_TEST(MatrixCreation, ReadsDistributed) +TYPED_TEST(DdMatrix, ReadsDistributed) { using value_type = typename TestFixture::value_type; using csr = typename TestFixture::local_matrix_type; - auto rank = this->dist_mat->get_communicator().rank(); + auto rank = this->comm.rank(); auto res_local = csr::create(this->exec); res_local->read(this->local_result[rank]); + I> local_restriction = {{1, 0, 0, 0}, {0, 1, 0, 0}, + {0, 0, 1, 0}, {0, 0, 0, 1}, + {0, 0, 0, 0}, {0, 0, 0, 0}}; + I> non_local_restriction = {{0, 0}, {0, 0}, {0, 0}, + {0, 0}, {1, 0}, {0, 1}}; + I> local_prolongation = {{1, 0, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0}, + {0, 0, 0, 1, 0, 0}}; + I> non_local_prolongation[] = { + {{0}, {0}, {0}, {1}}, + {{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}}, + {{1}, {0}, {0}, {0}}}; this->dist_mat->read_distributed(this->dist_input[rank], this->row_part); GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_local_matrix()), res_local, 0); + GKO_ASSERT_MTX_NEAR( + gko::as(this->dist_mat->get_restriction()->get_local_matrix()), + local_restriction, 0); + GKO_ASSERT_MTX_NEAR( + gko::as(this->dist_mat->get_restriction()->get_non_local_matrix()), + non_local_restriction, 0); + GKO_ASSERT_MTX_NEAR( + gko::as(this->dist_mat->get_prolongation()->get_local_matrix()), + local_prolongation, 0); + GKO_ASSERT_MTX_NEAR( + gko::as( + 
this->dist_mat->get_prolongation()->get_non_local_matrix()), + non_local_prolongation[rank], 0); } -#endif - - -// template -// class Matrix : public CommonMpiTestFixture { -// public: -// using value_type = ValueType; -// using local_index_type = gko::int32; -// using global_index_type = gko::int64; -// using part_type = -// gko::experimental::distributed::Partition; -// using csr_mtx_type = gko::matrix::Csr; -// using dist_mtx_type = -// gko::experimental::distributed::Matrix; -// using dist_vec_type = gko::experimental::distributed::Vector; -// using local_matrix_type = gko::matrix::Csr; -// using dense_vec_type = gko::matrix::Dense; -// using matrix_data = gko::matrix_data; - -// Matrix() : size{5, 5}, engine() -// { -// row_part = part_type::build_from_contiguous( -// exec, gko::array( -// exec, I{0, 2, 4, 5})); -// col_part = part_type::build_from_mapping( -// exec, -// gko::array( -// exec, -// I{1, 1, 2, -// 0, -// 0}), -// 3); - -// dist_mat = dist_mtx_type::create(exec, comm); -// dist_mat_large = dist_mtx_type::create(exec, comm); -// x = dist_vec_type::create(ref, comm); -// y = dist_vec_type::create(ref, comm); - -// csr_mat = csr_mtx_type::create(exec); -// dense_x = dense_vec_type::create(exec); -// dense_y = dense_vec_type::create(exec); - -// gko::matrix_data mat_input{ -// size, -// // clang-format off -// {{0, 1, 1}, {0, 3, 2}, {1, 1, 3}, {1, 2, 4}, {2, 1, 5}, -// {2, 2, 6}, {3, 3, 8}, {3, 4, 7}, {4, 0, 9}, {4, 4, 10}} -// // clang-format on -// }; -// dist_mat->read_distributed(mat_input, this->row_part, -// this->col_part); csr_mat->read(mat_input); - -// alpha = gko::test::generate_random_matrix( -// 1, 1, std::uniform_int_distribution(1, 1), -// std::normal_distribution>(), -// this->engine, this->exec); -// beta = gko::test::generate_random_matrix( -// 1, 1, std::uniform_int_distribution(1, 1), -// std::normal_distribution>(), -// this->engine, this->exec); -// } - -// void SetUp() override { ASSERT_EQ(comm.size(), 3); } - -// void 
assert_local_vector_equal_to_global_vector(const dist_vec_type* -// dist, -// const dense_vec_type* -// dense, const part_type* -// part, int rank) -// { -// auto host_part = gko::clone(this->ref, part); -// auto range_bounds = host_part->get_range_bounds(); -// auto part_ids = host_part->get_part_ids(); -// std::vector gather_idxs; -// for (gko::size_type range_id = 0; -// range_id < host_part->get_num_ranges(); ++range_id) { -// if (part_ids[range_id] == rank) { -// for (global_index_type global_row = range_bounds[range_id]; -// global_row < range_bounds[range_id + 1]; ++global_row) { -// gather_idxs.push_back(global_row); -// } -// } -// } -// gko::array gather_idxs_view( -// this->exec, gather_idxs.begin(), gather_idxs.end()); -// auto gathered_local = dense->row_gather(&gather_idxs_view); - -// GKO_ASSERT_MTX_NEAR(dist->get_local_vector(), gathered_local, -// r::value); -// } - -// void init_large(gko::size_type num_rows, gko::size_type num_cols) -// { -// auto rank = comm.rank(); -// int num_parts = comm.size(); -// auto vec_md = gko::test::generate_random_matrix_data( -// num_rows, num_cols, -// std::uniform_int_distribution(static_cast(num_cols), -// static_cast(num_cols)), -// std::normal_distribution>(), -// engine); -// auto mat_md = gko::test::generate_random_matrix_data( -// num_rows, num_rows, -// std::uniform_int_distribution(0, -// static_cast(num_rows)), -// std::normal_distribution>(), -// engine); - -// auto row_mapping = gko::test::generate_random_array< -// gko::experimental::distributed::comm_index_type>( -// num_rows, std::uniform_int_distribution(0, num_parts - 1), -// engine, exec); -// auto col_mapping = gko::test::generate_random_array< -// gko::experimental::distributed::comm_index_type>( -// num_rows, std::uniform_int_distribution(0, num_parts - 1), -// engine, exec); -// row_part_large = -// part_type::build_from_mapping(exec, row_mapping, num_parts); -// col_part_large = -// part_type::build_from_mapping(exec, col_mapping, num_parts); 
- -// dist_mat_large->read_distributed(mat_md, row_part_large, -// col_part_large); -// csr_mat->read(mat_md); - -// x->read_distributed(vec_md, col_part_large); -// dense_x->read(vec_md); - -// y->read_distributed(vec_md, row_part_large); -// dense_y->read(vec_md); -// } - -// gko::dim<2> size; - -// std::shared_ptr row_part; -// std::shared_ptr col_part; -// std::shared_ptr row_part_large; -// std::shared_ptr col_part_large; - -// std::unique_ptr dist_mat; -// std::unique_ptr dist_mat_large; -// std::unique_ptr csr_mat; - -// std::unique_ptr x; -// std::unique_ptr y; -// std::unique_ptr dense_x; -// std::unique_ptr dense_y; - -// std::unique_ptr alpha; -// std::unique_ptr beta; - -// std::default_random_engine engine; -// }; - -// TYPED_TEST_SUITE(Matrix, gko::test::ValueTypes, TypenameNameGenerator); - - -// TYPED_TEST(Matrix, CanApplyToSingleVector) -// { -// using value_type = typename TestFixture::value_type; -// using index_type = typename TestFixture::global_index_type; -// auto vec_md = gko::matrix_data{ -// I>{{1}, {2}, {3}, {4}, {5}}}; -// I> result[3] = {{{10}, {18}}, {{28}, {67}}, {{59}}}; -// auto rank = this->comm.rank(); -// this->x->read_distributed(vec_md, this->col_part); -// this->y->read_distributed(vec_md, this->row_part); - -// this->dist_mat->apply(this->x, this->y); - -// GKO_ASSERT_MTX_NEAR(this->y->get_local_vector(), result[rank], 0); -// } - - -// TYPED_TEST(Matrix, CanApplyToMultipleVectors) -// { -// using value_type = typename TestFixture::value_type; -// using index_type = typename TestFixture::global_index_type; -// auto vec_md = gko::matrix_data{ -// I>{{1, 11}, {2, 22}, {3, 33}, {4, 44}, {5, 55}}}; -// I> result[3] = { -// {{10, 110}, {18, 198}}, {{28, 308}, {67, 737}}, {{59, 649}}}; -// auto rank = this->comm.rank(); -// this->x->read_distributed(vec_md, this->col_part); -// this->y->read_distributed(vec_md, this->row_part); - -// this->dist_mat->apply(this->x, this->y); - -// GKO_ASSERT_MTX_NEAR(this->y->get_local_vector(), 
result[rank], 0); -// } - - -// TYPED_TEST(Matrix, CanAdvancedApplyToSingleVector) -// { -// using value_type = typename TestFixture::value_type; -// using index_type = typename TestFixture::global_index_type; -// using dense_vec_type = typename TestFixture::dense_vec_type; -// auto vec_md = gko::matrix_data{ -// I>{{1}, {2}, {3}, {4}, {5}}}; -// I> result[3] = {{{17}, {30}}, {{47}, {122}}, {{103}}}; -// auto rank = this->comm.rank(); -// this->alpha = gko::initialize({2.0}, this->exec); -// this->beta = gko::initialize({-3.0}, this->exec); -// this->x->read_distributed(vec_md, this->col_part); -// this->y->read_distributed(vec_md, this->row_part); - -// this->dist_mat->apply(this->alpha, this->x, this->beta, this->y); - -// GKO_ASSERT_MTX_NEAR(this->y->get_local_vector(), result[rank], 0); -// } - - -// TYPED_TEST(Matrix, CanApplyToSingleVectorLarge) -// { -// this->init_large(100, 1); - -// this->dist_mat_large->apply(this->x, this->y); -// this->csr_mat->apply(this->dense_x, this->dense_y); - -// this->assert_local_vector_equal_to_global_vector( -// this->y.get(), this->dense_y.get(), this->row_part_large.get(), -// this->comm.rank()); -// } - - -// TYPED_TEST(Matrix, CanApplyToMultipleVectorsLarge) -// { -// this->init_large(100, 17); - -// this->dist_mat_large->apply(this->x, this->y); -// this->csr_mat->apply(this->dense_x, this->dense_y); - -// this->assert_local_vector_equal_to_global_vector( -// this->y.get(), this->dense_y.get(), this->row_part_large.get(), -// this->comm.rank()); -// } - - -// TYPED_TEST(Matrix, CanAdvancedApplyToMultipleVectorsLarge) -// { -// this->init_large(100, 17); - -// this->dist_mat_large->apply(this->alpha, this->x, this->beta, this->y); -// this->csr_mat->apply(this->alpha, this->dense_x, this->beta, -// this->dense_y); - -// this->assert_local_vector_equal_to_global_vector( -// this->y.get(), this->dense_y.get(), this->row_part_large.get(), -// this->comm.rank()); -// } - - -// TYPED_TEST(Matrix, CanColScale) -// { -// using 
value_type = typename TestFixture::value_type; -// using index_type = typename TestFixture::global_index_type; -// using csr = typename TestFixture::local_matrix_type; -// using dist_vec_type = typename TestFixture::dist_vec_type; -// auto vec_md = gko::matrix_data{ -// I>{{1}, {2}, {3}, {4}, {5}}}; -// I> res_col_scale_local[] = { -// {{8, 0}, {0, 0}}, {{0, 10}, {0, 0}}, {{0}}}; -// I> res_col_scale_non_local[] = { -// {{2, 0}, {6, 12}}, {{0, 0, 18}, {32, 35, 0}}, {{50, 9}}}; -// auto rank = this->comm.rank(); -// auto col_scaling_factors = dist_vec_type::create(this->exec, this->comm); -// col_scaling_factors->read_distributed(vec_md, this->col_part); - -// this->dist_mat->col_scale(col_scaling_factors); - -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_local_matrix()), -// res_col_scale_local[rank], 0); -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_non_local_matrix()), -// res_col_scale_non_local[rank], 0); -// } - - -// TYPED_TEST(Matrix, CanRowScale) -// { -// using value_type = typename TestFixture::value_type; -// using index_type = typename TestFixture::global_index_type; -// using csr = typename TestFixture::local_matrix_type; -// using dist_vec_type = typename TestFixture::dist_vec_type; -// auto vec_md = gko::matrix_data{ -// I>{{1}, {2}, {3}, {4}, {5}}}; -// I> res_row_scale_local[] = { -// {{2, 0}, {0, 0}}, {{0, 15}, {0, 0}}, {{0}}}; -// I> res_row_scale_non_local[] = { -// {{1, 0}, {6, 8}}, {{0, 0, 18}, {32, 28, 0}}, {{50, 45}}}; -// auto rank = this->comm.rank(); -// auto row_scaling_factors = dist_vec_type::create(this->exec, this->comm); -// row_scaling_factors->read_distributed(vec_md, this->row_part); - -// this->dist_mat->row_scale(row_scaling_factors); - -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_local_matrix()), -// res_row_scale_local[rank], 0); -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_non_local_matrix()), -// res_row_scale_non_local[rank], 0); -// } - - -// TYPED_TEST(Matrix, CanColScaleWithStride) -// { -// 
using value_type = typename TestFixture::value_type; -// using index_type = typename TestFixture::global_index_type; -// using csr = typename TestFixture::local_matrix_type; -// using dist_vec_type = typename TestFixture::dist_vec_type; -// auto vec_md = gko::matrix_data{ -// I>{{1}, {2}, {3}, {4}, {5}}}; -// I> res_col_scale_local[] = { -// {{8, 0}, {0, 0}}, {{0, 10}, {0, 0}}, {{0}}}; -// I> res_col_scale_non_local[] = { -// {{2, 0}, {6, 12}}, {{0, 0, 18}, {32, 35, 0}}, {{50, 9}}}; -// gko::dim<2> local_sizes[] = {{2, 1}, {2, 1}, {1, 1}}; -// auto rank = this->comm.rank(); -// auto col_scaling_factors = dist_vec_type::create( -// this->exec, this->comm, gko::dim<2>{5, 1}, local_sizes[rank], 2); -// col_scaling_factors->read_distributed(vec_md, this->col_part); - -// this->dist_mat->col_scale(col_scaling_factors); - -// ASSERT_EQ(col_scaling_factors->get_stride(), 2); -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_local_matrix()), -// res_col_scale_local[rank], 0); -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_non_local_matrix()), -// res_col_scale_non_local[rank], 0); -// } - - -// TYPED_TEST(Matrix, CanRowScaleWithStride) -// { -// using value_type = typename TestFixture::value_type; -// using index_type = typename TestFixture::global_index_type; -// using csr = typename TestFixture::local_matrix_type; -// using dist_vec_type = typename TestFixture::dist_vec_type; -// auto vec_md = gko::matrix_data{ -// I>{{1}, {2}, {3}, {4}, {5}}}; -// I> res_row_scale_local[] = { -// {{2, 0}, {0, 0}}, {{0, 15}, {0, 0}}, {{0}}}; -// I> res_row_scale_non_local[] = { -// {{1, 0}, {6, 8}}, {{0, 0, 18}, {32, 28, 0}}, {{50, 45}}}; -// gko::dim<2> local_sizes[] = {{2, 1}, {2, 1}, {1, 1}}; -// auto rank = this->comm.rank(); -// auto row_scaling_factors = dist_vec_type::create( -// this->exec, this->comm, gko::dim<2>{5, 1}, local_sizes[rank], 2); -// row_scaling_factors->read_distributed(vec_md, this->row_part); - -// this->dist_mat->row_scale(row_scaling_factors); - -// 
ASSERT_EQ(row_scaling_factors->get_stride(), 2); -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_local_matrix()), -// res_row_scale_local[rank], 0); -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_non_local_matrix()), -// res_row_scale_non_local[rank], 0); -// } - - -// TYPED_TEST(Matrix, ColScaleThrowsOnWrongDimension) -// { -// using value_type = typename TestFixture::value_type; -// using index_type = typename TestFixture::global_index_type; -// using dist_vec_type = typename TestFixture::dist_vec_type; -// using part_type = typename TestFixture::part_type; -// auto vec_md = gko::matrix_data{ -// I>{{1}, {2}, {3}, {4}}}; -// auto two_vec_md = gko::matrix_data{ -// I>{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}}}; -// auto rank = this->comm.rank(); -// auto col_part = part_type::build_from_mapping( -// this->exec, -// gko::array( -// this->exec, -// I{1, 2, 0, 0}), -// 3); -// auto col_scaling_factors = dist_vec_type::create(this->exec, this->comm); -// col_scaling_factors->read_distributed(vec_md, col_part); -// auto two_col_scaling_factors = -// dist_vec_type::create(this->exec, this->comm); -// two_col_scaling_factors->read_distributed(two_vec_md, this->col_part); - -// ASSERT_THROW(this->dist_mat->col_scale(col_scaling_factors), -// gko::DimensionMismatch); -// ASSERT_THROW(this->dist_mat->col_scale(two_col_scaling_factors), -// gko::ValueMismatch); -// } - - -// TYPED_TEST(Matrix, RowScaleThrowsOnWrongDimension) -// { -// using value_type = typename TestFixture::value_type; -// using index_type = typename TestFixture::global_index_type; -// using dist_vec_type = typename TestFixture::dist_vec_type; -// using part_type = typename TestFixture::part_type; -// auto vec_md = gko::matrix_data{ -// I>{{1}, {2}, {3}, {4}}}; -// auto two_vec_md = gko::matrix_data{ -// I>{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}}}; -// auto rank = this->comm.rank(); -// auto row_part = part_type::build_from_contiguous( -// this->exec, -// gko::array(this->exec, I{0, 2, 3, 4})); -// 
auto row_scaling_factors = dist_vec_type::create(this->exec, this->comm); -// row_scaling_factors->read_distributed(vec_md, row_part); -// auto two_row_scaling_factors = -// dist_vec_type::create(this->exec, this->comm); -// two_row_scaling_factors->read_distributed(two_vec_md, this->col_part); - -// ASSERT_THROW(this->dist_mat->row_scale(row_scaling_factors), -// gko::DimensionMismatch); -// ASSERT_THROW(this->dist_mat->row_scale(two_row_scaling_factors), -// gko::ValueMismatch); -// } - - -// TYPED_TEST(Matrix, CanConvertToNextPrecision) -// { -// using T = typename TestFixture::value_type; -// using csr = typename TestFixture::local_matrix_type; -// using local_index_type = typename TestFixture::local_index_type; -// using global_index_type = typename TestFixture::global_index_type; -// using OtherT = typename gko::next_precision; -// using OtherDist = typename gko::experimental::distributed::Matrix< -// OtherT, local_index_type, global_index_type>; -// auto tmp = OtherDist::create(this->ref, this->comm); -// auto res = TestFixture::dist_mtx_type::create(this->ref, this->comm); -// // If OtherT is more precise: 0, otherwise r -// auto residual = r::value < r::value -// ? 
gko::remove_complex{0} -// : gko::remove_complex{r::value}; - -// this->dist_mat->convert_to(tmp); -// tmp->convert_to(res); - -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_local_matrix()), -// gko::as(res->get_local_matrix()), residual); -// GKO_ASSERT_MTX_NEAR(gko::as(this->dist_mat->get_non_local_matrix()), -// gko::as(res->get_non_local_matrix()), residual); -// } - - -// TYPED_TEST(Matrix, CanMoveToNextPrecision) -// { -// using T = typename TestFixture::value_type; -// using csr = typename TestFixture::local_matrix_type; -// using local_index_type = typename TestFixture::local_index_type; -// using global_index_type = typename TestFixture::global_index_type; -// using OtherT = typename gko::next_precision; -// using OtherDist = typename gko::experimental::distributed::Matrix< -// OtherT, local_index_type, global_index_type>; -// auto tmp = OtherDist::create(this->ref, this->comm); -// auto res = TestFixture::dist_mtx_type::create(this->ref, this->comm); -// auto clone_dist_mat = gko::clone(this->dist_mat); -// // If OtherT is more precise: 0, otherwise r -// auto residual = r::value < r::value -// ? 
gko::remove_complex{0} -// : gko::remove_complex{r::value}; - -// this->dist_mat->move_to(tmp); -// tmp->convert_to(res); - -// GKO_ASSERT_MTX_NEAR(gko::as(clone_dist_mat->get_local_matrix()), -// gko::as(res->get_local_matrix()), residual); -// GKO_ASSERT_MTX_NEAR(gko::as(clone_dist_mat->get_non_local_matrix()), -// gko::as(res->get_non_local_matrix()), residual); -// } - - -// bool needs_transfers(std::shared_ptr exec) -// { -// return exec->get_master() != exec && -// !gko::experimental::mpi::is_gpu_aware(); -// } - - -// class HostToDeviceLogger : public gko::log::Logger { -// public: -// void on_copy_started(const gko::Executor* exec_from, -// const gko::Executor* exec_to, -// const gko::uintptr& loc_from, -// const gko::uintptr& loc_to, -// const gko::size_type& num_bytes) const override -// { -// if (exec_from != exec_to) { -// transfer_count_++; -// } -// } - -// int get_transfer_count() const { return transfer_count_; } - -// static std::unique_ptr create() -// { -// return std::unique_ptr(new HostToDeviceLogger()); -// } - -// protected: -// explicit HostToDeviceLogger() -// : gko::log::Logger(gko::log::Logger::copy_started_mask) -// {} - -// private: -// mutable int transfer_count_ = 0; -// }; - - -// class MatrixGpuAwareCheck : public CommonMpiTestFixture { -// public: -// using local_index_type = gko::int32; -// using global_index_type = gko::int64; -// using dist_mtx_type = -// gko::experimental::distributed::Matrix; -// using dist_vec_type = gko::experimental::distributed::Vector; -// using dense_vec_type = gko::matrix::Dense; - -// MatrixGpuAwareCheck() -// : logger(gko::share(HostToDeviceLogger::create())), engine(42) -// { -// exec->add_logger(logger); - -// mat = dist_mtx_type::create(exec, comm); -// x = dist_vec_type::create(exec, comm); -// y = dist_vec_type::create(exec, comm); - -// alpha = dense_vec_type::create(exec, gko::dim<2>{1, 1}); -// beta = dense_vec_type::create(exec, gko::dim<2>{1, 1}); -// } - - -// std::unique_ptr mat; - -// 
std::unique_ptr x; -// std::unique_ptr y; - -// std::unique_ptr alpha; -// std::unique_ptr beta; - -// std::shared_ptr logger; - -// std::default_random_engine engine; -// }; - - -// TEST_F(MatrixGpuAwareCheck, ApplyCopiesToHostOnlyIfNecessary) -// { -// auto transfer_count_before = logger->get_transfer_count(); +TYPED_TEST(DdMatrix, CanApplyToSingleVector) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::global_index_type; + auto vec_md = gko::matrix_data{I>{ + {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}}}; + I> result[3] = { + {{-4}, {-3}, {-2}, {-1}}, {{0}, {1}, {-1}, {0}}, {{1}, {2}, {3}, {4}}}; + auto rank = this->comm.rank(); + this->dist_mat->read_distributed(this->dist_input[rank], this->row_part); + this->x->read_distributed(vec_md, this->row_part); + this->y->read_distributed(vec_md, this->row_part); -// mat->apply(x, y); + this->dist_mat->apply(this->x, this->y); -// ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, -// needs_transfers(exec)); -// } + GKO_ASSERT_MTX_NEAR(this->y->get_local_vector(), result[rank], 0); +} +TYPED_TEST(DdMatrix, CanAdvancedApplyToSingleVector) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::global_index_type; + using dense_vec_type = typename TestFixture::dense_vec_type; + auto vec_md = gko::matrix_data{I>{ + {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}}}; + I> result[3] = { + {{-3}, {-1}, {1}, {3}}, {{5}, {7}, {6}, {8}}, {{10}, {12}, {14}, {16}}}; + auto rank = this->comm.rank(); + this->dist_mat->read_distributed(this->dist_input[rank], this->row_part); + this->x->read_distributed(vec_md, this->row_part); + this->y->read_distributed(vec_md, this->row_part); + auto alpha = gko::initialize({1.0}, this->exec); + auto beta = gko::initialize({1.0}, this->exec); -// TEST_F(MatrixGpuAwareCheck, AdvancedApplyCopiesToHostOnlyIfNecessary) -// { -// auto transfer_count_before = 
logger->get_transfer_count(); + this->dist_mat->apply(alpha, this->x, beta, this->y); -// mat->apply(alpha, x, beta, y); + GKO_ASSERT_MTX_NEAR(this->y->get_local_vector(), result[rank], 0); +} -// ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, -// needs_transfers(exec)); -// } +#endif \ No newline at end of file