Skip to content

Commit

Permalink
use and return index_map in matrix::read_distributed
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcelKoch committed Mar 25, 2024
1 parent ba54ec9 commit 0831c13
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 42 deletions.
78 changes: 47 additions & 31 deletions core/distributed/matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ namespace matrix {
namespace {


GKO_REGISTER_OPERATION(build_local_nonlocal,
distributed_matrix::build_local_nonlocal);
GKO_REGISTER_OPERATION(separate_local_nonlocal,
distributed_matrix::separate_local_nonlocal);


} // namespace
Expand Down Expand Up @@ -56,7 +56,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::Matrix(
recv_offsets_(comm.size() + 1),
recv_sizes_(comm.size()),
gather_idxs_{exec},
non_local_to_global_{exec},
one_scalar_{},
local_mtx_{local_matrix_template->clone(exec)},
non_local_mtx_{non_local_matrix_template->clone(exec)}
Expand Down Expand Up @@ -86,7 +85,6 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::convert_to(
result->recv_offsets_ = this->recv_offsets_;
result->recv_sizes_ = this->recv_sizes_;
result->send_sizes_ = this->send_sizes_;
result->non_local_to_global_ = this->non_local_to_global_;
result->set_size(this->get_size());
}

Expand All @@ -105,18 +103,18 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::move_to(
result->recv_offsets_ = std::move(this->recv_offsets_);
result->recv_sizes_ = std::move(this->recv_sizes_);
result->send_sizes_ = std::move(this->send_sizes_);
result->non_local_to_global_ = std::move(this->non_local_to_global_);
result->set_size(this->get_size());
this->set_size({});
}


template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
index_map<LocalIndexType, GlobalIndexType>
Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
const device_matrix_data<value_type, global_index_type>& data,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
row_partition,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
col_partition)
{
const auto comm = this->get_communicator();
Expand All @@ -139,30 +137,37 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
array<local_index_type> local_col_idxs{exec};
array<value_type> local_values{exec};
array<local_index_type> non_local_row_idxs{exec};
array<local_index_type> non_local_col_idxs{exec};
array<global_index_type> global_non_local_col_idxs{exec};
array<value_type> non_local_values{exec};
array<local_index_type> recv_gather_idxs{exec};
array<comm_index_type> recv_sizes_array{exec, num_parts};

// build local, non-local matrix data and communication structures
exec->run(matrix::make_build_local_nonlocal(
// separate input into local and non-local block
// The rows and columns of the local block are mapped into local indexing,
// as well as the rows of the non-local block. The columns of the non-local
// block are still in global indices.
exec->run(matrix::make_separate_local_nonlocal(
data, make_temporary_clone(exec, row_partition).get(),
make_temporary_clone(exec, col_partition).get(), local_part,
local_row_idxs, local_col_idxs, local_values, non_local_row_idxs,
non_local_col_idxs, non_local_values, recv_gather_idxs,
recv_sizes_array, non_local_to_global_));
global_non_local_col_idxs, non_local_values));

auto imap = index_map<local_index_type, global_index_type>(
exec, col_partition, comm.rank(), global_non_local_col_idxs);

auto non_local_col_idxs =
imap.get_local(global_non_local_col_idxs, index_space::non_local);

// read the local matrix data
const auto num_local_rows =
static_cast<size_type>(row_partition->get_part_size(local_part));
const auto num_local_cols =
static_cast<size_type>(col_partition->get_part_size(local_part));
const auto num_non_local_cols = non_local_to_global_.get_size();
device_matrix_data<value_type, local_index_type> local_data{
exec, dim<2>{num_local_rows, num_local_cols}, std::move(local_row_idxs),
std::move(local_col_idxs), std::move(local_values)};
device_matrix_data<value_type, local_index_type> non_local_data{
exec, dim<2>{num_local_rows, num_non_local_cols},
exec,
dim<2>{num_local_rows,
imap.get_remote_global_idxs().get_flat().get_size()},
std::move(non_local_row_idxs), std::move(non_local_col_idxs),
std::move(non_local_values)};
as<ReadableFromMatrixData<ValueType, LocalIndexType>>(this->local_mtx_)
Expand All @@ -171,8 +176,13 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
->read(std::move(non_local_data));

// exchange step 1: determine recv_sizes, send_sizes, send_offsets
exec->get_master()->copy_from(
exec, num_parts, recv_sizes_array.get_const_data(), recv_sizes_.data());
auto host_recv_targets =
make_temporary_clone(exec->get_master(), &imap.get_remote_target_ids());
std::fill(recv_sizes_.begin(), recv_sizes_.end(), 0);
for (size_type i = 0; i < host_recv_targets->get_size(); ++i) {
recv_sizes_[host_recv_targets->get_const_data()[i]] =
imap.get_remote_global_idxs()[i].get_size();
}
std::partial_sum(recv_sizes_.begin(), recv_sizes_.end(),
recv_offsets_.begin() + 1);
comm.all_to_all(exec, recv_sizes_.data(), 1, send_sizes_.data(), 1);
Expand All @@ -182,6 +192,7 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
recv_offsets_[0] = 0;

// exchange step 2: exchange gather_idxs from receivers to senders
auto recv_gather_idxs = imap.get_remote_local_idxs().get_flat();
auto use_host_buffer = mpi::requires_host_buffer(exec, comm);
if (use_host_buffer) {
recv_gather_idxs.set_executor(exec->get_master());
Expand All @@ -196,42 +207,49 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
if (use_host_buffer) {
gather_idxs_.set_executor(exec);
}

return imap;
}


template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
index_map<LocalIndexType, GlobalIndexType>
Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
const matrix_data<value_type, global_index_type>& data,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
row_partition,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
col_partition)
{
this->read_distributed(
return this->read_distributed(
device_matrix_data<value_type, global_index_type>::create_from_host(
this->get_executor(), data),
row_partition, col_partition);
}


template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
index_map<LocalIndexType, GlobalIndexType>
Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
const matrix_data<ValueType, global_index_type>& data,
ptr_param<const Partition<local_index_type, global_index_type>> partition)
std::shared_ptr<const Partition<local_index_type, global_index_type>>
partition)
{
this->read_distributed(
return this->read_distributed(
device_matrix_data<value_type, global_index_type>::create_from_host(
this->get_executor(), data),
partition, partition);
}


template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
index_map<LocalIndexType, GlobalIndexType>
Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(
const device_matrix_data<ValueType, GlobalIndexType>& data,
ptr_param<const Partition<local_index_type, global_index_type>> partition)
std::shared_ptr<const Partition<local_index_type, global_index_type>>
partition)
{
this->read_distributed(data, partition, partition);
return this->read_distributed(data, partition, partition);
}


Expand Down Expand Up @@ -382,7 +400,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(
recv_offsets_ = other.recv_offsets_;
send_sizes_ = other.send_sizes_;
recv_sizes_ = other.recv_sizes_;
non_local_to_global_ = other.non_local_to_global_;
one_scalar_.init(this->get_executor(), dim<2>{1, 1});
one_scalar_->fill(one<value_type>());
}
Expand All @@ -406,7 +423,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(Matrix&& other)
recv_offsets_ = std::move(other.recv_offsets_);
send_sizes_ = std::move(other.send_sizes_);
recv_sizes_ = std::move(other.recv_sizes_);
non_local_to_global_ = std::move(other.non_local_to_global_);
one_scalar_.init(this->get_executor(), dim<2>{1, 1});
one_scalar_->fill(one<value_type>());
}
Expand Down
26 changes: 15 additions & 11 deletions include/ginkgo/core/distributed/matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <ginkgo/core/base/dense_cache.hpp>
#include <ginkgo/core/base/mpi.hpp>
#include <ginkgo/core/distributed/base.hpp>
#include <ginkgo/core/distributed/index_map.hpp>
#include <ginkgo/core/distributed/lin_op.hpp>


Expand Down Expand Up @@ -281,10 +282,12 @@ class Matrix
*
* @param data The device_matrix_data structure.
* @param partition The global row and column partition.
*
* @return the index_map induced by the partitions and the matrix structure
*/
void read_distributed(
index_map<local_index_type, global_index_type> read_distributed(
const device_matrix_data<value_type, global_index_type>& data,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
partition);

/**
Expand All @@ -296,9 +299,9 @@ class Matrix
* @note For efficiency it is advised to use the device_matrix_data
* overload.
*/
void read_distributed(
index_map<local_index_type, global_index_type> read_distributed(
const matrix_data<value_type, global_index_type>& data,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
partition);

/**
Expand All @@ -315,12 +318,14 @@ class Matrix
* @param data The device_matrix_data structure.
* @param row_partition The global row partition.
* @param col_partition The global col partition.
*
* @return the index_map induced by the partitions and the matrix structure
*/
void read_distributed(
index_map<local_index_type, global_index_type> read_distributed(
const device_matrix_data<value_type, global_index_type>& data,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
row_partition,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
col_partition);

/**
Expand All @@ -332,11 +337,11 @@ class Matrix
* @note For efficiency it is advised to use the device_matrix_data
* overload.
*/
void read_distributed(
index_map<local_index_type, global_index_type> read_distributed(
const matrix_data<value_type, global_index_type>& data,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
row_partition,
ptr_param<const Partition<local_index_type, global_index_type>>
std::shared_ptr<const Partition<local_index_type, global_index_type>>
col_partition);

/**
Expand Down Expand Up @@ -529,7 +534,6 @@ class Matrix
std::vector<comm_index_type> recv_offsets_;
std::vector<comm_index_type> recv_sizes_;
array<local_index_type> gather_idxs_;
array<global_index_type> non_local_to_global_;
gko::detail::DenseCache<value_type> one_scalar_;
gko::detail::DenseCache<value_type> host_send_buffer_;
gko::detail::DenseCache<value_type> host_recv_buffer_;
Expand Down

0 comments on commit 0831c13

Please sign in to comment.