From b4efba50c844409451ee1b044d94451c5e4eda58 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Fri, 25 Oct 2024 18:22:25 +0200 Subject: [PATCH] factorization --- .../factorization/cholesky_kernels.cpp | 12 ++-- .../factorization/factorization_kernels.cpp | 10 ++-- common/cuda_hip/factorization/ic_kernels.cpp | 3 +- common/cuda_hip/factorization/ilu_kernels.cpp | 2 +- common/cuda_hip/factorization/lu_kernels.cpp | 6 +- .../cuda_hip/factorization/par_ic_kernels.cpp | 29 +++++---- .../factorization/par_ict_kernels.cpp | 26 +++++--- .../factorization/par_ilu_kernels.cpp | 32 ++++++---- .../par_ilut_approx_filter_kernels.cpp | 2 +- .../factorization/par_ilut_filter_kernels.cpp | 2 +- .../factorization/par_ilut_select_common.cpp | 16 +++-- .../factorization/par_ilut_select_kernels.cpp | 19 ++++-- .../factorization/par_ilut_select_kernels.hpp | 4 +- .../factorization/par_ilut_spgeam_kernels.cpp | 2 +- .../factorization/par_ilut_sweep_kernels.cpp | 36 ++++++----- core/device_hooks/common_kernels.inc.cpp | 60 +++++++++++-------- core/factorization/cholesky.cpp | 2 +- core/factorization/elimination_forest.cpp | 3 +- core/factorization/factorization.cpp | 3 +- core/factorization/ic.cpp | 2 +- core/factorization/ilu.cpp | 2 +- core/factorization/lu.cpp | 2 +- core/factorization/par_ic.cpp | 2 +- core/factorization/par_ict.cpp | 2 +- core/factorization/par_ilu.cpp | 2 +- core/factorization/par_ilut.cpp | 2 +- core/factorization/symbolic.cpp | 8 ++- .../test/factorization/elimination_forest.cpp | 2 +- core/test/factorization/par_ic.cpp | 3 +- core/test/factorization/par_ict.cpp | 3 +- core/test/factorization/par_ilu.cpp | 3 +- core/test/factorization/par_ilut.cpp | 2 +- .../factorization_kernels.dp.cpp | 12 ++-- dpcpp/factorization/par_ic_kernels.dp.cpp | 8 +-- dpcpp/factorization/par_ict_kernels.dp.cpp | 6 +- .../par_ilut_filter_kernels.hpp.inc | 4 +- .../par_ilut_select_kernels.hpp.inc | 8 +-- omp/factorization/cholesky_kernels.cpp | 12 ++-- omp/factorization/factorization_kernels.cpp | 10 ++-- omp/factorization/ic_kernels.cpp | 3 +- omp/factorization/ilu_kernels.cpp | 2 +- omp/factorization/lu_kernels.cpp | 6 +- omp/factorization/par_ic_kernels.cpp | 4 +- omp/factorization/par_ict_kernels.cpp | 4 +- omp/factorization/par_ilu_kernels.cpp | 2 +- omp/factorization/par_ilut_kernels.cpp | 10 ++-- reference/factorization/cholesky_kernels.cpp | 12 ++-- .../factorization/factorization_kernels.cpp | 10 ++-- reference/factorization/ic_kernels.cpp | 3 +- reference/factorization/ilu_kernels.cpp | 2 +- reference/factorization/lu_kernels.cpp | 6 +- reference/factorization/par_ic_kernels.cpp | 4 +- reference/factorization/par_ict_kernels.cpp | 4 +- reference/factorization/par_ilu_kernels.cpp | 2 +- reference/factorization/par_ilut_kernels.cpp | 17 ++++-- .../test/factorization/cholesky_kernels.cpp | 2 +- .../test/factorization/factorization.cpp | 2 +- reference/test/factorization/ic_kernels.cpp | 3 +- reference/test/factorization/ilu_kernels.cpp | 3 +- reference/test/factorization/lu_kernels.cpp | 11 ++-- .../test/factorization/par_ic_kernels.cpp | 3 +- .../test/factorization/par_ict_kernels.cpp | 3 +- .../test/factorization/par_ilu_kernels.cpp | 3 +- .../test/factorization/par_ilut_kernels.cpp | 27 ++++++--- test/factorization/lu_kernels.cpp | 2 +- test/factorization/par_ic_kernels.cpp | 8 ++- test/factorization/par_ict_kernels.cpp | 13 ++-- test/factorization/par_ilu_kernels.cpp | 9 ++- test/factorization/par_ilut_kernels.cpp | 53 +++++++++------- 69 files changed, 359 insertions(+), 238 deletions(-) diff --git a/common/cuda_hip/factorization/cholesky_kernels.cpp b/common/cuda_hip/factorization/cholesky_kernels.cpp index e5f2bf5e5e5..b02fbde8c01 100644 --- a/common/cuda_hip/factorization/cholesky_kernels.cpp +++ b/common/cuda_hip/factorization/cholesky_kernels.cpp @@ -253,7 +253,7 @@ void symbolic_factorize( postorder, postorder_parent, out_row_ptrs, out_cols); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_CHOLESKY_SYMBOLIC_FACTORIZE); @@ -312,7 +312,7 @@ void forest_from_factor( build_children_from_parents(exec, forest); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_CHOLESKY_FOREST_FROM_FACTOR); @@ -346,7 +346,8 @@ void initialize(std::shared_ptr exec, transpose_idxs); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_INITIALIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_CHOLESKY_INITIALIZE); template @@ -372,7 +373,8 @@ void factorize(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_FACTORIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_CHOLESKY_FACTORIZE); template @@ -428,7 +430,7 @@ void symbolic_count(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_CHOLESKY_SYMBOLIC_COUNT); diff --git a/common/cuda_hip/factorization/factorization_kernels.cpp b/common/cuda_hip/factorization/factorization_kernels.cpp index e3ec943b18c..08f366d911e 100644 --- a/common/cuda_hip/factorization/factorization_kernels.cpp +++ b/common/cuda_hip/factorization/factorization_kernels.cpp @@ -441,7 +441,7 @@ void add_diagonal_elements(std::shared_ptr exec, mtx_builder.get_col_idx_array() = std::move(new_col_idx_array); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_ADD_DIAGONAL_ELEMENTS_KERNEL); @@ -471,7 +471,7 @@ void initialize_row_ptrs_l_u( components::prefix_sum_nonnegative(exec, u_row_ptrs, num_rows + 1); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_U_KERNEL); @@ -497,7 +497,7 @@ void initialize_l_u(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL); @@ -525,7 +525,7 @@ void initialize_row_ptrs_l( components::prefix_sum_nonnegative(exec, l_row_ptrs, num_rows + 1); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL); @@ -549,7 +549,7 @@ void initialize_l(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); diff --git a/common/cuda_hip/factorization/ic_kernels.cpp b/common/cuda_hip/factorization/ic_kernels.cpp index 62963c479bd..0e662ced915 100644 --- a/common/cuda_hip/factorization/ic_kernels.cpp +++ b/common/cuda_hip/factorization/ic_kernels.cpp @@ -54,7 +54,8 @@ void compute(std::shared_ptr exec, sparselib::destroy(desc); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_IC_COMPUTE_KERNEL); } // namespace ic_factorization diff --git a/common/cuda_hip/factorization/ilu_kernels.cpp b/common/cuda_hip/factorization/ilu_kernels.cpp index 0469b80fe86..1ae3e93a642 100644 --- a/common/cuda_hip/factorization/ilu_kernels.cpp +++ b/common/cuda_hip/factorization/ilu_kernels.cpp @@ -54,7 +54,7 @@ void compute_lu(std::shared_ptr exec, sparselib::destroy(desc); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); diff --git a/common/cuda_hip/factorization/lu_kernels.cpp b/common/cuda_hip/factorization/lu_kernels.cpp index aa432bf711c..cb91bbe5625 100644 --- a/common/cuda_hip/factorization/lu_kernels.cpp +++ b/common/cuda_hip/factorization/lu_kernels.cpp @@ -245,7 +245,8 @@ void initialize(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_INITIALIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_LU_INITIALIZE); template @@ -268,7 +269,8 @@ void factorize(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_FACTORIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_LU_FACTORIZE); template diff --git a/common/cuda_hip/factorization/par_ic_kernels.cpp b/common/cuda_hip/factorization/par_ic_kernels.cpp index ee8b7c97f64..87e2fefd823 100644 --- a/common/cuda_hip/factorization/par_ic_kernels.cpp +++ b/common/cuda_hip/factorization/par_ic_kernels.cpp @@ -110,7 +110,7 @@ void init_factor(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); @@ -123,19 +123,28 @@ void compute_factor(std::shared_ptr exec, auto nnz = l->get_num_stored_elements(); auto num_blocks = ceildiv(nnz, default_block_size); if (num_blocks > 0) { - for (size_type i = 0; i < iterations; ++i) { - kernel::ic_sweep<<get_stream()>>>( - a_lower->get_const_row_idxs(), a_lower->get_const_col_idxs(), - as_device_type(a_lower->get_const_values()), - l->get_const_row_ptrs(), l->get_const_col_idxs(), - as_device_type(l->get_values()), - static_cast(l->get_num_stored_elements())); +#ifdef GKO_COMPILING_HIP + if constexpr (std::is_same, half>::value) { + // HIP does not support 16bit atomic operation + GKO_NOT_SUPPORTED(a_lower); + } else +#endif + { + for (size_type i = 0; i < iterations; ++i) { + kernel::ic_sweep<<get_stream()>>>( + a_lower->get_const_row_idxs(), + a_lower->get_const_col_idxs(), + as_device_type(a_lower->get_const_values()), + l->get_const_row_ptrs(), l->get_const_col_idxs(), + as_device_type(l->get_values()), + static_cast(l->get_num_stored_elements())); + } } } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); diff --git a/common/cuda_hip/factorization/par_ict_kernels.cpp b/common/cuda_hip/factorization/par_ict_kernels.cpp index 3446f124123..0acf0633a2c 100644 --- a/common/cuda_hip/factorization/par_ict_kernels.cpp +++ b/common/cuda_hip/factorization/par_ict_kernels.cpp @@ -390,13 +390,21 @@ void compute_factor(syn::value_list, auto block_size = default_block_size / subwarp_size; auto num_blocks = ceildiv(total_nnz, block_size); if (num_blocks > 0) { - kernel::ict_sweep - <<get_stream()>>>( - a->get_const_row_ptrs(), a->get_const_col_idxs(), - as_device_type(a->get_const_values()), l->get_const_row_ptrs(), - l_coo->get_const_row_idxs(), l->get_const_col_idxs(), - as_device_type(l->get_values()), - static_cast(l->get_num_stored_elements())); +#ifdef GKO_COMPILING_HIP + if constexpr (std::is_same, half>::value) { + // HIP does not support 16bit atomic operation + GKO_NOT_SUPPORTED(l); + } else +#endif + { + kernel::ict_sweep + <<get_stream()>>>( + a->get_const_row_ptrs(), a->get_const_col_idxs(), + as_device_type(a->get_const_values()), + l->get_const_row_ptrs(), l_coo->get_const_row_idxs(), + l->get_const_col_idxs(), as_device_type(l->get_values()), + static_cast(l->get_num_stored_elements())); + } } } @@ -427,7 +435,7 @@ void add_candidates(std::shared_ptr exec, syn::value_list(), syn::type_list<>(), exec, llh, a, l, l_new); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ICT_ADD_CANDIDATES_KERNEL); @@ -449,7 +457,7 @@ void compute_factor(std::shared_ptr exec, syn::value_list(), syn::type_list<>(), exec, a, l, l_coo); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ICT_COMPUTE_FACTOR_KERNEL); diff --git a/common/cuda_hip/factorization/par_ilu_kernels.cpp b/common/cuda_hip/factorization/par_ilu_kernels.cpp index 8bf71c471a8..a22bb85275a 100644 --- a/common/cuda_hip/factorization/par_ilu_kernels.cpp +++ b/common/cuda_hip/factorization/par_ilu_kernels.cpp @@ -94,21 +94,31 @@ void compute_l_u_factors(std::shared_ptr exec, const auto grid_dim = static_cast( ceildiv(num_elements, static_cast(block_size))); if (grid_dim > 0) { - for (size_type i = 0; i < iterations; ++i) { - kernel::compute_l_u_factors<<get_stream()>>>( - num_elements, system_matrix->get_const_row_idxs(), - system_matrix->get_const_col_idxs(), - as_device_type(system_matrix->get_const_values()), - l_factor->get_const_row_ptrs(), l_factor->get_const_col_idxs(), - as_device_type(l_factor->get_values()), - u_factor->get_const_row_ptrs(), u_factor->get_const_col_idxs(), - as_device_type(u_factor->get_values())); +#ifdef GKO_COMPILING_HIP + if constexpr (std::is_same, half>::value) { + // HIP does not support 16bit atomic operation + GKO_NOT_SUPPORTED(system_matrix); + } else +#endif + { + for (size_type i = 0; i < iterations; ++i) { + kernel::compute_l_u_factors<<get_stream()>>>( + num_elements, system_matrix->get_const_row_idxs(), + system_matrix->get_const_col_idxs(), + as_device_type(system_matrix->get_const_values()), + l_factor->get_const_row_ptrs(), + l_factor->get_const_col_idxs(), + as_device_type(l_factor->get_values()), + u_factor->get_const_row_ptrs(), + u_factor->get_const_col_idxs(), + as_device_type(u_factor->get_values())); + } } } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL); diff --git a/common/cuda_hip/factorization/par_ilut_approx_filter_kernels.cpp b/common/cuda_hip/factorization/par_ilut_approx_filter_kernels.cpp index 12d8da9e4f5..475d87b8bda 100644 --- a/common/cuda_hip/factorization/par_ilut_approx_filter_kernels.cpp +++ b/common/cuda_hip/factorization/par_ilut_approx_filter_kernels.cpp @@ -168,7 +168,7 @@ void threshold_filter_approx(std::shared_ptr exec, &threshold, m_out, m_out_coo); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_APPROX_KERNEL); diff --git a/common/cuda_hip/factorization/par_ilut_filter_kernels.cpp b/common/cuda_hip/factorization/par_ilut_filter_kernels.cpp index 25432fb44d2..d6ad2f477eb 100644 --- a/common/cuda_hip/factorization/par_ilut_filter_kernels.cpp +++ b/common/cuda_hip/factorization/par_ilut_filter_kernels.cpp @@ -123,7 +123,7 @@ void threshold_filter(std::shared_ptr exec, m_out_coo, lower); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_KERNEL); diff --git a/common/cuda_hip/factorization/par_ilut_select_common.cpp b/common/cuda_hip/factorization/par_ilut_select_common.cpp index fccb89fcf5a..3bb67d96e4f 100644 --- a/common/cuda_hip/factorization/par_ilut_select_common.cpp +++ b/common/cuda_hip/factorization/par_ilut_select_common.cpp @@ -43,9 +43,17 @@ void sampleselect_count(std::shared_ptr exec, auto num_threads_total = ceildiv(size, items_per_thread); auto num_blocks = static_cast(ceildiv(num_threads_total, default_block_size)); - // pick sample, build searchtree - kernel::build_searchtree<<<1, bucket_count, 0, exec->get_stream()>>>( - as_device_type(values), size, as_device_type(tree)); +#ifdef GKO_COMPILING_HIP + if constexpr (std::is_same, half>::value) { + // HIP does not support 16bit atomic operation + GKO_NOT_SUPPORTED(values); + } else +#endif + { + // pick sample, build searchtree + kernel::build_searchtree<<<1, bucket_count, 0, exec->get_stream()>>>( + as_device_type(values), size, as_device_type(tree)); + } // determine bucket sizes if (num_blocks > 0) { kernel::count_buckets<< exec, unsigned char* oracles, IndexType* partial_counts, \ IndexType* total_counts) -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_SSSS_COUNT); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(DECLARE_SSSS_COUNT); template diff --git a/common/cuda_hip/factorization/par_ilut_select_kernels.cpp b/common/cuda_hip/factorization/par_ilut_select_kernels.cpp index e03ee379977..a15adf580e8 100644 --- a/common/cuda_hip/factorization/par_ilut_select_kernels.cpp +++ b/common/cuda_hip/factorization/par_ilut_select_kernels.cpp @@ -141,13 +141,22 @@ void threshold_select(std::shared_ptr exec, // base case auto out_ptr = reinterpret_cast(tmp1.get_data()); - kernel::basecase_select<<<1, kernel::basecase_block_size, 0, - exec->get_stream()>>>( - as_device_type(tmp22), bucket.size, rank, as_device_type(out_ptr)); - threshold = exec->copy_val_to_host(out_ptr); + +#ifdef GKO_COMPILING_HIP + if constexpr (std::is_same, half>::value) { + // HIP does not support 16bit atomic operation + GKO_NOT_SUPPORTED(m); + } else +#endif + { + kernel::basecase_select<<<1, kernel::basecase_block_size, 0, + exec->get_stream()>>>( + as_device_type(tmp22), bucket.size, rank, as_device_type(out_ptr)); + threshold = exec->copy_val_to_host(out_ptr); + } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_SELECT_KERNEL); diff --git a/common/cuda_hip/factorization/par_ilut_select_kernels.hpp b/common/cuda_hip/factorization/par_ilut_select_kernels.hpp index 79a562ff834..b88c052d19a 100644 --- a/common/cuda_hip/factorization/par_ilut_select_kernels.hpp +++ b/common/cuda_hip/factorization/par_ilut_select_kernels.hpp @@ -254,12 +254,12 @@ __global__ __launch_bounds__(basecase_block_size) void basecase_select( const ValueType* __restrict__ input, IndexType size, IndexType rank, ValueType* __restrict__ out) { - constexpr auto sentinel = device_numeric_limits::inf(); + const auto sentinel = device_numeric_limits::inf(); ValueType local[basecase_local_size]; __shared__ ValueType sh_local[basecase_size]; for (int i = 0; i < basecase_local_size; ++i) { auto idx = threadIdx.x + i * basecase_block_size; - local[i] = idx < size ? input[idx] : sentinel; + local[i] = idx < size ? input[idx] : static_cast(sentinel); } bitonic_sort(local, sh_local); if (threadIdx.x == rank / basecase_local_size) { diff --git a/common/cuda_hip/factorization/par_ilut_spgeam_kernels.cpp b/common/cuda_hip/factorization/par_ilut_spgeam_kernels.cpp index a29cf6f2cb3..8f7a8af0443 100644 --- a/common/cuda_hip/factorization/par_ilut_spgeam_kernels.cpp +++ b/common/cuda_hip/factorization/par_ilut_spgeam_kernels.cpp @@ -389,7 +389,7 @@ void add_candidates(std::shared_ptr exec, u_new); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_ADD_CANDIDATES_KERNEL); diff --git a/common/cuda_hip/factorization/par_ilut_sweep_kernels.cpp b/common/cuda_hip/factorization/par_ilut_sweep_kernels.cpp index 52f62b50e6a..c0f962a89c8 100644 --- a/common/cuda_hip/factorization/par_ilut_sweep_kernels.cpp +++ b/common/cuda_hip/factorization/par_ilut_sweep_kernels.cpp @@ -154,18 +154,26 @@ void compute_l_u_factors(syn::value_list, auto block_size = default_block_size / subwarp_size; auto num_blocks = ceildiv(total_nnz, block_size); if (num_blocks > 0) { - kernel::sweep - <<get_stream()>>>( - a->get_const_row_ptrs(), a->get_const_col_idxs(), - as_device_type(a->get_const_values()), l->get_const_row_ptrs(), - l_coo->get_const_row_idxs(), l->get_const_col_idxs(), - as_device_type(l->get_values()), - static_cast(l->get_num_stored_elements()), - u_coo->get_const_row_idxs(), u_coo->get_const_col_idxs(), - as_device_type(u->get_values()), u_csc->get_const_row_ptrs(), - u_csc->get_const_col_idxs(), - as_device_type(u_csc->get_values()), - static_cast(u->get_num_stored_elements())); +#ifdef GKO_COMPILING_HIP + if constexpr (std::is_same, half>::value) { + // HIP does not support 16bit atomic operation + GKO_NOT_SUPPORTED(a); + } else +#endif + { + kernel::sweep + <<get_stream()>>>( + a->get_const_row_ptrs(), a->get_const_col_idxs(), + as_device_type(a->get_const_values()), + l->get_const_row_ptrs(), l_coo->get_const_row_idxs(), + l->get_const_col_idxs(), as_device_type(l->get_values()), + static_cast(l->get_num_stored_elements()), + u_coo->get_const_row_idxs(), u_coo->get_const_col_idxs(), + as_device_type(u->get_values()), + u_csc->get_const_row_ptrs(), u_csc->get_const_col_idxs(), + as_device_type(u_csc->get_values()), + static_cast(u->get_num_stored_elements())); + } } } @@ -199,11 +207,11 @@ void compute_l_u_factors(std::shared_ptr exec, u_csc); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_COMPUTE_LU_FACTORS_KERNEL); } // namespace par_ilut_factorization } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index d58ca4d7198..2c9ef463316 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -959,11 +959,13 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ISAI_SCATTER_EXCESS_SOLUTION_KERNEL); namespace cholesky { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_SYMBOLIC_COUNT); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_SYMBOLIC_FACTORIZE); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_FOREST_FROM_FACTOR); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_INITIALIZE); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_FACTORIZE); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_CHOLESKY_SYMBOLIC_COUNT); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_CHOLESKY_SYMBOLIC_FACTORIZE); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_CHOLESKY_FOREST_FROM_FACTOR); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_CHOLESKY_INITIALIZE); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_CHOLESKY_FACTORIZE); } // namespace cholesky @@ -972,14 +974,16 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_FACTORIZE); namespace factorization { -GKO_STUB_VALUE_AND_INDEX_TYPE( +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_ADD_DIAGONAL_ELEMENTS_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE( +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_U_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE( +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); } // namespace factorization @@ -988,7 +992,7 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); namespace ic_factorization { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_IC_COMPUTE_KERNEL); } // namespace ic_factorization @@ -997,7 +1001,7 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); namespace ilu_factorization { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); } // namespace ilu_factorization @@ -1006,8 +1010,8 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); namespace lu_factorization { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_INITIALIZE); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_FACTORIZE); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_LU_INITIALIZE); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_LU_FACTORIZE); GKO_STUB_INDEX_TYPE(GKO_DECLARE_LU_SYMMETRIC_FACTORIZE_SIMPLE); GKO_STUB_INDEX_TYPE(GKO_DECLARE_LU_SYMMETRIC_FACTORIZE_SIMPLE_FINALIZE); @@ -1018,8 +1022,9 @@ GKO_STUB_INDEX_TYPE(GKO_DECLARE_LU_SYMMETRIC_FACTORIZE_SIMPLE_FINALIZE); namespace par_ic_factorization { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); } // namespace par_ic_factorization @@ -1028,8 +1033,10 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); namespace par_ict_factorization { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ICT_ADD_CANDIDATES_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ICT_COMPUTE_FACTOR_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_PAR_ICT_ADD_CANDIDATES_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_PAR_ICT_COMPUTE_FACTOR_KERNEL); } // namespace par_ict_factorization @@ -1038,7 +1045,8 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ICT_COMPUTE_FACTOR_KERNEL); namespace par_ilu_factorization { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL); } // namespace par_ilu_factorization @@ -1047,11 +1055,15 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL); namespace par_ilut_factorization { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ILUT_ADD_CANDIDATES_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ILUT_COMPUTE_LU_FACTORS_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ILUT_THRESHOLD_SELECT_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE( +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_PAR_ILUT_ADD_CANDIDATES_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_PAR_ILUT_COMPUTE_LU_FACTORS_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_PAR_ILUT_THRESHOLD_SELECT_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_APPROX_KERNEL); diff --git a/core/factorization/cholesky.cpp b/core/factorization/cholesky.cpp index 81627ad229b..8a6fae3f715 100644 --- a/core/factorization/cholesky.cpp +++ b/core/factorization/cholesky.cpp @@ -146,7 +146,7 @@ std::unique_ptr Cholesky::generate_impl( #define GKO_DECLARE_CHOLESKY(ValueType, IndexType) \ class Cholesky -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_CHOLESKY); } // namespace factorization diff --git a/core/factorization/elimination_forest.cpp b/core/factorization/elimination_forest.cpp index 1dc8ff060a0..f8d6d861c2d 100644 --- a/core/factorization/elimination_forest.cpp +++ b/core/factorization/elimination_forest.cpp @@ -173,7 +173,8 @@ void compute_elim_forest(const matrix::Csr* mtx, const matrix::Csr* mtx, \ std::unique_ptr>& forest) -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COMPUTE_ELIM_FOREST); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_COMPUTE_ELIM_FOREST); } // namespace factorization diff --git a/core/factorization/factorization.cpp b/core/factorization/factorization.cpp index 1df1f49aa13..e0e4ccdc3c7 100644 --- a/core/factorization/factorization.cpp +++ b/core/factorization/factorization.cpp @@ -362,7 +362,8 @@ void Factorization::apply_impl(const LinOp* alpha, #define GKO_DECLARE_FACTORIZATION(ValueType, IndexType) \ class Factorization -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FACTORIZATION); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_FACTORIZATION); } // namespace factorization diff --git a/core/factorization/ic.cpp b/core/factorization/ic.cpp index 2257e6256e4..01b1b61439d 100644 --- a/core/factorization/ic.cpp +++ b/core/factorization/ic.cpp @@ -114,7 +114,7 @@ std::unique_ptr> Ic::generate( #define GKO_DECLARE_IC(ValueType, IndexType) class Ic -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_IC); } // namespace factorization diff --git a/core/factorization/ilu.cpp b/core/factorization/ilu.cpp index 41df4065979..2634286cf83 100644 --- a/core/factorization/ilu.cpp +++ b/core/factorization/ilu.cpp @@ -116,7 +116,7 @@ std::unique_ptr> Ilu::generate_l_u( #define GKO_DECLARE_ILU(ValueType, IndexType) class Ilu -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ILU); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_ILU); } // namespace factorization diff --git a/core/factorization/lu.cpp b/core/factorization/lu.cpp index fb9cab4154a..8ab3aceadce 100644 --- a/core/factorization/lu.cpp +++ b/core/factorization/lu.cpp @@ -170,7 +170,7 @@ std::unique_ptr Lu::generate_impl( #define GKO_DECLARE_LU(ValueType, IndexType) class Lu -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_LU); } // namespace factorization diff --git a/core/factorization/par_ic.cpp b/core/factorization/par_ic.cpp index f4a4afd23d6..b310025eb8d 100644 --- a/core/factorization/par_ic.cpp +++ b/core/factorization/par_ic.cpp @@ -146,7 +146,7 @@ std::unique_ptr> ParIc::generate( #define GKO_DECLARE_PAR_IC(ValueType, IndexType) \ class ParIc -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_IC); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_PAR_IC); } // namespace factorization diff --git a/core/factorization/par_ict.cpp b/core/factorization/par_ict.cpp index a0e8a628ca8..696b185e969 100644 --- a/core/factorization/par_ict.cpp +++ b/core/factorization/par_ict.cpp @@ -300,7 +300,7 @@ void ParIctState::iterate() #define GKO_DECLARE_PAR_ICT(ValueType, IndexType) \ class ParIct -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ICT); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_PAR_ICT); } // namespace factorization diff --git a/core/factorization/par_ilu.cpp b/core/factorization/par_ilu.cpp index 68c0c0c4fc6..177c150df1d 100644 --- a/core/factorization/par_ilu.cpp +++ b/core/factorization/par_ilu.cpp @@ -161,7 +161,7 @@ ParIlu::generate_l_u( #define GKO_DECLARE_PAR_ILU(ValueType, IndexType) \ class ParIlu -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ILU); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_PAR_ILU); } // namespace factorization diff --git a/core/factorization/par_ilut.cpp b/core/factorization/par_ilut.cpp index 42e3cc03130..e90dbb8140f 100644 --- a/core/factorization/par_ilut.cpp +++ b/core/factorization/par_ilut.cpp @@ -352,7 +352,7 @@ void ParIlutState::iterate() #define GKO_DECLARE_PAR_ILUT(ValueType, IndexType) \ class ParIlut -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ILUT); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(GKO_DECLARE_PAR_ILUT); } // namespace factorization diff --git a/core/factorization/symbolic.cpp b/core/factorization/symbolic.cpp index 23f6b94cc14..495b830d7ea 100644 --- a/core/factorization/symbolic.cpp +++ b/core/factorization/symbolic.cpp @@ -80,7 +80,8 @@ void symbolic_cholesky( std::unique_ptr>& factors, \ std::unique_ptr>& forest) -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SYMBOLIC_CHOLESKY); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_SYMBOLIC_CHOLESKY); template @@ -158,7 +159,7 @@ void symbolic_lu_near_symm( const matrix::Csr* mtx, \ std::unique_ptr>& factors) -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_SYMBOLIC_LU_NEAR_SYMM); @@ -245,7 +246,8 @@ void symbolic_lu(const matrix::Csr* mtx, const matrix::Csr* mtx, \ std::unique_ptr>& factors) -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SYMBOLIC_LU); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_SYMBOLIC_LU); } // namespace factorization diff --git a/core/test/factorization/elimination_forest.cpp b/core/test/factorization/elimination_forest.cpp index 292b366f50e..cf9ddb7536e 100644 --- a/core/test/factorization/elimination_forest.cpp +++ b/core/test/factorization/elimination_forest.cpp @@ -33,7 +33,7 @@ class EliminationForest : public ::testing::Test { std::shared_ptr ref; }; -TYPED_TEST_SUITE(EliminationForest, gko::test::ValueIndexTypes, +TYPED_TEST_SUITE(EliminationForest, gko::test::ValueIndexTypesWithHalf, PairTypenameNameGenerator); diff --git a/core/test/factorization/par_ic.cpp b/core/test/factorization/par_ic.cpp index d6de0f9fc98..efd4c1e3ebd 100644 --- a/core/test/factorization/par_ic.cpp +++ b/core/test/factorization/par_ic.cpp @@ -29,7 +29,8 @@ class ParIc : public ::testing::Test { std::shared_ptr ref; }; -TYPED_TEST_SUITE(ParIc, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(ParIc, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(ParIc, SetIterations) diff --git a/core/test/factorization/par_ict.cpp b/core/test/factorization/par_ict.cpp index 07eec8db549..5d5ac8bc815 100644 --- a/core/test/factorization/par_ict.cpp +++ b/core/test/factorization/par_ict.cpp @@ -29,7 +29,8 @@ class ParIct : public ::testing::Test { std::shared_ptr ref; }; -TYPED_TEST_SUITE(ParIct, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(ParIct, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(ParIct, SetIterations) diff --git a/core/test/factorization/par_ilu.cpp b/core/test/factorization/par_ilu.cpp index a0b8f37e3d4..e06a90741af 100644 --- a/core/test/factorization/par_ilu.cpp +++ b/core/test/factorization/par_ilu.cpp @@ -29,7 +29,8 @@ class ParIlu : public ::testing::Test { std::shared_ptr ref; }; -TYPED_TEST_SUITE(ParIlu, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(ParIlu, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(ParIlu, SetIterations) diff --git a/core/test/factorization/par_ilut.cpp b/core/test/factorization/par_ilut.cpp index ad466e62407..a2d0287d22a 100644 --- a/core/test/factorization/par_ilut.cpp +++ b/core/test/factorization/par_ilut.cpp @@ -30,7 +30,7 @@ class ParIlut : public ::testing::Test { std::shared_ptr ref; }; -TYPED_TEST_SUITE(ParIlut, gko::test::ValueIndexTypes, +TYPED_TEST_SUITE(ParIlut, gko::test::ValueIndexTypesWithHalf, PairTypenameNameGenerator); diff --git a/dpcpp/factorization/factorization_kernels.dp.cpp b/dpcpp/factorization/factorization_kernels.dp.cpp index 1d9912b4f12..afebacb693f 100644 --- a/dpcpp/factorization/factorization_kernels.dp.cpp +++ b/dpcpp/factorization/factorization_kernels.dp.cpp @@ -450,7 +450,7 @@ void initialize_l(size_type num_rows, const IndexType* __restrict__ row_ptrs, l_col_idxs[l_diag_idx] = row; // compute square root with sentinel if (use_sqrt) { - diag_val = std::sqrt(diag_val); + diag_val = gko::sqrt(diag_val); if (!is_finite(diag_val)) { diag_val = one(); } @@ -550,7 +550,7 @@ void add_diagonal_elements(std::shared_ptr exec, mtx_builder.get_col_idx_array() = std::move(new_col_idxs); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_ADD_DIAGONAL_ELEMENTS_KERNEL); @@ -577,7 +577,7 @@ void initialize_row_ptrs_l_u( components::prefix_sum_nonnegative(exec, u_row_ptrs, num_rows + 1); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_U_KERNEL); @@ -602,7 +602,7 @@ void initialize_l_u(std::shared_ptr exec, csr_u->get_col_idxs(), csr_u->get_values()); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL); @@ -627,7 +627,7 @@ void initialize_row_ptrs_l( components::prefix_sum_nonnegative(exec, l_row_ptrs, num_rows + 1); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL); @@ -650,7 +650,7 @@ void initialize_l(std::shared_ptr exec, csr_l->get_values(), diag_sqrt); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); diff --git a/dpcpp/factorization/par_ic_kernels.dp.cpp b/dpcpp/factorization/par_ic_kernels.dp.cpp index 5428460fac5..91819dd98d0 100644 --- a/dpcpp/factorization/par_ic_kernels.dp.cpp +++ b/dpcpp/factorization/par_ic_kernels.dp.cpp @@ -41,7 +41,7 @@ void ic_init(const IndexType* __restrict__ l_row_ptrs, return; } auto l_nz = l_row_ptrs[row + 1] - 1; - auto diag = std::sqrt(l_vals[l_nz]); + auto diag = gko::sqrt(l_vals[l_nz]); if (is_finite(diag)) { l_vals[l_nz] = diag; } else { @@ -93,7 +93,7 @@ void ic_sweep(const IndexType* __restrict__ a_row_idxs, lh_col_begin += l_col >= lh_row; } auto to_write = row == col - ? std::sqrt(a_val - sum) + ? gko::sqrt(a_val - sum) : (a_val - sum) / l_vals[l_row_ptrs[col + 1] - 1]; if (is_finite(to_write)) { l_vals[l_nz] = to_write; @@ -130,7 +130,7 @@ void init_factor(std::shared_ptr exec, l_row_ptrs, l_vals, num_rows); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); @@ -152,7 +152,7 @@ void compute_factor(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); diff --git a/dpcpp/factorization/par_ict_kernels.dp.cpp b/dpcpp/factorization/par_ict_kernels.dp.cpp index fb99b662dec..6a704641252 100644 --- a/dpcpp/factorization/par_ict_kernels.dp.cpp +++ b/dpcpp/factorization/par_ict_kernels.dp.cpp @@ -356,7 +356,7 @@ void ict_sweep(const IndexType* __restrict__ a_row_ptrs, if (subwarp.thread_rank() == 0) { auto to_write = row == col - ? std::sqrt(a_val - sum) + ? gko::sqrt(a_val - sum) : (a_val - sum) / l_vals[l_row_ptrs[col + 1] - 1]; if (is_finite(to_write)) { l_vals[l_nz] = to_write; @@ -483,7 +483,7 @@ void add_candidates(std::shared_ptr exec, syn::value_list(), syn::type_list<>(), exec, llh, a, l, l_new); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ICT_ADD_CANDIDATES_KERNEL); @@ -505,7 +505,7 @@ void compute_factor(std::shared_ptr exec, syn::value_list(), syn::type_list<>(), exec, a, l, l_coo); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ICT_COMPUTE_FACTOR_KERNEL); diff --git a/dpcpp/factorization/par_ilut_filter_kernels.hpp.inc b/dpcpp/factorization/par_ilut_filter_kernels.hpp.inc index d2345848d1f..6081bc0f417 100644 --- a/dpcpp/factorization/par_ilut_filter_kernels.hpp.inc +++ b/dpcpp/factorization/par_ilut_filter_kernels.hpp.inc @@ -102,7 +102,7 @@ void threshold_filter_nnz(const IndexType* __restrict__ row_ptrs, row_ptrs, num_rows, [&](IndexType idx, IndexType row_begin, IndexType row_end) { auto diag_idx = lower ? row_end - 1 : row_begin; - return std::abs(vals[idx]) >= threshold || idx == diag_idx; + return gko::abs(vals[idx]) >= threshold || idx == diag_idx; }, nnz, item_ct1); } @@ -140,7 +140,7 @@ void threshold_filter(const IndexType* __restrict__ old_row_ptrs, old_row_ptrs, old_col_idxs, old_vals, num_rows, [&](IndexType idx, IndexType row_begin, IndexType row_end) { auto diag_idx = lower ? row_end - 1 : row_begin; - return std::abs(old_vals[idx]) >= threshold || idx == diag_idx; + return gko::abs(old_vals[idx]) >= threshold || idx == diag_idx; }, new_row_ptrs, new_row_idxs, new_col_idxs, new_vals, item_ct1); } diff --git a/dpcpp/factorization/par_ilut_select_kernels.hpp.inc b/dpcpp/factorization/par_ilut_select_kernels.hpp.inc index 67cc9cdba15..430bf650e07 100644 --- a/dpcpp/factorization/par_ilut_select_kernels.hpp.inc +++ b/dpcpp/factorization/par_ilut_select_kernels.hpp.inc @@ -38,7 +38,7 @@ void build_searchtree(const ValueType* __restrict__ input, IndexType size, for (int i = 0; i < sampleselect_oversampling; ++i) { auto lidx = idx * sampleselect_oversampling + i; auto val = input[static_cast(lidx * stride)]; - samples[i] = std::abs(val); + samples[i] = gko::abs(val); } bitonic_sort(samples, sh_samples, @@ -113,7 +113,7 @@ void count_buckets(const ValueType* __restrict__ input, IndexType size, auto end = min(block_end, size); for (IndexType i = begin; i < end; i += default_block_size) { // traverse the search tree with the input element - auto el = std::abs(input[i]); + auto el = gko::abs(input[i]); IndexType tree_idx{}; #pragma unroll for (int level = 0; level < sampleselect_searchtree_height; ++level) { @@ -297,7 +297,7 @@ void filter_bucket(const ValueType* __restrict__ input, IndexType size, auto found = bucket == oracles[i]; auto ofs = atomic_add(&*counter, IndexType{found}); if (found) { - output[ofs] = std::abs(input[i]); + output[ofs] = gko::abs(input[i]); } } } @@ -337,7 +337,7 @@ void basecase_select(const ValueType* __restrict__ input, IndexType size, for (int i = 0; i < basecase_local_size; ++i) { auto idx = item_ct1.get_local_id(2) + i * basecase_block_size; - local[i] = idx < size ? input[idx] : sentinel; + local[i] = idx < size ? input[idx] : static_cast(sentinel); } bitonic_sort(local, sh_local, item_ct1); if (item_ct1.get_local_id(2) == rank / basecase_local_size) { diff --git a/omp/factorization/cholesky_kernels.cpp b/omp/factorization/cholesky_kernels.cpp index 8ce5392ebde..c1dbfa2a4b5 100644 --- a/omp/factorization/cholesky_kernels.cpp +++ b/omp/factorization/cholesky_kernels.cpp @@ -78,7 +78,7 @@ void symbolic_count(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_CHOLESKY_SYMBOLIC_COUNT); @@ -126,7 +126,7 @@ void symbolic_factorize( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_CHOLESKY_SYMBOLIC_FACTORIZE); @@ -169,7 +169,7 @@ void forest_from_factor( num_rows, num_rows + 1, child_ptrs); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_CHOLESKY_FOREST_FROM_FACTOR); @@ -201,7 +201,8 @@ void initialize(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_INITIALIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_CHOLESKY_INITIALIZE); template @@ -248,7 +249,8 @@ void factorize(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_FACTORIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_CHOLESKY_FACTORIZE); } // namespace cholesky diff --git a/omp/factorization/factorization_kernels.cpp b/omp/factorization/factorization_kernels.cpp index f4b41cbdac5..7ab46e3522f 100644 --- a/omp/factorization/factorization_kernels.cpp +++ b/omp/factorization/factorization_kernels.cpp @@ -179,7 +179,7 @@ void add_diagonal_elements(std::shared_ptr exec, mtx_builder.get_col_idx_array() = std::move(new_col_idxs); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_ADD_DIAGONAL_ELEMENTS_KERNEL); @@ -214,7 +214,7 @@ void initialize_row_ptrs_l_u( components::prefix_sum_nonnegative(exec, u_row_ptrs, num_rows + 1); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_U_KERNEL); @@ -269,7 +269,7 @@ void initialize_l_u(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL); @@ -300,7 +300,7 @@ void initialize_row_ptrs_l( components::prefix_sum_nonnegative(exec, l_row_ptrs, num_rows + 1); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL); @@ -348,7 +348,7 @@ void initialize_l(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); diff --git a/omp/factorization/ic_kernels.cpp b/omp/factorization/ic_kernels.cpp index f9b78abc835..cf12eb91d4a 100644 --- a/omp/factorization/ic_kernels.cpp +++ b/omp/factorization/ic_kernels.cpp @@ -20,7 +20,8 @@ template void compute(std::shared_ptr exec, matrix::Csr* m) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_IC_COMPUTE_KERNEL); } // namespace ic_factorization diff --git a/omp/factorization/ilu_kernels.cpp b/omp/factorization/ilu_kernels.cpp index 70982c80753..f9e68b3bd66 100644 --- a/omp/factorization/ilu_kernels.cpp +++ b/omp/factorization/ilu_kernels.cpp @@ -20,7 +20,7 @@ template void compute_lu(std::shared_ptr exec, matrix::Csr* m) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); diff --git a/omp/factorization/lu_kernels.cpp b/omp/factorization/lu_kernels.cpp index 53847ff2b6c..697e3bc26ab 100644 --- a/omp/factorization/lu_kernels.cpp +++ b/omp/factorization/lu_kernels.cpp @@ -59,7 +59,8 @@ void initialize(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_INITIALIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_LU_INITIALIZE); template @@ -96,7 +97,8 @@ void factorize(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_FACTORIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_LU_FACTORIZE); template diff --git a/omp/factorization/par_ic_kernels.cpp b/omp/factorization/par_ic_kernels.cpp index 93093783acc..9488c448519 100644 --- a/omp/factorization/par_ic_kernels.cpp +++ b/omp/factorization/par_ic_kernels.cpp @@ -42,7 +42,7 @@ void init_factor(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); @@ -96,7 +96,7 @@ void compute_factor(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); diff --git a/omp/factorization/par_ict_kernels.cpp b/omp/factorization/par_ict_kernels.cpp index b5546e1a644..a67ad860965 100644 --- a/omp/factorization/par_ict_kernels.cpp +++ b/omp/factorization/par_ict_kernels.cpp @@ -91,7 +91,7 @@ void compute_factor(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ICT_COMPUTE_FACTOR_KERNEL); @@ -166,7 +166,7 @@ void add_candidates(std::shared_ptr exec, [](IndexType, row_state) {}); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ICT_ADD_CANDIDATES_KERNEL); diff --git a/omp/factorization/par_ilu_kernels.cpp b/omp/factorization/par_ilu_kernels.cpp index da42a631b81..0504bca8b1d 100644 --- a/omp/factorization/par_ilu_kernels.cpp +++ b/omp/factorization/par_ilu_kernels.cpp @@ -88,7 +88,7 @@ void compute_l_u_factors(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL); diff --git a/omp/factorization/par_ilut_kernels.cpp b/omp/factorization/par_ilut_kernels.cpp index a24709e4f1a..4ce75d02472 100644 --- a/omp/factorization/par_ilut_kernels.cpp +++ b/omp/factorization/par_ilut_kernels.cpp @@ -54,7 +54,7 @@ void threshold_select(std::shared_ptr exec, threshold = abs(*target); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_SELECT_KERNEL); @@ -144,7 +144,7 @@ void threshold_filter(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_KERNEL); @@ -228,7 +228,7 @@ void threshold_filter_approx(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_APPROX_KERNEL); @@ -312,7 +312,7 @@ void compute_l_u_factors(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_COMPUTE_LU_FACTORS_KERNEL); @@ -428,7 +428,7 @@ void add_candidates(std::shared_ptr exec, [](IndexType, row_state) {}); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_ADD_CANDIDATES_KERNEL); diff --git a/reference/factorization/cholesky_kernels.cpp b/reference/factorization/cholesky_kernels.cpp index 2aeee99d45d..b53a86c2c52 100644 --- a/reference/factorization/cholesky_kernels.cpp +++ b/reference/factorization/cholesky_kernels.cpp @@ -63,7 +63,7 @@ void symbolic_count(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_CHOLESKY_SYMBOLIC_COUNT); @@ -102,7 +102,7 @@ void symbolic_factorize( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_CHOLESKY_SYMBOLIC_FACTORIZE); @@ -140,7 +140,7 @@ void forest_from_factor( num_rows + 1, child_ptrs); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_CHOLESKY_FOREST_FROM_FACTOR); @@ -172,7 +172,8 @@ void initialize(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_INITIALIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_CHOLESKY_INITIALIZE); template @@ -220,7 +221,8 @@ void factorize(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CHOLESKY_FACTORIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_CHOLESKY_FACTORIZE); } // namespace cholesky diff --git a/reference/factorization/factorization_kernels.cpp b/reference/factorization/factorization_kernels.cpp index 085e2f62ecc..b9427833035 100644 --- a/reference/factorization/factorization_kernels.cpp +++ b/reference/factorization/factorization_kernels.cpp @@ -126,7 +126,7 @@ void add_diagonal_elements(std::shared_ptr exec, mtx_builder.get_col_idx_array() = std::move(new_col_idxs_array); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_ADD_DIAGONAL_ELEMENTS_KERNEL); @@ -158,7 +158,7 @@ void initialize_row_ptrs_l_u( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_U_KERNEL); @@ -212,7 +212,7 @@ void initialize_l_u(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL); @@ -239,7 +239,7 @@ void initialize_row_ptrs_l( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL); @@ -286,7 +286,7 @@ void initialize_l(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); diff --git a/reference/factorization/ic_kernels.cpp b/reference/factorization/ic_kernels.cpp index 6f88467262a..b97e2bd5e38 100644 --- a/reference/factorization/ic_kernels.cpp +++ b/reference/factorization/ic_kernels.cpp @@ -69,7 +69,8 @@ void compute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_IC_COMPUTE_KERNEL); } // namespace ic_factorization diff --git a/reference/factorization/ilu_kernels.cpp b/reference/factorization/ilu_kernels.cpp index fdbe8a9e86f..8d46e91c7f5 100644 --- a/reference/factorization/ilu_kernels.cpp +++ b/reference/factorization/ilu_kernels.cpp @@ -65,7 +65,7 @@ void compute_lu(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); diff --git a/reference/factorization/lu_kernels.cpp b/reference/factorization/lu_kernels.cpp index d8516cffb49..a414b1dffa5 100644 --- a/reference/factorization/lu_kernels.cpp +++ b/reference/factorization/lu_kernels.cpp @@ -58,7 +58,8 @@ void initialize(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_INITIALIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_LU_INITIALIZE); template @@ -94,7 +95,8 @@ void factorize(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_FACTORIZE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( + GKO_DECLARE_LU_FACTORIZE); template diff --git a/reference/factorization/par_ic_kernels.cpp b/reference/factorization/par_ic_kernels.cpp index 4da317cf201..e8f3a9273f4 100644 --- a/reference/factorization/par_ic_kernels.cpp +++ b/reference/factorization/par_ic_kernels.cpp @@ -46,7 +46,7 @@ void init_factor(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); @@ -96,7 +96,7 @@ void compute_factor(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); diff --git a/reference/factorization/par_ict_kernels.cpp b/reference/factorization/par_ict_kernels.cpp index 684158d380c..c6b192b328b 100644 --- a/reference/factorization/par_ict_kernels.cpp +++ b/reference/factorization/par_ict_kernels.cpp @@ -89,7 +89,7 @@ void compute_factor(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ICT_COMPUTE_FACTOR_KERNEL); @@ -167,7 +167,7 @@ void add_candidates(std::shared_ptr exec, [](IndexType, row_state) {}); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ICT_ADD_CANDIDATES_KERNEL); diff --git a/reference/factorization/par_ilu_kernels.cpp b/reference/factorization/par_ilu_kernels.cpp index 44c2e5f66bc..ddcc41d1070 100644 --- a/reference/factorization/par_ilu_kernels.cpp +++ b/reference/factorization/par_ilu_kernels.cpp @@ -86,7 +86,7 @@ void compute_l_u_factors(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL); diff --git a/reference/factorization/par_ilut_kernels.cpp b/reference/factorization/par_ilut_kernels.cpp index abef6e9b5f2..3827a99fbc1 100644 --- a/reference/factorization/par_ilut_kernels.cpp +++ b/reference/factorization/par_ilut_kernels.cpp @@ -58,7 +58,7 @@ void threshold_select(std::shared_ptr exec, threshold = abs(*target); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_SELECT_KERNEL); @@ -150,7 +150,7 @@ void threshold_filter(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_KERNEL); @@ -191,7 +191,12 @@ void threshold_filter_approx(std::shared_ptr exec, // pick splitters for (IndexType i = 0; i < bucket_count - 1; ++i) { // shift by one so we get upper bounds for the buckets - sample[i] = sample[(i + 1) * sampleselect_oversampling]; + // TODO FIXME: NVHPC 23.3 seems to handle assignment index with + // optimization wrongly on a custom class when IndexType is long. We set + // the index explicitly with volatile to solve it. + // https://godbolt.org/z/srYhGndKn + volatile auto index = (i + 1) * sampleselect_oversampling; + sample[i] = sample[index]; } // count elements per bucket auto histogram = reinterpret_cast(sample + bucket_count); @@ -221,7 +226,7 @@ void threshold_filter_approx(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_APPROX_KERNEL); @@ -309,7 +314,7 @@ void compute_l_u_factors(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_COMPUTE_LU_FACTORS_KERNEL); @@ -432,7 +437,7 @@ void add_candidates(std::shared_ptr exec, [](IndexType, row_state) {}); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF( GKO_DECLARE_PAR_ILUT_ADD_CANDIDATES_KERNEL); diff --git a/reference/test/factorization/cholesky_kernels.cpp b/reference/test/factorization/cholesky_kernels.cpp index d63e491e26a..dcdd081942b 100644 --- a/reference/test/factorization/cholesky_kernels.cpp +++ b/reference/test/factorization/cholesky_kernels.cpp @@ -248,7 +248,7 @@ class Cholesky : public ::testing::Test { std::shared_ptr combined_ref; }; -TYPED_TEST_SUITE(Cholesky, gko::test::ValueIndexTypes, +TYPED_TEST_SUITE(Cholesky, gko::test::ValueIndexTypesWithHalf, PairTypenameNameGenerator); diff --git a/reference/test/factorization/factorization.cpp b/reference/test/factorization/factorization.cpp index 2ded81d4867..73bf8cdc321 100644 --- a/reference/test/factorization/factorization.cpp +++ b/reference/test/factorization/factorization.cpp @@ -70,7 +70,7 @@ class Factorization : public ::testing::Test { std::shared_ptr beta; }; -TYPED_TEST_SUITE(Factorization, gko::test::ValueIndexTypes, +TYPED_TEST_SUITE(Factorization, gko::test::ValueIndexTypesWithHalf, PairTypenameNameGenerator); diff --git a/reference/test/factorization/ic_kernels.cpp b/reference/test/factorization/ic_kernels.cpp index cdcb6b12bc8..ee01e18087a 100644 --- a/reference/test/factorization/ic_kernels.cpp +++ b/reference/test/factorization/ic_kernels.cpp @@ -83,7 +83,8 @@ class Ic : public ::testing::Test { gko::remove_complex tol; }; -TYPED_TEST_SUITE(Ic, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(Ic, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(Ic, ThrowNotSupportedForWrongLinOp) diff --git a/reference/test/factorization/ilu_kernels.cpp b/reference/test/factorization/ilu_kernels.cpp index c750ca93fc8..9dcf839c0de 100644 --- a/reference/test/factorization/ilu_kernels.cpp +++ b/reference/test/factorization/ilu_kernels.cpp @@ -170,7 +170,8 @@ class Ilu : public ::testing::Test { std::unique_ptr ilu_factory_sort; }; -TYPED_TEST_SUITE(Ilu, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(Ilu, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(Ilu, ThrowNotSupportedForWrongLinOp1) diff --git a/reference/test/factorization/lu_kernels.cpp b/reference/test/factorization/lu_kernels.cpp index f4a8b240b38..54c6ae4f696 100644 --- a/reference/test/factorization/lu_kernels.cpp +++ b/reference/test/factorization/lu_kernels.cpp @@ -98,7 +98,8 @@ class Lu : public ::testing::Test { gko::array row_descs; }; -TYPED_TEST_SUITE(Lu, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(Lu, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(Lu, SymbolicCholeskyWorks) @@ -219,7 +220,7 @@ TYPED_TEST(Lu, KernelFactorizeWorks) diag_idxs.get_const_data(), this->mtx_lu.get(), tmp); GKO_ASSERT_MTX_NEAR(this->mtx_lu, mtx_lu_ref, - 15 * r::value); + 30 * r::value); }); } @@ -268,7 +269,7 @@ TYPED_TEST(Lu, FactorizeNonsymmetricWorks) GKO_ASSERT_MTX_EQ_SPARSITY(lu->get_combined(), this->mtx_lu); GKO_ASSERT_MTX_NEAR(lu->get_combined(), this->mtx_lu, - 15 * r::value); + 30 * r::value); ASSERT_EQ(lu->get_storage_type(), gko::experimental::factorization::storage_type::combined_lu); ASSERT_EQ(lu->get_lower_factor(), nullptr); @@ -294,7 +295,7 @@ TYPED_TEST(Lu, FactorizeNearSymmetricWorks) GKO_ASSERT_MTX_EQ_SPARSITY(lu->get_combined(), this->mtx_lu); GKO_ASSERT_MTX_NEAR(lu->get_combined(), this->mtx_lu, - 15 * r::value); + 30 * r::value); ASSERT_EQ(lu->get_storage_type(), gko::experimental::factorization::storage_type::combined_lu); ASSERT_EQ(lu->get_lower_factor(), nullptr); @@ -321,7 +322,7 @@ TYPED_TEST(Lu, FactorizeWithKnownSparsityWorks) auto lu = factory->generate(this->mtx); GKO_ASSERT_MTX_NEAR(lu->get_combined(), this->mtx_lu, - 15 * r::value); + 30 * r::value); ASSERT_EQ(lu->get_storage_type(), gko::experimental::factorization::storage_type::combined_lu); ASSERT_EQ(lu->get_lower_factor(), nullptr); diff --git a/reference/test/factorization/par_ic_kernels.cpp b/reference/test/factorization/par_ic_kernels.cpp index b9caf8c9e5e..481e89bb744 100644 --- a/reference/test/factorization/par_ic_kernels.cpp +++ b/reference/test/factorization/par_ic_kernels.cpp @@ -104,7 +104,8 @@ class ParIc : public ::testing::Test { gko::remove_complex tol; }; -TYPED_TEST_SUITE(ParIc, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(ParIc, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(ParIc, KernelCompute) diff --git a/reference/test/factorization/par_ict_kernels.cpp b/reference/test/factorization/par_ict_kernels.cpp index 55ac5771732..d3b6df59f42 100644 --- a/reference/test/factorization/par_ict_kernels.cpp +++ b/reference/test/factorization/par_ict_kernels.cpp @@ -137,7 +137,8 @@ class ParIct : public ::testing::Test { gko::remove_complex tol; }; -TYPED_TEST_SUITE(ParIct, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(ParIct, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(ParIct, KernelInitializeRowPtrsL) diff --git a/reference/test/factorization/par_ilu_kernels.cpp b/reference/test/factorization/par_ilu_kernels.cpp index bf4e422f640..3d590c1a6d6 100644 --- a/reference/test/factorization/par_ilu_kernels.cpp +++ b/reference/test/factorization/par_ilu_kernels.cpp @@ -180,7 +180,8 @@ class ParIlu : public ::testing::Test { std::unique_ptr ilu_factory_sort; }; -TYPED_TEST_SUITE(ParIlu, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(ParIlu, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(ParIlu, KernelAddDiagonalElementsEmpty) diff --git a/reference/test/factorization/par_ilut_kernels.cpp b/reference/test/factorization/par_ilut_kernels.cpp index 59805f246f8..3a6ba9232da 100644 --- a/reference/test/factorization/par_ilut_kernels.cpp +++ b/reference/test/factorization/par_ilut_kernels.cpp @@ -54,6 +54,7 @@ class ParIlut : public ::testing::Test { using ComplexCsr = gko::matrix::Csr>, index_type>; + using complex_value_type = std::complex>; ParIlut() : ref(gko::ReferenceExecutor::create()), @@ -75,16 +76,24 @@ class ParIlut : public ::testing::Test { {0., -3., 0., 1.}}, ref)), mtx1_complex(gko::initialize( - {{{.1, 0.}, {0., 0.}, {0., 0.}, {0., 0.}}, - {{-1., .1}, {.1, -1.}, {0., 0.}, {0., 0.}}, - {{-1., 1.}, {-2., .2}, {-1., -.3}, {0., 0.}}, - {{1., -2.}, {-3., -.1}, {-1., .1}, {.1, 2.}}}, + {{complex_value_type{.1, 0.}, complex_value_type{0., 0.}, + complex_value_type{0., 0.}, complex_value_type{0., 0.}}, + {complex_value_type{-1., .1}, complex_value_type{.1, -1.}, + complex_value_type{0., 0.}, complex_value_type{0., 0.}}, + {complex_value_type{-1., 1.}, complex_value_type{-2., .2}, + complex_value_type{-1., -.3}, complex_value_type{0., 0.}}, + {complex_value_type{1., -2.}, complex_value_type{-3., -.1}, + complex_value_type{-1., .1}, complex_value_type{.1, 2.}}}, ref)), mtx1_expect_complex_thrm(gko::initialize( - {{{.1, 0.}, {0., 0.}, {0., 0.}, {0., 0.}}, - {{0., 0.}, {.1, -1.}, {0., 0.}, {0., 0.}}, - {{-1., 1.}, {-2., .2}, {-1., -.3}, {0., 0.}}, - {{1., -2.}, {-3., -.1}, {0., 0.}, {.1, 2.}}}, + {{complex_value_type{.1, 0.}, complex_value_type{0., 0.}, + complex_value_type{0., 0.}, complex_value_type{0., 0.}}, + {complex_value_type{0., 0.}, complex_value_type{.1, -1.}, + complex_value_type{0., 0.}, complex_value_type{0., 0.}}, + {complex_value_type{-1., 1.}, complex_value_type{-2., .2}, + complex_value_type{-1., -.3}, complex_value_type{0., 0.}}, + {complex_value_type{1., -2.}, complex_value_type{-3., -.1}, + complex_value_type{0., 0.}, complex_value_type{.1, 2.}}}, ref)), identity(gko::initialize( {{1., 0., 0.}, {0., 1., 0.}, {0., 0., 1.}}, ref)), @@ -268,7 +277,7 @@ class ParIlut : public ::testing::Test { gko::remove_complex tol; }; // namespace -TYPED_TEST_SUITE(ParIlut, gko::test::ValueIndexTypes, +TYPED_TEST_SUITE(ParIlut, gko::test::ValueIndexTypesWithHalf, PairTypenameNameGenerator); diff --git a/test/factorization/lu_kernels.cpp b/test/factorization/lu_kernels.cpp index d38b6346cd8..712a3addcb9 100644 --- a/test/factorization/lu_kernels.cpp +++ b/test/factorization/lu_kernels.cpp @@ -127,7 +127,7 @@ class Lu : public CommonTestFixture { }; #ifdef GKO_COMPILING_OMP -using Types = gko::test::ValueIndexTypes; +using Types = gko::test::ValueIndexTypesWithHalf; #elif defined(GKO_COMPILING_CUDA) // CUDA don't support long indices for sorting, and the triangular solvers // seem broken diff --git a/test/factorization/par_ic_kernels.cpp b/test/factorization/par_ic_kernels.cpp index de2342a28db..bb53a454e21 100644 --- a/test/factorization/par_ic_kernels.cpp +++ b/test/factorization/par_ic_kernels.cpp @@ -41,8 +41,7 @@ class ParIc : public CommonTestFixture { mtx_l = gko::test::generate_random_lower_triangular_matrix( mtx_size[0], false, std::uniform_int_distribution(10, mtx_size[0]), - std::normal_distribution>(0, 10.0), - rand_engine, ref); + std::normal_distribution<>(0, 10.0), rand_engine, ref); dmtx_ani = Csr::create(exec); dmtx_l_ani = Csr::create(exec); dmtx_l_ani_init = Csr::create(exec); @@ -87,7 +86,8 @@ class ParIc : public CommonTestFixture { std::unique_ptr dmtx_l_ani_init; }; -TYPED_TEST_SUITE(ParIc, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(ParIc, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(ParIc, KernelInitFactorIsEquivalentToRef) @@ -107,6 +107,8 @@ TYPED_TEST(ParIc, KernelComputeFactorIsEquivalentToRef) { using Csr = typename TestFixture::Csr; using Coo = typename TestFixture::Coo; + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); auto square_size = this->mtx_ani->get_size(); auto mtx_l_coo = Coo::create(this->ref, square_size); this->mtx_l_ani->convert_to(mtx_l_coo); diff --git a/test/factorization/par_ict_kernels.cpp b/test/factorization/par_ict_kernels.cpp index 3b33e52630c..945f874ef26 100644 --- a/test/factorization/par_ict_kernels.cpp +++ b/test/factorization/par_ict_kernels.cpp @@ -47,15 +47,11 @@ class ParIct : public CommonTestFixture { mtx = gko::test::generate_random_matrix( mtx_size[0], mtx_size[1], std::uniform_int_distribution(10, mtx_size[1]), - std::normal_distribution>(-1.0, - 1.0), - rand_engine, ref); + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); mtx_l = gko::test::generate_random_lower_triangular_matrix( mtx_size[0], false, std::uniform_int_distribution(10, mtx_size[0]), - std::normal_distribution>(-1.0, - 1.0), - rand_engine, ref); + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); dmtx_ani = Csr::create(exec); dmtx_l_ani = Csr::create(exec); @@ -97,7 +93,8 @@ class ParIct : public CommonTestFixture { std::unique_ptr dmtx_l; }; -TYPED_TEST_SUITE(ParIct, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(ParIct, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(ParIct, KernelAddCandidatesIsEquivalentToRef) @@ -127,6 +124,8 @@ TYPED_TEST(ParIct, KernelComputeFactorIsEquivalentToRef) { using Csr = typename TestFixture::Csr; using Coo = typename TestFixture::Coo; + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); auto square_size = this->mtx_ani->get_size(); auto mtx_l_coo = Coo::create(this->ref, square_size); this->mtx_l_ani->convert_to(mtx_l_coo); diff --git a/test/factorization/par_ilu_kernels.cpp b/test/factorization/par_ilu_kernels.cpp index 88f5ecff0d9..216a4f597cb 100644 --- a/test/factorization/par_ilu_kernels.cpp +++ b/test/factorization/par_ilu_kernels.cpp @@ -59,8 +59,7 @@ class ParIlu : public CommonTestFixture { return gko::test::generate_random_matrix( num_rows, num_cols, std::uniform_int_distribution(0, num_cols - 1), - std::normal_distribution>(0.0, 1.0), - rand_engine, ref); + std::normal_distribution<>(0.0, 1.0), rand_engine, ref); } std::unique_ptr gen_unsorted_mtx(index_type num_rows, @@ -145,7 +144,8 @@ class ParIlu : public CommonTestFixture { } }; -TYPED_TEST_SUITE(ParIlu, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST_SUITE(ParIlu, gko::test::ValueIndexTypesWithHalf, + PairTypenameNameGenerator); TYPED_TEST(ParIlu, KernelAddDiagonalElementsSortedEquivalentToRef) @@ -237,6 +237,8 @@ TYPED_TEST(ParIlu, KernelInitializeParILUIsEquivalentToRef) TYPED_TEST(ParIlu, KernelComputeParILUIsEquivalentToRef) { using Csr = typename TestFixture::Csr; + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); std::unique_ptr l_mtx{}; std::unique_ptr u_mtx{}; std::unique_ptr dl_mtx{}; @@ -255,6 +257,7 @@ TYPED_TEST(ParIlu, KernelComputeParILUWithMoreIterationsIsEquivalentToRef) { using Csr = typename TestFixture::Csr; using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); std::unique_ptr l_mtx{}; std::unique_ptr u_mtx{}; std::unique_ptr dl_mtx{}; diff --git a/test/factorization/par_ilut_kernels.cpp b/test/factorization/par_ilut_kernels.cpp index dff3cc702c1..6804a3edcce 100644 --- a/test/factorization/par_ilut_kernels.cpp +++ b/test/factorization/par_ilut_kernels.cpp @@ -48,39 +48,27 @@ class ParIlut : public CommonTestFixture { mtx1 = gko::test::generate_random_matrix( mtx_size[0], mtx_size[1], std::uniform_int_distribution(10, mtx_size[1]), - std::normal_distribution>(-1.0, - 1.0), - rand_engine, ref); + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); mtx2 = gko::test::generate_random_matrix( mtx_size[0], mtx_size[1], std::uniform_int_distribution(0, mtx_size[1]), - std::normal_distribution>(-1.0, - 1.0), - rand_engine, ref); + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); mtx_square = gko::test::generate_random_matrix( mtx_size[0], mtx_size[0], std::uniform_int_distribution(1, mtx_size[0]), - std::normal_distribution>(-1.0, - 1.0), - rand_engine, ref); + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); mtx_l = gko::test::generate_random_lower_triangular_matrix( mtx_size[0], false, std::uniform_int_distribution(10, mtx_size[0]), - std::normal_distribution>(-1.0, - 1.0), - rand_engine, ref); + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); mtx_l2 = gko::test::generate_random_lower_triangular_matrix( mtx_size[0], true, std::uniform_int_distribution(1, mtx_size[0]), - std::normal_distribution>(-1.0, - 1.0), - rand_engine, ref); + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); mtx_u = gko::test::generate_random_upper_triangular_matrix( mtx_size[0], false, std::uniform_int_distribution(10, mtx_size[0]), - std::normal_distribution>(-1.0, - 1.0), - rand_engine, ref); + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); dmtx1 = gko::clone(exec, mtx1); dmtx2 = gko::clone(exec, mtx2); @@ -134,7 +122,7 @@ class ParIlut : public CommonTestFixture { const std::unique_ptr& dmtx, index_type rank) { double tolerance = - gko::is_complex() ? r::value : 0.0; + gko::is_complex() ? double(r::value) : 0.0; auto size = index_type(mtx->get_num_stored_elements()); using ValueType = typename Mtx::value_type; @@ -189,7 +177,7 @@ class ParIlut : public CommonTestFixture { const std::unique_ptr& dmtx, index_type rank) { double tolerance = - gko::is_complex() ? r::value : 0.0; + gko::is_complex() ? double(r::value) : 0.0; auto res = Mtx::create(ref, mtx_size); auto dres = Mtx::create(exec, mtx_size); auto res_coo = Coo::create(ref, mtx_size); @@ -245,12 +233,15 @@ class ParIlut : public CommonTestFixture { std::unique_ptr dmtx_u; }; -TYPED_TEST_SUITE(ParIlut, gko::test::ValueIndexTypes, +TYPED_TEST_SUITE(ParIlut, gko::test::ValueIndexTypesWithHalf, PairTypenameNameGenerator); TYPED_TEST(ParIlut, KernelThresholdSelectIsEquivalentToRef) { + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); + this->test_select(this->mtx_l, this->dmtx_l, this->mtx_l->get_num_stored_elements() / 3); } @@ -258,12 +249,18 @@ TYPED_TEST(ParIlut, KernelThresholdSelectIsEquivalentToRef) TYPED_TEST(ParIlut, KernelThresholdSelectMinIsEquivalentToRef) { + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); + this->test_select(this->mtx_l, this->dmtx_l, 0); } TYPED_TEST(ParIlut, KernelThresholdSelectMaxIsEquivalentToRef) { + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); + this->test_select(this->mtx_l, this->dmtx_l, this->mtx_l->get_num_stored_elements() - 1); } @@ -330,6 +327,7 @@ TYPED_TEST(ParIlut, KernelThresholdFilterApproxNullptrCooIsEquivalentToRef) using Coo = typename TestFixture::Coo; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; + SKIP_IF_HALF(value_type); this->test_filter(this->mtx_l, this->dmtx_l, 0.5, true); auto res = Csr::create(this->ref, this->mtx_size); auto dres = Csr::create(this->exec, this->mtx_size); @@ -355,6 +353,9 @@ TYPED_TEST(ParIlut, KernelThresholdFilterApproxNullptrCooIsEquivalentToRef) TYPED_TEST(ParIlut, KernelThresholdFilterApproxLowerIsEquivalentToRef) { + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); + this->test_filter_approx(this->mtx_l, this->dmtx_l, this->mtx_l->get_num_stored_elements() / 2); } @@ -362,12 +363,18 @@ TYPED_TEST(ParIlut, KernelThresholdFilterApproxLowerIsEquivalentToRef) TYPED_TEST(ParIlut, KernelThresholdFilterApproxNoneLowerIsEquivalentToRef) { + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); + this->test_filter_approx(this->mtx_l, this->dmtx_l, 0); } TYPED_TEST(ParIlut, KernelThresholdFilterApproxAllLowerIsEquivalentToRef) { + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); + this->test_filter_approx(this->mtx_l, this->dmtx_l, this->mtx_l->get_num_stored_elements() - 1); } @@ -377,6 +384,8 @@ TYPED_TEST(ParIlut, KernelAddCandidatesIsEquivalentToRef) { using Csr = typename TestFixture::Csr; using value_type = typename TestFixture::value_type; + // there's one value larger than half range + SKIP_IF_HALF(value_type); auto square_size = this->mtx_square->get_size(); auto mtx_lu = Csr::create(this->ref, square_size); this->mtx_l2->apply(this->mtx_u, mtx_lu); @@ -405,6 +414,8 @@ TYPED_TEST(ParIlut, KernelComputeLUIsEquivalentToRef) { using Csr = typename TestFixture::Csr; using Coo = typename TestFixture::Coo; + using value_type = typename TestFixture::value_type; + SKIP_IF_HALF(value_type); auto square_size = this->mtx_ani->get_size(); auto mtx_l_coo = Coo::create(this->ref, square_size); auto mtx_u_coo = Coo::create(this->ref, square_size);