diff --git a/benchmark/utils/cuda_linops.cpp b/benchmark/utils/cuda_linops.cpp index 4683d6086e1..961b055135b 100644 --- a/benchmark/utils/cuda_linops.cpp +++ b/benchmark/utils/cuda_linops.cpp @@ -87,237 +87,6 @@ class CusparseBase : public gko::LinOp { }; -#if CUDA_VERSION < 11000 - - -template -class CusparseCsrmp - : public gko::EnableLinOp, - CusparseBase>, - public gko::ReadableFromMatrixData, - public gko::EnableCreateMethod> { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; - -public: - using csr = gko::matrix::Csr; - using mat_data = gko::matrix_data; - using device_mat_data = gko::device_matrix_data; - - void read(const device_mat_data& data) override - { - this->read(data.copy_to_host()); - } - - void read(device_mat_data&& data) override - { - this->read(data.copy_to_host()); - } - - void read(const mat_data& data) override - { - csr_->read(data); - this->set_size(csr_->get_size()); - } - - gko::size_type get_num_stored_elements() const noexcept - { - return csr_->get_num_stored_elements(); - } - -protected: - void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override - { - auto dense_b = gko::as>(b); - auto dense_x = gko::as>(x); - auto db = dense_b->get_const_values(); - auto dx = dense_x->get_values(); - - auto guard = this->get_gpu_exec()->get_scoped_device_id_guard(); - gko::kernels::cuda::cusparse::spmv_mp( - this->get_gpu_exec()->get_sparselib_handle(), trans_, - this->get_size()[0], this->get_size()[1], - csr_->get_num_stored_elements(), &scalars.get_const_data()[0], - this->get_descr(), csr_->get_const_values(), - csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db, - &scalars.get_const_data()[1], dx); - } - - void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, - const gko::LinOp* beta, - gko::LinOp* x) const override GKO_NOT_IMPLEMENTED; - - CusparseCsrmp(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), - csr_(std::move( - csr::create(exec, std::make_shared()))), - trans_(SPARSELIB_OPERATION_NON_TRANSPOSE) - {} - -private: - // Contains {alpha, beta} - gko::array scalars{ - this->get_executor(), {gko::one(), gko::zero()}}; - std::shared_ptr csr_; - cusparseOperation_t trans_; -}; - - -template -class CusparseCsr - : public gko::EnableLinOp, CusparseBase>, - public gko::EnableCreateMethod>, - public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; - -public: - using csr = gko::matrix::Csr; - using mat_data = gko::matrix_data; - using device_mat_data = gko::device_matrix_data; - - void read(const device_mat_data& data) override - { - this->read(data.copy_to_host()); - } - - void read(device_mat_data&& data) override - { - this->read(data.copy_to_host()); - } - - void read(const mat_data& data) override - { - csr_->read(data); - this->set_size(csr_->get_size()); - } - - gko::size_type get_num_stored_elements() const noexcept - { - return csr_->get_num_stored_elements(); - } - -protected: - void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override - { - auto dense_b = gko::as>(b); - auto dense_x = gko::as>(x); - auto db = dense_b->get_const_values(); - auto dx = dense_x->get_values(); - - auto guard = this->get_gpu_exec()->get_scoped_device_id_guard(); - gko::kernels::cuda::cusparse::spmv( - this->get_gpu_exec()->get_sparselib_handle(), trans_, - this->get_size()[0], this->get_size()[1], - csr_->get_num_stored_elements(), &scalars.get_const_data()[0], - this->get_descr(), csr_->get_const_values(), - csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db, - &scalars.get_const_data()[1], dx); - } - - void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, - const gko::LinOp* beta, - gko::LinOp* x) const override GKO_NOT_IMPLEMENTED; - - CusparseCsr(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), - csr_(std::move( - csr::create(exec, std::make_shared()))), - trans_(SPARSELIB_OPERATION_NON_TRANSPOSE) - {} - -private: - // Contains {alpha, beta} - gko::array scalars{ - this->get_executor(), {gko::one(), gko::zero()}}; - std::shared_ptr csr_; - cusparseOperation_t trans_; -}; - - -template -class CusparseCsrmm - : public gko::EnableLinOp, - CusparseBase>, - public gko::EnableCreateMethod>, - public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; - -public: - using csr = gko::matrix::Csr; - using mat_data = gko::matrix_data; - using device_mat_data = gko::device_matrix_data; - - void read(const device_mat_data& data) override - { - this->read(data.copy_to_host()); - } - - void read(device_mat_data&& data) override - { - this->read(data.copy_to_host()); - } - - void read(const mat_data& data) override - { - csr_->read(data); - this->set_size(csr_->get_size()); - } - - gko::size_type get_num_stored_elements() const noexcept - { - return csr_->get_num_stored_elements(); - } - -protected: - void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override - { - auto dense_b = gko::as>(b); - auto dense_x = gko::as>(x); - auto db = dense_b->get_const_values(); - auto dx = dense_x->get_values(); - - auto guard = this->get_gpu_exec()->get_scoped_device_id_guard(); - gko::kernels::cuda::cusparse::spmm( - this->get_gpu_exec()->get_sparselib_handle(), trans_, - this->get_size()[0], dense_b->get_size()[1], this->get_size()[1], - csr_->get_num_stored_elements(), &scalars.get_const_data()[0], - this->get_descr(), csr_->get_const_values(), - csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db, - dense_b->get_size()[0], &scalars.get_const_data()[1], dx, - dense_x->get_size()[0]); - } - - void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, - const gko::LinOp* beta, - gko::LinOp* x) const override GKO_NOT_IMPLEMENTED; - - CusparseCsrmm(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), - csr_(std::move( - csr::create(exec, std::make_shared()))), - trans_(SPARSELIB_OPERATION_NON_TRANSPOSE) - {} - -private: - // Contains {alpha, beta} - gko::array scalars{ - this->get_executor(), {gko::one(), gko::zero()}}; - std::shared_ptr csr_; - cusparseOperation_t trans_; -}; - - -#endif // CUDA_VERSION < 11000 - - #if CUDA_VERSION < 11021 @@ -421,112 +190,6 @@ class CusparseCsrEx #endif // CUDA_VERSION < 11021 -#if CUDA_VERSION < 11000 - - -template -class CusparseHybrid - : public gko::EnableLinOp< - CusparseHybrid, - CusparseBase>, - public gko::EnableCreateMethod< - CusparseHybrid>, - public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; - -public: - using csr = gko::matrix::Csr; - using mat_data = gko::matrix_data; - using device_mat_data = gko::device_matrix_data; - - void read(const device_mat_data& data) override - { - this->read(data.copy_to_host()); - } - - void read(device_mat_data&& data) override - { - this->read(data.copy_to_host()); - } - - void read(const mat_data& data) override - { - auto t_csr = csr::create(this->get_executor(), - std::make_shared()); - t_csr->read(data); - this->set_size(t_csr->get_size()); - - auto guard = this->get_gpu_exec()->get_scoped_device_id_guard(); - gko::kernels::cuda::cusparse::csr2hyb( - this->get_gpu_exec()->get_sparselib_handle(), this->get_size()[0], - this->get_size()[1], this->get_descr(), t_csr->get_const_values(), - t_csr->get_const_row_ptrs(), t_csr->get_const_col_idxs(), hyb_, - Threshold, Partition); - } - - ~CusparseHybrid() override - { - try { - auto guard = this->get_gpu_exec()->get_scoped_device_id_guard(); - GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyHybMat(hyb_)); - } catch (const std::exception& e) { - std::cerr << "Error when unallocating CusparseHybrid hyb_ matrix: " - << e.what() << std::endl; - } - } - - CusparseHybrid(const CusparseHybrid& other) = delete; - - CusparseHybrid& operator=(const CusparseHybrid& other) = default; - -protected: - void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override - { - auto dense_b = gko::as>(b); - auto dense_x = gko::as>(x); - auto db = dense_b->get_const_values(); - auto dx = dense_x->get_values(); - - auto guard = this->get_gpu_exec()->get_scoped_device_id_guard(); - gko::kernels::cuda::cusparse::spmv( - this->get_gpu_exec()->get_sparselib_handle(), trans_, - &scalars.get_const_data()[0], this->get_descr(), hyb_, db, - &scalars.get_const_data()[1], dx); - } - - void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, - const gko::LinOp* beta, - gko::LinOp* x) const override GKO_NOT_IMPLEMENTED; - - CusparseHybrid(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), - trans_(SPARSELIB_OPERATION_NON_TRANSPOSE) - { - auto guard = this->get_gpu_exec()->get_scoped_device_id_guard(); - GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreateHybMat(&hyb_)); - } - -private: - // Contains {alpha, beta} - gko::array scalars{ - this->get_executor(), {gko::one(), gko::zero()}}; - cusparseOperation_t trans_; - cusparseHybMat_t hyb_; -}; - - -#endif // CUDA_VERSION < 11000 - - -#if CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__))) - - template void cusparse_generic_spmv(std::shared_ptr gpu_exec, const cusparseSpMatDescr_t mat, @@ -755,10 +418,6 @@ class CusparseGenericCoo }; -#endif // CUDA_VERSION >= 11000 || ((CUDA_VERSION >= 10020) && - // !(defined(_WIN32) || defined(__CYGWIN__))) - - } // namespace detail @@ -769,22 +428,12 @@ IMPL_CREATE_SPARSELIB_LINOP(cusparse_csrex, STUB_CREATE_SPARSELIB_LINOP(cusparse_csrex); #endif -#if CUDA_VERSION < 11000 -IMPL_CREATE_SPARSELIB_LINOP(cusparse_csr, detail::CusparseCsr); -IMPL_CREATE_SPARSELIB_LINOP(cusparse_csrmp, - detail::CusparseCsrmp); -IMPL_CREATE_SPARSELIB_LINOP(cusparse_csrmm, - detail::CusparseCsrmm); -#else // CUDA_VERSION >= 11000 IMPL_CREATE_SPARSELIB_LINOP(cusparse_csr, detail::CusparseGenericCsr); STUB_CREATE_SPARSELIB_LINOP(cusparse_csrmp); STUB_CREATE_SPARSELIB_LINOP(cusparse_csrmm); -#endif // CUDA_VERSION >= 11000 -#if CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__))) IMPL_CREATE_SPARSELIB_LINOP(cusparse_gcsr, detail::CusparseGenericCsr); #if CUDA_VERSION >= 11021 @@ -796,26 +445,7 @@ IMPL_CREATE_SPARSELIB_LINOP(cusparse_gcsr2, detail::CusparseGenericCsr); IMPL_CREATE_SPARSELIB_LINOP(cusparse_gcoo, detail::CusparseGenericCoo); -#else -STUB_CREATE_SPARSELIB_LINOP(cusparse_gcsr); -STUB_CREATE_SPARSELIB_LINOP(cusparse_gcsr2); -STUB_CREATE_SPARSELIB_LINOP(cusparse_gcoo); -#endif // CUDA_VERSION < 11000 && ((CUDA_VERSION < 10020) || (defined(_WIN32) - // && defined(__CYGWIN__)))) - - -#if CUDA_VERSION < 11000 -IMPL_CREATE_SPARSELIB_LINOP( - cusparse_coo, - detail::CusparseHybrid); -IMPL_CREATE_SPARSELIB_LINOP( - cusparse_ell, - detail::CusparseHybrid); -IMPL_CREATE_SPARSELIB_LINOP(cusparse_hybrid, - detail::CusparseHybrid); -#else // CUDA_VERSION >= 11000 IMPL_CREATE_SPARSELIB_LINOP(cusparse_coo, detail::CusparseGenericCoo); STUB_CREATE_SPARSELIB_LINOP(cusparse_ell); STUB_CREATE_SPARSELIB_LINOP(cusparse_hybrid); -#endif // CUDA_VERSION >= 11000 diff --git a/common/cuda_hip/components/atomic.hpp b/common/cuda_hip/components/atomic.hpp index 2fbb1664165..6922ef6039b 100644 --- a/common/cuda_hip/components/atomic.hpp +++ b/common/cuda_hip/components/atomic.hpp @@ -101,10 +101,11 @@ GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned long long int); GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned int); -#if !(defined(CUDA_VERSION) && (CUDA_VERSION < 10010)) -// CUDA 10.1 starts supporting 16-bit unsigned short int atomicCAS +#if defined(CUDA_VERSION) +// Support 16-bit ATOMIC_ADD and ATOMIC_MAX only on CUDA GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned short int); -#endif // !(defined(CUDA_VERSION) && (CUDA_VERSION < 10010)) +#endif + #undef GKO_BIND_ATOMIC_HELPER_STRUCTURE @@ -142,32 +143,26 @@ GKO_BIND_ATOMIC_ADD(double); #else // NVIDIA -#if !((defined(CUDA_VERSION) && (CUDA_VERSION < 8000)) || \ - (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600))) +#if !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)) // CUDA 8.0 starts suppoting 64-bit double atomicAdd on devices of compute // capability 6.x and higher GKO_BIND_ATOMIC_ADD(double); -#endif // !((defined(CUDA_VERSION) && (CUDA_VERSION < 8000)) || - // (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600))) +#endif // !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)) -#if !((defined(CUDA_VERSION) && (CUDA_VERSION < 10000)) || \ - (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700))) +#if !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700)) // CUDA 10.0 starts supporting 16-bit __half floating-point atomicAdd on devices // of compute capability 7.x and higher. GKO_BIND_ATOMIC_ADD(__half); -#endif // !((defined(CUDA_VERSION) && (CUDA_VERSION < 10000)) || - // (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700))) +#endif // !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700)) -#if !((defined(CUDA_VERSION) && (CUDA_VERSION < 10000)) || \ - (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600))) +#if !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)) // CUDA 10.0 starts supporting 32-bit __half2 floating-point atomicAdd on // devices of compute capability 6.x and higher. note: The atomicity of the // __half2 add operation is guaranteed separately for each of the two __half // elements; the entire __half2 is not guaranteed to be atomic as a single // 32-bit access. GKO_BIND_ATOMIC_ADD(__half2); -#endif // !((defined(CUDA_VERSION) && (CUDA_VERSION < 10000)) || - // (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600))) +#endif // !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)) #endif // defined(__HIPCC__) && GINKGO_HIP_PLATFORM_HCC diff --git a/cuda/base/cusparse_bindings.hpp b/cuda/base/cusparse_bindings.hpp index bca0a80a37b..4be00b88aaf 100644 --- a/cuda/base/cusparse_bindings.hpp +++ b/cuda/base/cusparse_bindings.hpp @@ -57,58 +57,6 @@ template <> struct is_supported, int32> : std::true_type {}; -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -#define GKO_BIND_CUSPARSE32_SPMV(ValueType, CusparseName) \ - inline void spmv(cusparseHandle_t handle, cusparseOperation_t transA, \ - int32 m, int32 n, int32 nnz, const ValueType* alpha, \ - const cusparseMatDescr_t descrA, \ - const ValueType* csrValA, const int32* csrRowPtrA, \ - const int32* csrColIndA, const ValueType* x, \ - const ValueType* beta, ValueType* y) \ - { \ - GKO_ASSERT_NO_CUSPARSE_ERRORS(CusparseName( \ - handle, transA, m, n, nnz, as_culibs_type(alpha), descrA, \ - as_culibs_type(csrValA), csrRowPtrA, csrColIndA, \ - as_culibs_type(x), as_culibs_type(beta), as_culibs_type(y))); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -#define GKO_BIND_CUSPARSE64_SPMV(ValueType, CusparseName) \ - inline void spmv(cusparseHandle_t handle, cusparseOperation_t transA, \ - int64 m, int64 n, int64 nnz, const ValueType* alpha, \ - const cusparseMatDescr_t descrA, \ - const ValueType* csrValA, const int64* csrRowPtrA, \ - const int64* csrColIndA, const ValueType* x, \ - const ValueType* beta, ValueType* y) GKO_NOT_IMPLEMENTED; \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -GKO_BIND_CUSPARSE32_SPMV(float, cusparseScsrmv); -GKO_BIND_CUSPARSE32_SPMV(double, cusparseDcsrmv); -GKO_BIND_CUSPARSE32_SPMV(std::complex, cusparseCcsrmv); -GKO_BIND_CUSPARSE32_SPMV(std::complex, cusparseZcsrmv); -GKO_BIND_CUSPARSE64_SPMV(float, cusparseScsrmv); -GKO_BIND_CUSPARSE64_SPMV(double, cusparseDcsrmv); -GKO_BIND_CUSPARSE64_SPMV(std::complex, cusparseCcsrmv); -GKO_BIND_CUSPARSE64_SPMV(std::complex, cusparseZcsrmv); -template -GKO_BIND_CUSPARSE32_SPMV(ValueType, detail::not_implemented); -template -GKO_BIND_CUSPARSE64_SPMV(ValueType, detail::not_implemented); - - -#undef GKO_BIND_CUSPARSE32_SPMV -#undef GKO_BIND_CUSPARSE64_SPMV - - -#else // CUDA_VERSION >= 11000 - - template inline void spmv_buffersize(cusparseHandle_t handle, cusparseOperation_t opA, const ValueType* alpha, @@ -164,109 +112,6 @@ inline void spmm(cusparseHandle_t handle, cusparseOperation_t opA, } -#endif - - -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -#define GKO_BIND_CUSPARSE32_SPMV(ValueType, CusparseName) \ - inline void spmv_mp(cusparseHandle_t handle, cusparseOperation_t transA, \ - int32 m, int32 n, int32 nnz, const ValueType* alpha, \ - const cusparseMatDescr_t descrA, \ - const ValueType* csrValA, const int32* csrRowPtrA, \ - const int32* csrColIndA, const ValueType* x, \ - const ValueType* beta, ValueType* y) \ - { \ - GKO_ASSERT_NO_CUSPARSE_ERRORS(CusparseName( \ - handle, transA, m, n, nnz, as_culibs_type(alpha), descrA, \ - as_culibs_type(csrValA), csrRowPtrA, csrColIndA, \ - as_culibs_type(x), as_culibs_type(beta), as_culibs_type(y))); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -#define GKO_BIND_CUSPARSE64_SPMV(ValueType, CusparseName) \ - inline void spmv_mp( \ - cusparseHandle_t handle, cusparseOperation_t transA, int64 m, int64 n, \ - int64 nnz, const ValueType* alpha, const cusparseMatDescr_t descrA, \ - const ValueType* csrValA, const int64* csrRowPtrA, \ - const int64* csrColIndA, const ValueType* x, const ValueType* beta, \ - ValueType* y) GKO_NOT_IMPLEMENTED; \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -GKO_BIND_CUSPARSE32_SPMV(float, cusparseScsrmv_mp); -GKO_BIND_CUSPARSE32_SPMV(double, cusparseDcsrmv_mp); -GKO_BIND_CUSPARSE32_SPMV(std::complex, cusparseCcsrmv_mp); -GKO_BIND_CUSPARSE32_SPMV(std::complex, cusparseZcsrmv_mp); -GKO_BIND_CUSPARSE64_SPMV(float, cusparseScsrmv_mp); -GKO_BIND_CUSPARSE64_SPMV(double, cusparseDcsrmv_mp); -GKO_BIND_CUSPARSE64_SPMV(std::complex, cusparseCcsrmv_mp); -GKO_BIND_CUSPARSE64_SPMV(std::complex, cusparseZcsrmv_mp); -template -GKO_BIND_CUSPARSE32_SPMV(ValueType, detail::not_implemented); -template -GKO_BIND_CUSPARSE64_SPMV(ValueType, detail::not_implemented); - - -#undef GKO_BIND_CUSPARSE32_SPMV -#undef GKO_BIND_CUSPARSE64_SPMV - - -#define GKO_BIND_CUSPARSE32_SPMM(ValueType, CusparseName) \ - inline void spmm(cusparseHandle_t handle, cusparseOperation_t transA, \ - int32 m, int32 n, int32 k, int32 nnz, \ - const ValueType* alpha, const cusparseMatDescr_t descrA, \ - const ValueType* csrValA, const int32* csrRowPtrA, \ - const int32* csrColIndA, const ValueType* B, int32 ldb, \ - const ValueType* beta, ValueType* C, int32 ldc) \ - { \ - GKO_ASSERT_NO_CUSPARSE_ERRORS( \ - CusparseName(handle, transA, m, n, k, nnz, as_culibs_type(alpha), \ - descrA, as_culibs_type(csrValA), csrRowPtrA, \ - csrColIndA, as_culibs_type(B), ldb, \ - as_culibs_type(beta), as_culibs_type(C), ldc)); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -#define GKO_BIND_CUSPARSE64_SPMM(ValueType, CusparseName) \ - inline void spmm(cusparseHandle_t handle, cusparseOperation_t transA, \ - int64 m, int64 n, int64 k, int64 nnz, \ - const ValueType* alpha, const cusparseMatDescr_t descrA, \ - const ValueType* csrValA, const int64* csrRowPtrA, \ - const int64* csrColIndA, const ValueType* B, int64 ldb, \ - const ValueType* beta, ValueType* C, int64 ldc) \ - GKO_NOT_IMPLEMENTED; \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -GKO_BIND_CUSPARSE32_SPMM(float, cusparseScsrmm); -GKO_BIND_CUSPARSE32_SPMM(double, cusparseDcsrmm); -GKO_BIND_CUSPARSE32_SPMM(std::complex, cusparseCcsrmm); -GKO_BIND_CUSPARSE32_SPMM(std::complex, cusparseZcsrmm); -GKO_BIND_CUSPARSE64_SPMM(float, cusparseScsrmm); -GKO_BIND_CUSPARSE64_SPMM(double, cusparseDcsrmm); -GKO_BIND_CUSPARSE64_SPMM(std::complex, cusparseCcsrmm); -GKO_BIND_CUSPARSE64_SPMM(std::complex, cusparseZcsrmm); -template -GKO_BIND_CUSPARSE32_SPMM(ValueType, detail::not_implemented); -template -GKO_BIND_CUSPARSE64_SPMM(ValueType, detail::not_implemented); - - -#undef GKO_BIND_CUSPARSE32_SPMM -#undef GKO_BIND_CUSPARSE64_SPMM - - -#endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - #if defined(CUDA_VERSION) && (CUDA_VERSION < 11021) @@ -357,160 +202,6 @@ GKO_BIND_CUSPARSE_SPMV_BUFFERSIZE(std::complex); #endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11021) -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -#define GKO_BIND_CUSPARSE32_SPMV(ValueType, CusparseName) \ - inline void spmv(cusparseHandle_t handle, cusparseOperation_t transA, \ - const ValueType* alpha, const cusparseMatDescr_t descrA, \ - const cusparseHybMat_t hybA, const ValueType* x, \ - const ValueType* beta, ValueType* y) \ - { \ - GKO_ASSERT_NO_CUSPARSE_ERRORS(CusparseName( \ - handle, transA, as_culibs_type(alpha), descrA, hybA, \ - as_culibs_type(x), as_culibs_type(beta), as_culibs_type(y))); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -GKO_BIND_CUSPARSE32_SPMV(float, cusparseShybmv); -GKO_BIND_CUSPARSE32_SPMV(double, cusparseDhybmv); -GKO_BIND_CUSPARSE32_SPMV(std::complex, cusparseChybmv); -GKO_BIND_CUSPARSE32_SPMV(std::complex, cusparseZhybmv); -template -GKO_BIND_CUSPARSE32_SPMV(ValueType, detail::not_implemented); - - -#undef GKO_BIND_CUSPARSE32_SPMV - - -template -void spgemm_buffer_size( - cusparseHandle_t handle, IndexType m, IndexType n, IndexType k, - const ValueType* alpha, const cusparseMatDescr_t descrA, IndexType nnzA, - const IndexType* csrRowPtrA, const IndexType* csrColIndA, - const cusparseMatDescr_t descrB, IndexType nnzB, - const IndexType* csrRowPtrB, const IndexType* csrColIndB, - const ValueType* beta, const cusparseMatDescr_t descrD, IndexType nnzD, - const IndexType* csrRowPtrD, const IndexType* csrColIndD, - csrgemm2Info_t info, size_type& result) GKO_NOT_IMPLEMENTED; - -#define GKO_BIND_CUSPARSE_SPGEMM_BUFFER_SIZE(ValueType, CusparseName) \ - template <> \ - inline void spgemm_buffer_size( \ - cusparseHandle_t handle, int32 m, int32 n, int32 k, \ - const ValueType* alpha, const cusparseMatDescr_t descrA, int32 nnzA, \ - const int32* csrRowPtrA, const int32* csrColIndA, \ - const cusparseMatDescr_t descrB, int32 nnzB, const int32* csrRowPtrB, \ - const int32* csrColIndB, const ValueType* beta, \ - const cusparseMatDescr_t descrD, int32 nnzD, const int32* csrRowPtrD, \ - const int32* csrColIndD, csrgemm2Info_t info, size_type& result) \ - { \ - GKO_ASSERT_NO_CUSPARSE_ERRORS( \ - CusparseName(handle, m, n, k, as_culibs_type(alpha), descrA, nnzA, \ - csrRowPtrA, csrColIndA, descrB, nnzB, csrRowPtrB, \ - csrColIndB, as_culibs_type(beta), descrD, nnzD, \ - csrRowPtrD, csrColIndD, info, &result)); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -GKO_BIND_CUSPARSE_SPGEMM_BUFFER_SIZE(float, cusparseScsrgemm2_bufferSizeExt); -GKO_BIND_CUSPARSE_SPGEMM_BUFFER_SIZE(double, cusparseDcsrgemm2_bufferSizeExt); -GKO_BIND_CUSPARSE_SPGEMM_BUFFER_SIZE(std::complex, - cusparseCcsrgemm2_bufferSizeExt); -GKO_BIND_CUSPARSE_SPGEMM_BUFFER_SIZE(std::complex, - cusparseZcsrgemm2_bufferSizeExt); - - -#undef GKO_BIND_CUSPARSE_SPGEMM_BUFFER_SIZE - - -template -void spgemm_nnz(cusparseHandle_t handle, IndexType m, IndexType n, IndexType k, - const cusparseMatDescr_t descrA, IndexType nnzA, - const IndexType* csrRowPtrA, const IndexType* csrColIndA, - const cusparseMatDescr_t descrB, IndexType nnzB, - const IndexType* csrRowPtrB, const IndexType* csrColIndB, - const cusparseMatDescr_t descrD, IndexType nnzD, - const IndexType* csrRowPtrD, const IndexType* csrColIndD, - const cusparseMatDescr_t descrC, IndexType* csrRowPtrC, - IndexType* nnzC, csrgemm2Info_t info, - void* buffer) GKO_NOT_IMPLEMENTED; - -template <> -inline void spgemm_nnz( - cusparseHandle_t handle, int32 m, int32 n, int32 k, - const cusparseMatDescr_t descrA, int32 nnzA, const int32* csrRowPtrA, - const int32* csrColIndA, const cusparseMatDescr_t descrB, int32 nnzB, - const int32* csrRowPtrB, const int32* csrColIndB, - const cusparseMatDescr_t descrD, int32 nnzD, const int32* csrRowPtrD, - const int32* csrColIndD, const cusparseMatDescr_t descrC, int32* csrRowPtrC, - int32* nnzC, csrgemm2Info_t info, void* buffer) -{ - GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseXcsrgemm2Nnz( - handle, m, n, k, descrA, nnzA, csrRowPtrA, csrColIndA, descrB, nnzB, - csrRowPtrB, csrColIndB, descrD, nnzD, csrRowPtrD, csrColIndD, descrC, - csrRowPtrC, nnzC, info, buffer)); -} - - -template -void spgemm(cusparseHandle_t handle, IndexType m, IndexType n, IndexType k, - const ValueType* alpha, const cusparseMatDescr_t descrA, - IndexType nnzA, const ValueType* csrValA, - const IndexType* csrRowPtrA, const IndexType* csrColIndA, - const cusparseMatDescr_t descrB, IndexType nnzB, - const ValueType* csrValB, const IndexType* csrRowPtrB, - const IndexType* csrColIndB, const ValueType* beta, - const cusparseMatDescr_t descrD, IndexType nnzD, - const ValueType* csrValD, const IndexType* csrRowPtrD, - const IndexType* csrColIndD, const cusparseMatDescr_t descrC, - ValueType* csrValC, const IndexType* csrRowPtrC, - IndexType* csrColIndC, csrgemm2Info_t info, - void* buffer) GKO_NOT_IMPLEMENTED; - -#define GKO_BIND_CUSPARSE_SPGEMM(ValueType, CusparseName) \ - template <> \ - inline void spgemm( \ - cusparseHandle_t handle, int32 m, int32 n, int32 k, \ - const ValueType* alpha, const cusparseMatDescr_t descrA, int32 nnzA, \ - const ValueType* csrValA, const int32* csrRowPtrA, \ - const int32* csrColIndA, const cusparseMatDescr_t descrB, int32 nnzB, \ - const ValueType* csrValB, const int32* csrRowPtrB, \ - const int32* csrColIndB, const ValueType* beta, \ - const cusparseMatDescr_t descrD, int32 nnzD, const ValueType* csrValD, \ - const int32* csrRowPtrD, const int32* csrColIndD, \ - const cusparseMatDescr_t descrC, ValueType* csrValC, \ - const int32* csrRowPtrC, int32* csrColIndC, csrgemm2Info_t info, \ - void* buffer) \ - { \ - GKO_ASSERT_NO_CUSPARSE_ERRORS(CusparseName( \ - handle, m, n, k, as_culibs_type(alpha), descrA, nnzA, \ - as_culibs_type(csrValA), csrRowPtrA, csrColIndA, descrB, nnzB, \ - as_culibs_type(csrValB), csrRowPtrB, csrColIndB, \ - as_culibs_type(beta), descrD, nnzD, as_culibs_type(csrValD), \ - csrRowPtrD, csrColIndD, descrC, as_culibs_type(csrValC), \ - csrRowPtrC, csrColIndC, info, buffer)); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -GKO_BIND_CUSPARSE_SPGEMM(float, cusparseScsrgemm2); -GKO_BIND_CUSPARSE_SPGEMM(double, cusparseDcsrgemm2); -GKO_BIND_CUSPARSE_SPGEMM(std::complex, cusparseCcsrgemm2); -GKO_BIND_CUSPARSE_SPGEMM(std::complex, cusparseZcsrgemm2); - - -#undef GKO_BIND_CUSPARSE_SPGEMM - - -#else // CUDA_VERSION >= 11000 - - template void spgemm_work_estimation(cusparseHandle_t handle, const ValueType* alpha, cusparseSpMatDescr_t a_descr, @@ -574,101 +265,6 @@ void csr_set_pointers(cusparseSpMatDescr_t descr, IndexType* row_ptrs, } -#endif // CUDA_VERSION >= 11000 - - -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -#define GKO_BIND_CUSPARSE32_CSR2HYB(ValueType, CusparseName) \ - inline void csr2hyb(cusparseHandle_t handle, int32 m, int32 n, \ - const cusparseMatDescr_t descrA, \ - const ValueType* csrValA, const int32* csrRowPtrA, \ - const int32* csrColIndA, cusparseHybMat_t hybA, \ - int32 userEllWidth, \ - cusparseHybPartition_t partitionType) \ - { \ - GKO_ASSERT_NO_CUSPARSE_ERRORS(CusparseName( \ - handle, m, n, descrA, as_culibs_type(csrValA), csrRowPtrA, \ - csrColIndA, hybA, userEllWidth, partitionType)); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -#define GKO_BIND_CUSPARSE64_CSR2HYB(ValueType, CusparseName) \ - inline void csr2hyb( \ - cusparseHandle_t handle, int64 m, int64 n, \ - const cusparseMatDescr_t descrA, const ValueType* csrValA, \ - const int64* csrRowPtrA, const int64* csrColIndA, \ - cusparseHybMat_t hybA, int64 userEllWidth, \ - cusparseHybPartition_t partitionType) GKO_NOT_IMPLEMENTED; \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -GKO_BIND_CUSPARSE32_CSR2HYB(float, cusparseScsr2hyb); -GKO_BIND_CUSPARSE32_CSR2HYB(double, cusparseDcsr2hyb); -GKO_BIND_CUSPARSE32_CSR2HYB(std::complex, cusparseCcsr2hyb); -GKO_BIND_CUSPARSE32_CSR2HYB(std::complex, cusparseZcsr2hyb); -GKO_BIND_CUSPARSE64_CSR2HYB(float, cusparseScsr2hyb); -GKO_BIND_CUSPARSE64_CSR2HYB(double, cusparseDcsr2hyb); -GKO_BIND_CUSPARSE64_CSR2HYB(std::complex, cusparseCcsr2hyb); -GKO_BIND_CUSPARSE64_CSR2HYB(std::complex, cusparseZcsr2hyb); -template -GKO_BIND_CUSPARSE32_CSR2HYB(ValueType, detail::not_implemented); -template -GKO_BIND_CUSPARSE64_CSR2HYB(ValueType, detail::not_implemented); - - -#undef GKO_BIND_CUSPARSE32_CSR2HYB -#undef GKO_BIND_CUSPARSE64_CSR2HYB - - -#endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - -template -inline void transpose(cusparseHandle_t handle, size_type m, size_type n, - size_type nnz, const ValueType* OrigValA, - const IndexType* OrigRowPtrA, - const IndexType* OrigColIndA, ValueType* TransValA, - IndexType* TransRowPtrA, IndexType* TransColIndA, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase) GKO_NOT_IMPLEMENTED; - -// Cusparse csr2csc use the order (row_inx, col_ptr) for csc, so we need to -// switch row_ptr and col_idx of transposed csr here -#define GKO_BIND_CUSPARSE_TRANSPOSE32(ValueType, CusparseName) \ - template <> \ - inline void transpose( \ - cusparseHandle_t handle, size_type m, size_type n, size_type nnz, \ - const ValueType* OrigValA, const int32* OrigRowPtrA, \ - const int32* OrigColIndA, ValueType* TransValA, int32* TransRowPtrA, \ - int32* TransColIndA, cusparseAction_t copyValues, \ - cusparseIndexBase_t idxBase) \ - { \ - GKO_ASSERT_NO_CUSPARSE_ERRORS( \ - CusparseName(handle, m, n, nnz, as_culibs_type(OrigValA), \ - OrigRowPtrA, OrigColIndA, as_culibs_type(TransValA), \ - TransColIndA, TransRowPtrA, copyValues, idxBase)); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -GKO_BIND_CUSPARSE_TRANSPOSE32(float, cusparseScsr2csc); -GKO_BIND_CUSPARSE_TRANSPOSE32(double, cusparseDcsr2csc); -GKO_BIND_CUSPARSE_TRANSPOSE32(std::complex, cusparseCcsr2csc); -GKO_BIND_CUSPARSE_TRANSPOSE32(std::complex, cusparseZcsr2csc); - -#undef GKO_BIND_CUSPARSE_TRANSPOSE32 - - -#else // CUDA_VERSION >= 11000 - template inline void transpose_buffersize( cusparseHandle_t handle, size_type m, size_type n, size_type nnz, @@ -737,9 +333,6 @@ GKO_BIND_CUSPARSE_TRANSPOSE32(std::complex); GKO_BIND_CUSPARSE_TRANSPOSE32(std::complex); -#endif - - inline cusparseMatDescr_t create_mat_descr() { cusparseMatDescr_t descr{}; @@ -774,26 +367,6 @@ inline void destroy(cusparseMatDescr_t descr) } -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -inline csrgemm2Info_t create_spgemm_info() -{ - csrgemm2Info_t info{}; - GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreateCsrgemm2Info(&info)); - return info; -} - - -inline void destroy(csrgemm2Info_t info) -{ - GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyCsrgemm2Info(info)); -} - - -#else // CUDA_VERSION >= 11000 - - inline cusparseSpGEMMDescr_t create_spgemm_descr() { cusparseSpGEMMDescr_t descr{}; @@ -886,7 +459,7 @@ inline void destroy(cusparseSpMatDescr_t descr) } -#if (CUDA_VERSION >= 11031) +#if defined(CUDA_VERSION) && (CUDA_VERSION >= 11031) template @@ -915,9 +488,6 @@ inline void destroy(cusparseSpSMDescr_t info) #endif // CUDA_VERSION >= 11031 -#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000) - - #if defined(CUDA_VERSION) && (CUDA_VERSION < 11031) @@ -1209,38 +779,6 @@ inline void csrsort(cusparseHandle_t handle, int32 m, int32 n, int32 nnz, } -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -template -void gather(cusparseHandle_t handle, IndexType nnz, const ValueType* in, - ValueType* out, const IndexType* permutation) GKO_NOT_IMPLEMENTED; - -#define GKO_BIND_CUSPARSE_GATHER(ValueType, CusparseName) \ - template <> \ - inline void gather(cusparseHandle_t handle, int32 nnz, \ - const ValueType* in, ValueType* out, \ - const int32* permutation) \ - { \ - GKO_ASSERT_NO_CUSPARSE_ERRORS( \ - CusparseName(handle, nnz, as_culibs_type(in), as_culibs_type(out), \ - permutation, CUSPARSE_INDEX_BASE_ZERO)); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -GKO_BIND_CUSPARSE_GATHER(float, cusparseSgthr); -GKO_BIND_CUSPARSE_GATHER(double, cusparseDgthr); -GKO_BIND_CUSPARSE_GATHER(std::complex, cusparseCgthr); -GKO_BIND_CUSPARSE_GATHER(std::complex, cusparseZgthr); - -#undef GKO_BIND_CUSPARSE_GATHER - - -#else // CUDA_VERSION >= 11000 - - inline void gather(cusparseHandle_t handle, cusparseDnVecDescr_t in, cusparseSpVecDescr_t out) { @@ -1248,9 +786,6 @@ inline void gather(cusparseHandle_t handle, cusparseDnVecDescr_t in, } -#endif - - GKO_BEGIN_DISABLE_DEPRECATION_WARNINGS template void ilu0_buffer_size(cusparseHandle_t handle, IndexType m, IndexType nnz, diff --git a/cuda/base/types.hpp b/cuda/base/types.hpp index 7252f7d673d..a4a2b877c28 100644 --- a/cuda/base/types.hpp +++ b/cuda/base/types.hpp @@ -206,11 +206,6 @@ GKO_CUDA_DATA_TYPE(int8, CUDA_R_8I); #undef GKO_CUDA_DATA_TYPE -#if defined(CUDA_VERSION) && \ - (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) - - template struct cusparse_index_type_impl {}; @@ -227,10 +222,6 @@ GKO_CUDA_INDEX_TYPE(int64, CUSPARSE_INDEX_64I); #undef GKO_CUDA_INDEX_TYPE -#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) - - } // namespace detail @@ -249,11 +240,6 @@ constexpr cudaDataType_t cuda_data_type() } -#if defined(CUDA_VERSION) && \ - (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) - - /** * This is an alias for the `cudaIndexType_t` equivalent of `T`. By default, * CUSPARSE_INDEX_16U is returned. @@ -269,10 +255,6 @@ constexpr cusparseIndexType_t cusparse_index_type() } -#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) - - /** * This is an alias for CUDA's equivalent of `T`. * diff --git a/cuda/components/cooperative_groups.cuh b/cuda/components/cooperative_groups.cuh index 983ec32f9ac..14c104c8e29 100644 --- a/cuda/components/cooperative_groups.cuh +++ b/cuda/components/cooperative_groups.cuh @@ -280,90 +280,10 @@ struct is_synchronizable_group_impl : std::true_type {}; } // namespace detail -namespace detail { - - -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -// Adds generalized shuffles that support any type to the group. -template -class enable_extended_shuffle : public Group { -public: - using Group::Group; - using Group::shfl; - using Group::shfl_down; - using Group::shfl_up; - using Group::shfl_xor; - -#define GKO_ENABLE_SHUFFLE_OPERATION(_name, SelectorType) \ - template \ - __device__ __forceinline__ ValueType _name(const ValueType& var, \ - SelectorType selector) const \ - { \ - return shuffle_impl( \ - [this](uint32 v, SelectorType s) { \ - return static_cast(this)->_name(v, s); \ - }, \ - var, selector); \ - } - - GKO_ENABLE_SHUFFLE_OPERATION(shfl, int32) - GKO_ENABLE_SHUFFLE_OPERATION(shfl_up, uint32) - GKO_ENABLE_SHUFFLE_OPERATION(shfl_down, uint32) - GKO_ENABLE_SHUFFLE_OPERATION(shfl_xor, int32) - -#undef GKO_ENABLE_SHUFFLE_OPERATION - -private: - template - static __device__ __forceinline__ ValueType - shuffle_impl(ShuffleOperator intrinsic_shuffle, const ValueType var, - SelectorType selector) - { - static_assert(sizeof(ValueType) % sizeof(uint32) == 0, - "Unable to shuffle sizes which are not 4-byte multiples"); - constexpr auto value_size = sizeof(ValueType) / sizeof(uint32); - ValueType result; - auto var_array = reinterpret_cast(&var); - auto result_array = reinterpret_cast(&result); -#pragma unroll - for (std::size_t i = 0; i < value_size; ++i) { - result_array[i] = intrinsic_shuffle(var_array[i], selector); - } - return result; - } -}; - - -#endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -} // namespace detail - - -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -// Implementing this as a using directive messes up with SFINAE for some reason, -// probably a bug in NVCC. If it is a complete type, everything works fine. -template -struct thread_block_tile : detail::enable_extended_shuffle< - cooperative_groups::thread_block_tile> { - using detail::enable_extended_shuffle< - cooperative_groups::thread_block_tile>::enable_extended_shuffle; -}; - - -#else // CUDA_VERSION >= 11000 - - // Cuda11 cooperative group's shuffle supports complex using cooperative_groups::thread_block_tile; -#endif // inherits thread_group // // public API: @@ -385,28 +305,6 @@ using cooperative_groups::thread_block_tile; namespace detail { -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -template -struct is_group_impl> : std::true_type {}; -template -struct is_synchronizable_group_impl> : std::true_type { -}; -template -struct is_communicator_group_impl> : std::true_type {}; -// make sure the original CUDA group is recognized whenever possible -template -struct is_group_impl> - : std::true_type {}; -template -struct is_synchronizable_group_impl> - : std::true_type {}; - - -#else // CUDA_VERSION >= 11000 - - // thread_block_tile is same as cuda11's template struct is_group_impl> : std::true_type {}; @@ -418,9 +316,6 @@ struct is_communicator_group_impl> : std::true_type {}; -#endif - - } // namespace detail @@ -471,24 +366,6 @@ __device__ __forceinline__ auto tiled_partition(const Group& g) // Only support tile_partition with 1, 2, 4, 8, 16, 32. // Reference: // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#warp-notes -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) - - -// cooperative group before cuda11 does not contain parent group in template -template -__device__ __forceinline__ - std::enable_if_t<(Size <= kernels::cuda::config::warp_size) && (Size > 0) && - (kernels::cuda::config::warp_size % Size == 0), - thread_block_tile> - tiled_partition(const Group&) -{ - return thread_block_tile(); -} - - -#else // CUDA_VERSION >= 11000 - - // cooperative group after cuda11 contain parent group in template. // we remove the information because we do not restrict cooperative group by its // parent group type. @@ -500,9 +377,6 @@ __device__ __forceinline__ thread_block_tile tiled_partition( } -#endif - - } // namespace group } // namespace cuda } // namespace kernels diff --git a/third_party/identify_stream_usage/identify_stream_usage.cpp b/third_party/identify_stream_usage/identify_stream_usage.cpp index 5cdd4d30b09..9dc16fc1bb3 100644 --- a/third_party/identify_stream_usage/identify_stream_usage.cpp +++ b/third_party/identify_stream_usage/identify_stream_usage.cpp @@ -104,15 +104,10 @@ __attribute__((init_priority(1001))) std::unordered_map // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html#group__CUDART__EVENT DEFINE_OVERLOAD(cudaEventRecord, ARG(cudaEvent_t event, cudaStream_t stream), ARG(event, stream)); - -#if CUDA_VERSION >= 11000 - DEFINE_OVERLOAD(cudaEventRecordWithFlags, ARG(cudaEvent_t event, cudaStream_t stream, unsigned int flags), ARG(event, stream, flags)); -#endif - // Execution APIS: // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXECUTION.html#group__CUDART__EXECUTION DEFINE_OVERLOAD(cudaLaunchKernel,