Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Thrust to CCCL #206

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Fix compilation for newer thrust versions
Recent changes in Thrust, for the CUDA backend, no longer allow including `<thrust/device_vector.h>` into a translation unit that is not compiled by a CUDA compiler. As a workaround, the vectors containing the data are moved from the header into the benchmark implementation file.
  • Loading branch information
bernhardmgruber committed Jun 26, 2024

Unverified

This user has not yet uploaded their public signing key.
commit b5ad6c0eae1606245720ae6a0f85856e0aa45385
74 changes: 48 additions & 26 deletions src/thrust/ThrustStream.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Copyright (c) 2020 Tom Deakin
// University of Bristol HPC
// Copyright (c) 2020 Tom Deakin, 2024 Bernhard Manfred Gruber
// University of Bristol HPC, NVIDIA
//
// For full license terms please see the LICENSE file distributed with this
// source code
@@ -10,6 +10,25 @@
#include <thrust/iterator/zip_iterator.h>
#include <thrust/zip_function.h>

#if defined(MANAGED)
#include <thrust/universal_vector.h>
#else
#include <thrust/device_vector.h>
#endif

template <class T>
using vector =
#if defined(MANAGED)
thrust::universal_vector<T>;
#else
thrust::device_vector<T>;
#endif

template <class T>
struct ThrustStream<T>::Impl{
vector<T> a, b, c;
};

static inline void synchronise()
{
// rocThrust doesn't synchronise between thrust calls
@@ -20,7 +39,7 @@ static inline void synchronise()

template <class T>
ThrustStream<T>::ThrustStream(const intptr_t array_size, int device)
: array_size{array_size}, a(array_size), b(array_size), c(array_size) {
: array_size{array_size}, impl(new Impl{vector<T>(array_size), vector<T>(array_size), vector<T>(array_size)}) {
std::cout << "Using CUDA device: " << getDeviceName(device) << std::endl;
std::cout << "Driver: " << getDeviceDriver(device) << std::endl;
std::cout << "Thrust version: " << THRUST_VERSION << std::endl;
@@ -50,27 +69,30 @@ ThrustStream<T>::ThrustStream(const intptr_t array_size, int device)

}

template <class T>
ThrustStream<T>::~ThrustStream() = default;

template <class T>
void ThrustStream<T>::init_arrays(T initA, T initB, T initC)
{
thrust::fill(a.begin(), a.end(), initA);
thrust::fill(b.begin(), b.end(), initB);
thrust::fill(c.begin(), c.end(), initC);
thrust::fill(impl->a.begin(), impl->a.end(), initA);
thrust::fill(impl->b.begin(), impl->b.end(), initB);
thrust::fill(impl->c.begin(), impl->c.end(), initC);
synchronise();
}

template <class T>
void ThrustStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
{
thrust::copy(a.begin(), a.end(), h_a.begin());
thrust::copy(b.begin(), b.end(), h_b.begin());
thrust::copy(c.begin(), c.end(), h_c.begin());
thrust::copy(impl->a.begin(), impl->a.end(), h_a.begin());
thrust::copy(impl->b.begin(), impl->b.end(), h_b.begin());
thrust::copy(impl->c.begin(), impl->c.end(), h_c.begin());
}

template <class T>
void ThrustStream<T>::copy()
{
thrust::copy(a.begin(), a.end(),c.begin());
thrust::copy(impl->a.begin(), impl->a.end(),impl->c.begin());
synchronise();
}

@@ -79,9 +101,9 @@ void ThrustStream<T>::mul()
{
const T scalar = startScalar;
thrust::transform(
c.begin(),
c.end(),
b.begin(),
impl->c.begin(),
impl->c.end(),
impl->b.begin(),
[=] __device__ __host__ (const T &ci){
return ci * scalar;
}
@@ -93,9 +115,9 @@ template <class T>
void ThrustStream<T>::add()
{
thrust::transform(
thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin())),
thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end())),
c.begin(),
thrust::make_zip_iterator(thrust::make_tuple(impl->a.begin(), impl->b.begin())),
thrust::make_zip_iterator(thrust::make_tuple(impl->a.end(), impl->b.end())),
impl->c.begin(),
thrust::make_zip_function(
[] __device__ __host__ (const T& ai, const T& bi){
return ai + bi;
@@ -109,9 +131,9 @@ void ThrustStream<T>::triad()
{
const T scalar = startScalar;
thrust::transform(
thrust::make_zip_iterator(thrust::make_tuple(b.begin(), c.begin())),
thrust::make_zip_iterator(thrust::make_tuple(b.end(), c.end())),
a.begin(),
thrust::make_zip_iterator(thrust::make_tuple(impl->b.begin(), impl->c.begin())),
thrust::make_zip_iterator(thrust::make_tuple(impl->b.end(), impl->c.end())),
impl->a.begin(),
thrust::make_zip_function(
[=] __device__ __host__ (const T& bi, const T& ci){
return bi + scalar * ci;
@@ -125,9 +147,9 @@ void ThrustStream<T>::nstream()
{
const T scalar = startScalar;
thrust::transform(
thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin(), c.begin())),
thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end(), c.end())),
a.begin(),
thrust::make_zip_iterator(thrust::make_tuple(impl->a.begin(), impl->b.begin(), impl->c.begin())),
thrust::make_zip_iterator(thrust::make_tuple(impl->a.end(), impl->b.end(), impl->c.end())),
impl->a.begin(),
thrust::make_zip_function(
[=] __device__ __host__ (const T& ai, const T& bi, const T& ci){
return ai + bi + scalar * ci;
@@ -139,7 +161,7 @@ void ThrustStream<T>::nstream()
template <class T>
T ThrustStream<T>::dot()
{
return thrust::inner_product(a.begin(), a.end(), b.begin(), T{});
return thrust::inner_product(impl->a.begin(), impl->a.end(), impl->b.begin(), T{});
}

#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA || \
@@ -155,7 +177,7 @@ T ThrustStream<T>::dot()
# error Unsupported compiler for Thrust
#endif

void check_error(void)
void check_error()
{
IMPL_FN__(Error_t) err = IMPL_FN__(GetLastError());
if (err != IMPL_FN__(Success))
@@ -165,7 +187,7 @@ void check_error(void)
}
}

void listDevices(void)
void listDevices()
{
// Get number of devices
int count;
@@ -213,7 +235,7 @@ std::string getDeviceDriver(const int device)

#else

void listDevices(void)
void listDevices()
{
std::cout << "0: CPU" << std::endl;
}
27 changes: 7 additions & 20 deletions src/thrust/ThrustStream.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Copyright (c) 2020 Tom Deakin
// University of Bristol HPC
// Copyright (c) 2020 Tom Deakin, 2024 Bernhard Manfred Gruber
// University of Bristol HPC, NVIDIA
//
// For full license terms please see the LICENSE file distributed with this
// source code
@@ -8,11 +8,7 @@

#include <iostream>
#include <vector>
#if defined(MANAGED)
#include <thrust/universal_vector.h>
#else
#include <thrust/device_vector.h>
#endif
#include <memory>

#include "Stream.h"

@@ -22,22 +18,13 @@ template <class T>
class ThrustStream : public Stream<T>
{
protected:
// Size of arrays
struct Impl;
std::unique_ptr<Impl> impl; // avoid thrust vectors leaking into non-CUDA translation units
intptr_t array_size;

#if defined(MANAGED)
thrust::universtal_vector<T> a;
thrust::universtal_vector<T> b;
thrust::universtal_vector<T> c;
#else
thrust::device_vector<T> a;
thrust::device_vector<T> b;
thrust::device_vector<T> c;
#endif

public:
ThrustStream(const intptr_t, int);
~ThrustStream() = default;
ThrustStream(intptr_t array_size, int device);
~ThrustStream();

virtual void copy() override;
virtual void add() override;