Fix compilation for newer thrust versions

Recent changes in Thrust, for the CUDA backend, no longer allow including `<thrust/device_vector.h>` into a translation unit that is not compiled by a CUDA compiler. As a workaround, the vectors containing the data are moved from the header into the benchmark implementation file.
UoB-HPC · bernhardmgruber · Jun 26, 2024 · Jun 26, 2024 · Jun 26, 2024 · Jun 26, 2024
commit b5ad6c0eae1606245720ae6a0f85856e0aa45385
diff --git a/src/thrust/ThrustStream.cu b/src/thrust/ThrustStream.cu
@@ -1,5 +1,5 @@
-// Copyright (c) 2020 Tom Deakin
-// University of Bristol HPC
+// Copyright (c) 2020 Tom Deakin, 2024 Bernhard Manfred Gruber
+// University of Bristol HPC, NVIDIA
 //
 // For full license terms please see the LICENSE file distributed with this
 // source code
@@ -10,6 +10,25 @@
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/zip_function.h>
 
+#if defined(MANAGED)
+#include <thrust/universal_vector.h>
+#else
+#include <thrust/device_vector.h>
+#endif
+
+template <class T>
+using vector =
+#if defined(MANAGED)
+  thrust::universal_vector<T>;
+#else
+  thrust::device_vector<T>;
+#endif
+
+template <class T>
+struct ThrustStream<T>::Impl{
+  vector<T> a, b, c;
+};
+
 static inline void synchronise()
 {
 // rocThrust doesn't synchronise between thrust calls
@@ -20,7 +39,7 @@ static inline void synchronise()
 
 template <class T>
 ThrustStream<T>::ThrustStream(const intptr_t array_size, int device)
-    : array_size{array_size}, a(array_size), b(array_size), c(array_size) {
+    : array_size{array_size}, impl(new Impl{vector<T>(array_size), vector<T>(array_size), vector<T>(array_size)}) {
   std::cout << "Using CUDA device: " << getDeviceName(device) << std::endl;
   std::cout << "Driver: " << getDeviceDriver(device) << std::endl;
   std::cout << "Thrust version: " << THRUST_VERSION << std::endl;
@@ -50,27 +69,30 @@ ThrustStream<T>::ThrustStream(const intptr_t array_size, int device)
 
 }
 
+template <class T>
+ThrustStream<T>::~ThrustStream() = default;
+
 template <class T>
 void ThrustStream<T>::init_arrays(T initA, T initB, T initC)
 {
-  thrust::fill(a.begin(), a.end(), initA);
-  thrust::fill(b.begin(), b.end(), initB);
-  thrust::fill(c.begin(), c.end(), initC);
+  thrust::fill(impl->a.begin(), impl->a.end(), initA);
+  thrust::fill(impl->b.begin(), impl->b.end(), initB);
+  thrust::fill(impl->c.begin(), impl->c.end(), initC);
   synchronise();
 }
 
 template <class T>
 void ThrustStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
 {
-  thrust::copy(a.begin(), a.end(), h_a.begin());
-  thrust::copy(b.begin(), b.end(), h_b.begin());
-  thrust::copy(c.begin(), c.end(), h_c.begin());
+  thrust::copy(impl->a.begin(), impl->a.end(), h_a.begin());
+  thrust::copy(impl->b.begin(), impl->b.end(), h_b.begin());
+  thrust::copy(impl->c.begin(), impl->c.end(), h_c.begin());
 }
 
 template <class T>
 void ThrustStream<T>::copy()
 {
-  thrust::copy(a.begin(), a.end(),c.begin());
+  thrust::copy(impl->a.begin(), impl->a.end(),impl->c.begin());
   synchronise();
 }
 
@@ -79,9 +101,9 @@ void ThrustStream<T>::mul()
 {
   const T scalar = startScalar;
   thrust::transform(
-      c.begin(),
-      c.end(),
-      b.begin(),
+      impl->c.begin(),
+      impl->c.end(),
+      impl->b.begin(),
       [=] __device__ __host__ (const T &ci){
         return ci * scalar;
       }
@@ -93,9 +115,9 @@ template <class T>
 void ThrustStream<T>::add()
 {
   thrust::transform(
-      thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin())),
-      thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end())),
-      c.begin(),
+      thrust::make_zip_iterator(thrust::make_tuple(impl->a.begin(), impl->b.begin())),
+      thrust::make_zip_iterator(thrust::make_tuple(impl->a.end(), impl->b.end())),
+      impl->c.begin(),
       thrust::make_zip_function(
           [] __device__ __host__ (const T& ai, const T& bi){
             return ai + bi;
@@ -109,9 +131,9 @@ void ThrustStream<T>::triad()
 {
   const T scalar = startScalar;
   thrust::transform(
-      thrust::make_zip_iterator(thrust::make_tuple(b.begin(), c.begin())),
-      thrust::make_zip_iterator(thrust::make_tuple(b.end(), c.end())),
-      a.begin(),
+      thrust::make_zip_iterator(thrust::make_tuple(impl->b.begin(), impl->c.begin())),
+      thrust::make_zip_iterator(thrust::make_tuple(impl->b.end(), impl->c.end())),
+      impl->a.begin(),
       thrust::make_zip_function(
           [=] __device__ __host__ (const T& bi, const T& ci){
             return bi + scalar * ci;
@@ -125,9 +147,9 @@ void ThrustStream<T>::nstream()
 {
   const T scalar = startScalar;
   thrust::transform(
-      thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin(), c.begin())),
-      thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end(), c.end())),
-      a.begin(),
+      thrust::make_zip_iterator(thrust::make_tuple(impl->a.begin(), impl->b.begin(), impl->c.begin())),
+      thrust::make_zip_iterator(thrust::make_tuple(impl->a.end(), impl->b.end(), impl->c.end())),
+      impl->a.begin(),
       thrust::make_zip_function(
           [=] __device__ __host__ (const T& ai, const T& bi, const T& ci){
             return ai + bi + scalar * ci;
@@ -139,7 +161,7 @@ void ThrustStream<T>::nstream()
 template <class T>
 T ThrustStream<T>::dot()
 {
-  return thrust::inner_product(a.begin(), a.end(), b.begin(), T{});
+  return thrust::inner_product(impl->a.begin(), impl->a.end(), impl->b.begin(), T{});
 }
 
 #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA || \
@@ -155,7 +177,7 @@ T ThrustStream<T>::dot()
 # error Unsupported compiler for Thrust
 #endif
 
-void check_error(void)
+void check_error()
 {
   IMPL_FN__(Error_t) err =  IMPL_FN__(GetLastError());
   if (err !=  IMPL_FN__(Success))
@@ -165,7 +187,7 @@ void check_error(void)
   }
 }
 
-void listDevices(void)
+void listDevices()
 {
   // Get number of devices
   int count;
@@ -213,7 +235,7 @@ std::string getDeviceDriver(const int device)
 
 #else
 
-void listDevices(void)
+void listDevices()
 {
   std::cout << "0: CPU" << std::endl;
 }

diff --git a/src/thrust/ThrustStream.h b/src/thrust/ThrustStream.h
@@ -1,5 +1,5 @@
-// Copyright (c) 2020 Tom Deakin
-// University of Bristol HPC
+// Copyright (c) 2020 Tom Deakin, 2024 Bernhard Manfred Gruber
+// University of Bristol HPC, NVIDIA
 //
 // For full license terms please see the LICENSE file distributed with this
 // source code
@@ -8,11 +8,7 @@
 
 #include <iostream>
 #include <vector>
-#if defined(MANAGED)
-#include <thrust/universal_vector.h>
-#else
-#include <thrust/device_vector.h>
-#endif
+#include <memory>
 
 #include "Stream.h"
 
@@ -22,22 +18,13 @@ template <class T>
 class ThrustStream : public Stream<T>
 {
   protected:
-    // Size of arrays
+    struct Impl;
+    std::unique_ptr<Impl> impl; // avoid thrust vectors leaking into non-CUDA translation units
     intptr_t array_size;
 
-  #if defined(MANAGED)
-    thrust::universtal_vector<T> a;
-    thrust::universtal_vector<T> b;
-    thrust::universtal_vector<T> c;
-  #else
-    thrust::device_vector<T> a;
-    thrust::device_vector<T> b;
-    thrust::device_vector<T> c;
-  #endif
-
   public:
-    ThrustStream(const intptr_t, int);
-    ~ThrustStream() = default;
+    ThrustStream(intptr_t array_size, int device);
+    ~ThrustStream();
 
     virtual void copy() override;
     virtual void add() override;