Skip to content

Commit

Permalink
Update cl2.hpp
Browse files Browse the repository at this point in the history
  • Loading branch information
tomdeakin committed May 3, 2016
1 parent 95f9efb commit 83516ae
Showing 1 changed file with 127 additions and 82 deletions.
209 changes: 127 additions & 82 deletions CL/cl2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@

/*! \file
*
* \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33),
* \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33),
* OpenCL 1.2 (rev 15) and OpenCL 2.0 (rev 29)
* \author Lee Howes and Bruce Merry
*
* Derived from the OpenCL 1.x C++ bindings written by
*
* Derived from the OpenCL 1.x C++ bindings written by
* Benedict R. Gaster, Laurent Morichetti and Lee Howes
* With additions and fixes from:
* Brian Cole, March 3rd 2010 and April 2012
Expand All @@ -52,6 +52,18 @@
* #define CL_HPP_USE_DX_INTEROP
* cl_khr_sub_groups
* #define CL_HPP_USE_CL_SUB_GROUPS_KHR
*
* Doxygen documentation for this header is available here:
*
* http://khronosgroup.github.io/OpenCL-CLHPP/
*
* The latest version of this header can be found on the GitHub releases page:
*
* https://github.com/KhronosGroup/OpenCL-CLHPP/releases
*
* Bugs and patches can be submitted to the GitHub repository:
*
* https://github.com/KhronosGroup/OpenCL-CLHPP
*/

/*! \mainpage
Expand Down Expand Up @@ -134,41 +146,64 @@
*
* \section parameterization Parameters
* This header may be parameterized by a set of preprocessor macros.
* CL_HPP_TARGET_OPENCL_VERSION
* - Defines the target OpenCL runtime version to build the header against.
* Defaults to 200, representing OpenCL 2.0.
* CL_HPP_NO_STD_STRING
* - Do not use the standard library string class.
* cl::string is not defined and may be defined by the user before
* cl2.hpp is included.
* CL_HPP_NO_STD_VECTOR
* - Do not use the standard library vector class.
* cl::vector is not defined and may be defined by the user before
* cl2.hpp is included.
* CL_HPP_NO_STD_ARRAY
* - Do not use the standard library array class.
* cl::array is not defined and may be defined by the user before
* cl2.hpp is included.
* CL_HPP_NO_STD_UNIQUE_PTR
* - Do not use the standard library unique_ptr class.
* cl::pointer and the cl::allocate_pointer function are not defined
* and may be defined by the user before cl2.hpp is included.
* CL_HPP_ENABLE_DEVICE_FISSION
* - Enables device fission for OpenCL 1.2 platforms
* CL_HPP_ENABLE_EXCEPTIONS
* - Enable exceptions for use in the C++ bindings header.
* This is the preferred error handling mechanism but is not required.
* CL_HPP_ENABLE_SIZE_T_COMPATIBILITY
* - Backward compatibility option to support cl.hpp-style size_t class.
* Replaces the updated std::array derived version and removal of size_t
* from the namespace. Note that in this case the new size_t class
* is placed in the cl::compatibility namespace and thus requires
* an additional using declaration for direct backward compatibility.
* CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY
* - Enable older vector of pairs interface for construction of programs.
* CL_HPP_CL_1_2_DEFAULT_BUILD
* - Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0
* - applies to use of cl::Program construction and other program build variants.
*
* - CL_HPP_TARGET_OPENCL_VERSION
*
* Defines the target OpenCL runtime version to build the header
* against. Defaults to 200, representing OpenCL 2.0.
*
* - CL_HPP_NO_STD_STRING
*
* Do not use the standard library string class. cl::string is not
* defined and may be defined by the user before cl2.hpp is
* included.
*
* - CL_HPP_NO_STD_VECTOR
*
* Do not use the standard library vector class. cl::vector is not
* defined and may be defined by the user before cl2.hpp is
* included.
*
* - CL_HPP_NO_STD_ARRAY
*
* Do not use the standard library array class. cl::array is not
* defined and may be defined by the user before cl2.hpp is
* included.
*
* - CL_HPP_NO_STD_UNIQUE_PTR
*
* Do not use the standard library unique_ptr class. cl::pointer and
* the cl::allocate_pointer functions are not defined and may be
* defined by the user before cl2.hpp is included.
*
* - CL_HPP_ENABLE_DEVICE_FISSION
*
* Enables device fission for OpenCL 1.2 platforms.
*
* - CL_HPP_ENABLE_EXCEPTIONS
*
* Enable exceptions for use in the C++ bindings header. This is the
* preferred error handling mechanism but is not required.
*
* - CL_HPP_ENABLE_SIZE_T_COMPATIBILITY
*
* Backward compatibility option to support cl.hpp-style size_t
* class. Replaces the updated std::array derived version and
* removal of size_t from the namespace. Note that in this case the
* new size_t class is placed in the cl::compatibility namespace and
* thus requires an additional using declaration for direct backward
* compatibility.
*
* - CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY
*
* Enable older vector of pairs interface for construction of
* programs.
*
* - CL_HPP_CL_1_2_DEFAULT_BUILD
*
* Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0.
* Applies to use of cl::Program construction and other program
* build variants.
*
*
* \section example Example
Expand All @@ -177,19 +212,19 @@
* bindings, including support for the optional exception feature and
* also the supplied vector and string classes; see the following sections for
* descriptions of these features.
*
*
* \code
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>
#include <iostream>
#include <vector>
#include <memory>
#include <algorithm>
const int numElements = 32;
int main(void)
{
// Filter for a 2.0 platform and set it as the default
Expand All @@ -212,35 +247,45 @@
std::cout << "Error setting default platform.";
return -1;
}
std::string kernel1{
"global int globalA;"
"kernel void updateGlobal(){"
" globalA = 75;"
"}"};
std::string kernel2{
"typedef struct { global int *bar; } Foo; kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, global int *output, int val, write_only pipe int outPipe, queue_t childQueue){"
" output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);"
" write_pipe(outPipe, &val);"
" queue_t default_queue = get_default_queue(); "
" ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); "
// Have a child kernel write into third quarter of output
" enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
" ^{"
" output[get_global_size(0)*2 + get_global_id(0)] = inputA[get_global_size(0)*2+get_global_id(0)] + inputB[get_global_size(0)*2+get_global_id(0)] + globalA;"
" });"
// Have a child kernel write into last quarter of output
" enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
" ^{"
" output[get_global_size(0)*3 + get_global_id(0)] = inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;"
" });"
"}" };
// Use C++11 raw string literals for kernel source code
std::string kernel1{R"CLC(
global int globalA;
kernel void updateGlobal()
{
globalA = 75;
}
)CLC"};
std::string kernel2{R"CLC(
typedef struct { global int *bar; } Foo;
kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB,
global int *output, int val, write_only pipe int outPipe, queue_t childQueue)
{
output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);
write_pipe(outPipe, &val);
queue_t default_queue = get_default_queue();
ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2);
// Have a child kernel write into third quarter of output
enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
^{
output[get_global_size(0)*2 + get_global_id(0)] =
inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA;
});
// Have a child kernel write into last quarter of output
enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
^{
output[get_global_size(0)*3 + get_global_id(0)] =
inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;
});
}
)CLC"};
// New simpler string interface style
std::vector<std::string> programStrings {kernel1, kernel2};
cl::Program vectorAddProgram(
programStrings);
cl::Program vectorAddProgram(programStrings);
try {
vectorAddProgram.build("-cl-std=CL2.0");
}
Expand All @@ -251,7 +296,7 @@
for (auto &pair : buildInfo) {
std::cerr << pair.second << std::endl << std::endl;
}
return 1;
}
Expand All @@ -264,17 +309,17 @@
program2Kernel(
cl::EnqueueArgs(
cl::NDRange(1)));
//////////////////
// SVM allocations
cl::pointer<int> anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
*anSVMInt = 5;
cl::SVMAllocator<int, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
cl::SVMAllocator<Foo, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
auto fooPointer = cl::allocate_pointer<Foo>(svmAllocReadOnly);
fooPointer->bar = anSVMInt.get();
cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> inputA(numElements, 1, svmAlloc);
std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> inputA(numElements, 1, svmAlloc);
cl::coarse_svm_vector<int> inputB(numElements, 2, svmAlloc);
//
Expand All @@ -284,7 +329,7 @@
std::vector<int> output(numElements, 0xdeadbeef);
cl::Buffer outputBuffer(begin(output), end(output), false);
cl::Pipe aPipe(sizeof(cl_int), numElements / 2);
// Default command queue, also passed in as a parameter
cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault(
cl::Context::getDefault(), cl::Device::getDefault());
Expand Down Expand Up @@ -339,7 +384,7 @@
return 0;
}
*
*
* \endcode
*
*/
Expand Down Expand Up @@ -3538,7 +3583,7 @@ template <class T, class Alloc, class... Args>
cl::pointer<T, detail::Deleter<Alloc>> allocate_pointer(const Alloc &alloc_, Args&&... args)
{
Alloc alloc(alloc_);
static const size_t copies = 1;
static const size_type copies = 1;

// Ensure that creation of the management block and the
// object are dealt with separately such that we only provide a deleter
Expand Down Expand Up @@ -6520,7 +6565,7 @@ inline cl_int cl::Program::getInfo(cl_program_info name, vector<vector<unsigned

// Resize the parameter array and constituent arrays
param->resize(numBinaries);
for (int i = 0; i < numBinaries; ++i) {
for (size_type i = 0; i < numBinaries; ++i) {
(*param)[i].resize(sizes[i]);
}

Expand Down Expand Up @@ -7107,7 +7152,7 @@ class CommandQueue : public detail::Wrapper<cl_command_queue>
size_type buffer_slice_pitch,
size_type host_row_pitch,
size_type host_slice_pitch,
void *ptr,
const void *ptr,
const vector<Event>* events = NULL,
Event* event = NULL) const
{
Expand Down Expand Up @@ -7255,7 +7300,7 @@ class CommandQueue : public detail::Wrapper<cl_command_queue>
const array<size_type, 3>& region,
size_type row_pitch,
size_type slice_pitch,
void* ptr,
const void* ptr,
const vector<Event>* events = NULL,
Event* event = NULL) const
{
Expand Down Expand Up @@ -7845,7 +7890,7 @@ class CommandQueue : public detail::Wrapper<cl_command_queue>
CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask(
const Kernel& kernel,
const vector<Event>* events = NULL,
Event* event = NULL) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED const
Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED
{
cl_event tmp;
cl_int err = detail::errHandler(
Expand Down Expand Up @@ -8873,7 +8918,7 @@ inline cl_int enqueueWriteBufferRect(
size_type buffer_slice_pitch,
size_type host_row_pitch,
size_type host_slice_pitch,
void *ptr,
const void *ptr,
const vector<Event>* events = NULL,
Event* event = NULL)
{
Expand Down Expand Up @@ -8971,7 +9016,7 @@ inline cl_int enqueueWriteImage(
const array<size_type, 3>& region,
size_type row_pitch,
size_type slice_pitch,
void* ptr,
const void* ptr,
const vector<Event>* events = NULL,
Event* event = NULL)
{
Expand Down

0 comments on commit 83516ae

Please sign in to comment.