Skip to content

Commit

Permalink
ocl: prepared OpenCL backend for CP2K's Offload/DBM/DBT libraries
Browse files Browse the repository at this point in the history
* Introduced ACC_OPENCL_MEM_DEBUG for some debug details.
* Auto-detecting device-memory offset (ACC_OPENCL_MEM_OFFSET), i.e.,
  range-finding device pointers to adjust the offset accordingly.
  - c_dbcsr_acc_memcpy_d2d, c_dbcsr_acc_memcpy_h2d,
  - c_dbcsr_acc_memcpy_d2h, c_dbcsr_acc_opencl_memset.
* Resolved cast-qual warnings.
* Removed ACC_OPENCL_MEM_NOALLOC/ACC_OPENCL_MEM macros.
* Improved performance when using clEnqueueFillBuffer.
* Allow stream-argument (ACC-interface) to be NULL.
  - Introduced c_dbcsr_acc_opencl_stream_default().
  - Implemented ACC_OPENCL_STREAM_NULL.
  - Enables synchronous operation.
* Exposed/implemented OpenCL-only c_dbcsr_acc_opencl_memset,
  and based c_dbcsr_acc_memset_zero on c_dbcsr_acc_opencl_memset.
  - Allow custom memset-value to match CP2K's offload library.
* Adjusted console output (ACC_OPENCL_VERBOSE).
* Revised acc_opencl.sh (generating kernel header).
* Increased number of repetitions of standalone benchmark.
* Updated tuned parameters.
  • Loading branch information
hfp committed Dec 16, 2023
1 parent 67488a9 commit 20980ef
Show file tree
Hide file tree
Showing 14 changed files with 1,282 additions and 1,081 deletions.
8 changes: 6 additions & 2 deletions src/acc/acc_bench_smm.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@
#if !defined(NREPEAT)
# define NREPEAT 3
#endif
#if !defined(XREPEAT)
# define XREPEAT 66
#endif
#if !defined(TRANSPOSE)
# define TRANSPOSE
#endif
Expand Down Expand Up @@ -292,7 +295,8 @@ int main(int argc, char* argv[]) {
double duration = 0;
#endif
const char* const env_stack_size = getenv("SMM_BATCHSIZE");
int nrepeat = (0 < inr ? inr : NREPEAT);
const int xrepeat = (0 != check ? NREPEAT : XREPEAT);
int nrepeat = (0 < inr ? inr : xrepeat);
int stack_size, na, nb, nc, nr, r;
if (NULL == env_stack_size) {
stack_size = 0;
Expand Down Expand Up @@ -325,7 +329,7 @@ int main(int argc, char* argv[]) {
const int r = rnd[nok % NRAND], ss = -stack_size, bs = (1 < ss ? ss : BATCHSIZE);
const int limit = (BATCHGRAIN < ss ? ((bs + BATCHGRAIN - 1) / BATCHGRAIN) : ss);
stack_size = (r % limit + 1) * BATCHGRAIN;
nrepeat = MAX((BATCHSIZE * nrepeat + stack_size - 1) / stack_size, NREPEAT);
nrepeat = MAX((BATCHSIZE * nrepeat + stack_size - 1) / stack_size, xrepeat);
}
else stack_size = BATCHSIZE; /* plain default */
}
Expand Down
4 changes: 2 additions & 2 deletions src/acc/opencl/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ endif
endif

CFLAGS := -fPIC \
-Wall -Wextra \
-Wall -Wextra -Wcast-qual \
-Wno-overlength-strings \
-Wno-variadic-macros \
-Wno-unused-function \
Expand Down Expand Up @@ -299,7 +299,7 @@ endif
fi

$(MAKDIR)/smm/opencl_kernels.h: $(MAKDIR)/acc_opencl.sh $(KERNEL) $(PARAMS)
@CPPFLAGS=$(CPP_OPENCL_FLAGS) $(MAKDIR)/acc_opencl.sh $(KERNEL) $(PARAMS) $@
CPPFLAGS=$(CPP_OPENCL_FLAGS) $(MAKDIR)/acc_opencl.sh $(KERNEL) $(PARAMS) $@

.PHONY: backend
backend: $(ACCDIR)/dbcsr_acc.a
Expand Down
69 changes: 50 additions & 19 deletions src/acc/opencl/acc_opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -531,26 +531,45 @@ int c_dbcsr_acc_init(void) {
else {
result = EXIT_FAILURE;
}
c_dbcsr_acc_opencl_config.handle = 0;
c_dbcsr_acc_opencl_config.handles = NULL;
c_dbcsr_acc_opencl_config.nclmems = c_dbcsr_acc_opencl_config.nevents = 0;
c_dbcsr_acc_opencl_config.clmems = c_dbcsr_acc_opencl_config.events = NULL;
c_dbcsr_acc_opencl_config.storage = NULL;
# if LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER && defined(ACC_OPENCL_HANDLES_MAXCOUNT) && \
(0 < ACC_OPENCL_HANDLES_MAXCOUNT)
if (EXIT_SUCCESS == result) {
c_dbcsr_acc_opencl_config.handle = ACC_OPENCL_HANDLES_MAXCOUNT * c_dbcsr_acc_opencl_config.nthreads;
c_dbcsr_acc_opencl_config.handles = (void**)malloc(sizeof(void*) * c_dbcsr_acc_opencl_config.handle);
c_dbcsr_acc_opencl_config.storage = malloc(sizeof(void*) * c_dbcsr_acc_opencl_config.handle);
if (NULL != c_dbcsr_acc_opencl_config.handles && NULL != c_dbcsr_acc_opencl_config.storage) {
libxsmm_pmalloc_init(sizeof(void*), &c_dbcsr_acc_opencl_config.handle, c_dbcsr_acc_opencl_config.handles,
const size_t nhandles = ACC_OPENCL_HANDLES_MAXCOUNT * c_dbcsr_acc_opencl_config.nthreads;
# if defined(ACC_OPENCL_MEM_OFFSET)
c_dbcsr_acc_opencl_config.nclmems = nhandles;
c_dbcsr_acc_opencl_config.clmems = (void**)malloc(sizeof(void*) * nhandles);
c_dbcsr_acc_opencl_config.storage = malloc(sizeof(void*) * (nhandles + nhandles));
if (NULL != c_dbcsr_acc_opencl_config.clmems && NULL != c_dbcsr_acc_opencl_config.storage) {
libxsmm_pmalloc_init(sizeof(void*), &c_dbcsr_acc_opencl_config.nclmems, c_dbcsr_acc_opencl_config.clmems,
(void**)c_dbcsr_acc_opencl_config.storage + nhandles);
}
else {
free(c_dbcsr_acc_opencl_config.clmems);
c_dbcsr_acc_opencl_config.clmems = NULL;
c_dbcsr_acc_opencl_config.nclmems = 0;
result = EXIT_FAILURE;
}
# else
c_dbcsr_acc_opencl_config.storage = malloc(sizeof(void*) * nhandles);
# endif
c_dbcsr_acc_opencl_config.nevents = nhandles;
c_dbcsr_acc_opencl_config.events = (void**)malloc(sizeof(void*) * nhandles);
if (NULL != c_dbcsr_acc_opencl_config.events && NULL != c_dbcsr_acc_opencl_config.storage) {
libxsmm_pmalloc_init(sizeof(void*), &c_dbcsr_acc_opencl_config.nevents, c_dbcsr_acc_opencl_config.events,
c_dbcsr_acc_opencl_config.storage);
}
else {
free(c_dbcsr_acc_opencl_config.handles);
free(c_dbcsr_acc_opencl_config.events);
c_dbcsr_acc_opencl_config.events = NULL;
c_dbcsr_acc_opencl_config.nevents = 0;
result = EXIT_FAILURE;
}
if (EXIT_SUCCESS != result) {
free(c_dbcsr_acc_opencl_config.storage);
c_dbcsr_acc_opencl_config.handles = NULL;
c_dbcsr_acc_opencl_config.storage = NULL;
c_dbcsr_acc_opencl_config.handle = 0;
result = EXIT_FAILURE;
}
}
# endif
Expand Down Expand Up @@ -663,7 +682,7 @@ int c_dbcsr_acc_finalize(void) {
}
}
/* release/reset buffers */
free(c_dbcsr_acc_opencl_config.handles);
free(c_dbcsr_acc_opencl_config.events);
free(c_dbcsr_acc_opencl_config.storage);
free(c_dbcsr_acc_opencl_config.streams);
/* clear configuration */
Expand Down Expand Up @@ -1253,7 +1272,7 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
else break;
}
# if !defined(NDEBUG)
if (EXIT_SUCCESS == c_dbcsr_acc_opencl_device_ext(active_id, (const char**)&ext, 1))
if (EXIT_SUCCESS == c_dbcsr_acc_opencl_device_ext(active_id, (const char* const*)&ext, 1))
# endif
{ /* NDEBUG: assume given extension is supported (confirmed upfront) */
if (NULL == line) { /* extension is not already part of source */
Expand Down Expand Up @@ -1318,7 +1337,11 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
: NULL);
if (NULL != src) {
if ((size_t)size == fread(src, 1 /*sizeof(char)*/, size /*count*/, file)) {
if (source != ext_source) libxsmm_free((void*)ext_source);
if (source != ext_source) {
void* p = NULL;
LIBXSMM_ASSIGN127(&p, &ext_source);
libxsmm_free(p);
}
src[size] = '\0';
ext_source = src;
}
Expand Down Expand Up @@ -1357,7 +1380,11 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
}
ok = EXIT_FAILURE;
}
if (source != ext_source) libxsmm_free((void*)ext_source);
if (source != ext_source) {
void* p = NULL;
LIBXSMM_ASSIGN127(&p, &ext_source);
libxsmm_free(p);
}
buffer[0] = '\0'; /* reset to empty */
if (CL_SUCCESS == result) {
*kernel = clCreateKernel(program, kernel_name, &result);
Expand Down Expand Up @@ -1414,7 +1441,9 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
}
}
else if (source != ext_source) { /* error: creating program */
libxsmm_free((void*)ext_source);
void* p = NULL;
LIBXSMM_ASSIGN127(&p, &ext_source);
libxsmm_free(p);
}
}
else if (EXIT_SUCCESS == result) { /* binary representation */
Expand All @@ -1424,7 +1453,7 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
# endif
{
program = clCreateProgramWithBinary(
context, 1, &active_id, &size_src, (const unsigned char**)(const void*)&source, NULL /*binary_status*/, &result);
context, 1, &active_id, &size_src, (const unsigned char**)&source, NULL /*binary_status*/, &result);
}
if (CL_SUCCESS == result) {
assert(NULL != program);
Expand All @@ -1443,7 +1472,7 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
# endif
{
program = clCreateProgramWithBinary(
context, 1, &active_id, &size_src, (const unsigned char**)(const void*)&source, NULL /*binary_status*/, &result);
context, 1, &active_id, &size_src, (const unsigned char**)&source, NULL /*binary_status*/, &result);
}
assert(CL_SUCCESS != result || NULL != program);
if (CL_SUCCESS == result) {
Expand Down Expand Up @@ -1479,8 +1508,10 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
}
}
if (NULL != file_src) {
void* p = NULL;
LIBXSMM_ASSIGN127(&p, (const void*)&source);
assert(0 != source_is_file);
libxsmm_free((void*)source);
libxsmm_free(p);
}
# if !defined(NDEBUG)
if (EXIT_SUCCESS != result && NULL != kernel) *kernel = NULL;
Expand Down
24 changes: 15 additions & 9 deletions src/acc/opencl/acc_opencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,21 @@
# define ACC_OPENCL_STREAM_PRIORITIES
# endif
#endif
/** Stream-argument (ACC-interface) can be NULL (synchronous) */
#if !defined(ACC_OPENCL_STREAM_NULL) && 1
# define ACC_OPENCL_STREAM_NULL
#endif

/** Automatically determine cl_mem offset */
#if !defined(ACC_OPENCL_MEM_OFFSET) && 1
# define ACC_OPENCL_MEM_OFFSET
#endif

/** Use DBCSR's profile for detailed timings */
#if !defined(ACC_OPENCL_PROFILE) && 0
# define ACC_OPENCL_PROFILE
#endif

/* can depend on OpenCL implementation (unlikely) */
#if !defined(ACC_OPENCL_MEM_NOALLOC) && 1
# define ACC_OPENCL_MEM_NOALLOC
# define ACC_OPENCL_MEM(A) ((cl_mem*)&(A))
#else
# define ACC_OPENCL_MEM(A) ((cl_mem*)(A))
#endif
/* attaching c_dbcsr_acc_opencl_info_stream_t is needed */
#define ACC_OPENCL_STREAM(A) ((cl_command_queue*)(A))
/* incompatible with c_dbcsr_acc_event_record */
Expand Down Expand Up @@ -240,9 +243,9 @@ typedef struct c_dbcsr_acc_opencl_config_t {
/** Table of devices (thread-specific). */
c_dbcsr_acc_opencl_device_t* device;
/** Handle-counter. */
size_t handle;
size_t nclmems, nevents;
/** All handles and related storage. */
void **handles, *storage;
void **clmems, **events, *storage;
/** All created streams partitioned by thread-ID (thread-local slots). */
void** streams;
/** Counts number of streams created (thread-local). */
Expand Down Expand Up @@ -293,8 +296,11 @@ typedef struct c_dbcsr_acc_opencl_info_stream_t {
c_dbcsr_acc_opencl_info_stream_t* c_dbcsr_acc_opencl_info_stream(void* stream);
const int* c_dbcsr_acc_opencl_stream_priority(const void* stream);

void* c_dbcsr_acc_opencl_stream_default(void);

/** Get host-pointer associated with device-memory (c_dbcsr_acc_dev_mem_allocate). */
void* c_dbcsr_acc_opencl_get_hostptr(cl_mem memory);
int c_dbcsr_acc_opencl_memset(void* dev_mem, int value, size_t offset, size_t nbytes, void* stream);
/** Amount of device memory; local memory is only non-zero if separate from global. */
int c_dbcsr_acc_opencl_info_devmem(cl_device_id device, size_t* mem_free, size_t* mem_total, size_t* mem_local, int* mem_unified);
/** Get device associated with thread-ID. */
Expand Down
25 changes: 10 additions & 15 deletions src/acc/opencl/acc_opencl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ then
-h|--help)
shift $#;;
-p|--params)
PARAMPATH=yes
PARAMS=$2
PARAMS="$2\t"
shift 2;;
-c|-d|--debug|--comments)
CPPFLAGS+=" -C"
Expand All @@ -50,13 +49,9 @@ then
esac
done
HERE="$(cd "$(dirname "$0")" && pwd -P)"
PARAMDIR=$(if [ ! "${PARAMDIR}" ]; then echo "${HERE}/smm/params"; fi)
if [ "${PARAMPATH}" ]; then
PARAMPATH=${PARAMS}
else
HERE="$(cd "$(dirname "$0")" && pwd -P)"
PARAMPATH=${PARAMDIR}
fi
PARAMDIR=${PARAMDIR:-${PARAMS}}
PARAMDIR=${PARAMDIR:-${HERE}/smm/params}
PARAMDIR=$(echo -e "${PARAMDIR}" | ${TR} -d '\t')
if [ "$#" -gt 1 ]; then
# allow for instance /dev/stdout
if [ "${OFILE##*.}" = "h" ]; then
Expand Down Expand Up @@ -127,7 +122,7 @@ then
NFILES_CSV=0
for CSVFILE in "${CSVFILES[@]}"; do
if [ "${CSVFILE##*.}" = "csv" ]; then
if [ -e "${CSVFILE}" ]; then
if [ -f "${CSVFILE}" ]; then
NFILES_CSV=$((NFILES_CSV+1))
fi
else
Expand All @@ -136,15 +131,15 @@ then
exit 1
fi
done
if [ "0" = "${NFILES_CSV}" ] && [ "${PARAMPATH}" ]; then
CSVFILES=("${PARAMPATH}"/*.csv)
if [ "0" = "${NFILES_CSV}" ] && [ "${PARAMDIR}" ] && [ -d "${PARAMDIR}" ]; then
CSVFILES=("${PARAMDIR}"/*.csv)
NFILES_CSV=${#CSVFILES[@]}
fi
for CSVFILE in "${CSVFILES[@]}"; do
if [ ! "${DELIM}" ]; then
SEPAR=$(${SED} -n "1s/[^${DELIMS}]//gp" "${CSVFILE}")
SEPAR=$(${SED} -n "1s/[^${DELIMS}]//gp" "${CSVFILE}" 2>/dev/null)
DELIM=${SEPAR:0:1}
MATCH=$(${SED} -n "1s/[^${DELIM}]//gp" "${CSVFILE}")
MATCH=$(${SED} -n "1s/[^${DELIM}]//gp" "${CSVFILE}" 2>/dev/null)
fi
if [ "${DELIM}" ]; then
CHECK=$(${SED} "/^[[:space:]]*$/d;s/[^${DELIM}]//g" "${CSVFILE}" | ${SORT} -u | ${SED} -n "0,/./p")
Expand All @@ -156,7 +151,7 @@ then
else
ERRFILE=${CSVFILE}
fi
if [ "${ERRFILE}" ]; then
if [ "${ERRFILE}" ] && [ -f "${ERRFILE}" ]; then
>&2 echo "ERROR: ${ERRFILE} is malformed!"
if [ "${HFILE}" ]; then ${RM} -f "${OFILE}"; fi
exit 1
Expand Down
10 changes: 5 additions & 5 deletions src/acc/opencl/acc_opencl_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ int c_dbcsr_acc_event_create(void** event_p) {
if (EXIT_SUCCESS == result)
# endif
{
assert(NULL == c_dbcsr_acc_opencl_config.handles || sizeof(void*) >= sizeof(cl_event));
assert(NULL == c_dbcsr_acc_opencl_config.events || sizeof(void*) >= sizeof(cl_event));
*event_p = (
# if LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER && defined(ACC_OPENCL_HANDLES_MAXCOUNT) && \
(0 < ACC_OPENCL_HANDLES_MAXCOUNT)
NULL != c_dbcsr_acc_opencl_config.handles
? libxsmm_pmalloc(c_dbcsr_acc_opencl_config.handles, &c_dbcsr_acc_opencl_config.handle)
NULL != c_dbcsr_acc_opencl_config.events
? libxsmm_pmalloc(c_dbcsr_acc_opencl_config.events, &c_dbcsr_acc_opencl_config.nevents)
:
# endif
malloc(sizeof(cl_event)));
Expand Down Expand Up @@ -110,9 +110,9 @@ int c_dbcsr_acc_event_destroy(void* event) {
if (NULL != clevent) result = clReleaseEvent(clevent);
# if LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER && defined(ACC_OPENCL_HANDLES_MAXCOUNT) && \
(0 < ACC_OPENCL_HANDLES_MAXCOUNT)
if (NULL != c_dbcsr_acc_opencl_config.handles) {
if (NULL != c_dbcsr_acc_opencl_config.events) {
/**(cl_event*)event = NULL; assert(NULL == *ACC_OPENCL_EVENT(event));*/
libxsmm_pfree(event, c_dbcsr_acc_opencl_config.handles, &c_dbcsr_acc_opencl_config.handle);
libxsmm_pfree(event, c_dbcsr_acc_opencl_config.events, &c_dbcsr_acc_opencl_config.nevents);
}
else
# endif
Expand Down
Loading

0 comments on commit 20980ef

Please sign in to comment.