From ef89fe0cfc16234d6146abbb2f494d2e7556d2cb Mon Sep 17 00:00:00 2001 From: Weili An Date: Sat, 11 Jan 2025 19:28:54 -0500 Subject: [PATCH] balar-mmio: create constant macros to keep things consistent --- src/sst/elements/balar/Makefile.am | 1 + src/sst/elements/balar/balar_consts.h | 11 +++++++++++ src/sst/elements/balar/balar_packet.h | 11 ++++++----- src/sst/elements/balar/cuda_runtime_api.h | 7 ++++--- .../balar/tests/testBalar_testsuite_util.py | 1 + .../balar/tests/vanadisHandshake/cuda_runtime_api.c | 7 ++++--- .../balar/tests/vanadisHandshake/cuda_runtime_api.h | 13 +++++++------ .../vanadisLLVMRISCV/cuda_runtime_api_vanadis.cc | 11 ++++++----- 8 files changed, 40 insertions(+), 22 deletions(-) create mode 100644 src/sst/elements/balar/balar_consts.h diff --git a/src/sst/elements/balar/Makefile.am b/src/sst/elements/balar/Makefile.am index b70fdffdc4..6a3ed5c1ad 100644 --- a/src/sst/elements/balar/Makefile.am +++ b/src/sst/elements/balar/Makefile.am @@ -16,6 +16,7 @@ libbalar_la_SOURCES = \ cuda_runtime_api.h \ util.cc \ util.h \ + balar_consts.h \ balarMMIO.cc \ balarMMIO.h \ dmaEngine.cc \ diff --git a/src/sst/elements/balar/balar_consts.h b/src/sst/elements/balar/balar_consts.h new file mode 100644 index 0000000000..92ed6f2dd5 --- /dev/null +++ b/src/sst/elements/balar/balar_consts.h @@ -0,0 +1,11 @@ +#ifndef BALAR_CONSTS_H +#define BALAR_CONSTS_H +// Some constant macros to keep things consistent across both +// balar component and test example + +#define BALAR_CUDA_MAX_ARG_SIZE 256 +#define BALAR_CUDA_MAX_FILE_NAME 256 +#define BALAR_CUDA_MAX_KERNEL_NAME 512 +#define BALAR_CUDA_MAX_DEV_VAR_NAME 256 + +#endif \ No newline at end of file diff --git a/src/sst/elements/balar/balar_packet.h b/src/sst/elements/balar/balar_packet.h index 8b063b0c58..abc4a94be0 100644 --- a/src/sst/elements/balar/balar_packet.h +++ b/src/sst/elements/balar/balar_packet.h @@ -21,6 +21,7 @@ #include #include #include +#include "balar_consts.h" namespace SST { namespace BalarComponent { @@ -92,13 +93,13 @@ namespace BalarComponent { } cuda_malloc; struct { - char file_name[256]; + char file_name[BALAR_CUDA_MAX_FILE_NAME]; } register_fatbin; struct { uint64_t fatCubinHandle; uint64_t hostFun; - char deviceFun[256]; + char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]; } register_function; struct { @@ -140,7 +141,7 @@ namespace BalarComponent { uint64_t arg; uint64_t size; uint64_t offset; - uint8_t value[200]; + uint8_t value[BALAR_CUDA_MAX_ARG_SIZE]; } setup_argument; struct { @@ -155,7 +156,7 @@ namespace BalarComponent { void **fatCubinHandle; char *hostVar; //pointer to...something char *deviceAddress; //name of variable - char deviceName[256]; //name of variable + char deviceName[BALAR_CUDA_MAX_DEV_VAR_NAME]; //name of variable int32_t ext; int32_t size; int32_t constant; @@ -188,7 +189,7 @@ namespace BalarComponent { uint64_t hostVar_ptr; struct textureReference texRef; const void **deviceAddress; - char deviceName[256]; + char deviceName[BALAR_CUDA_MAX_DEV_VAR_NAME]; int dim; int norm; int ext; diff --git a/src/sst/elements/balar/cuda_runtime_api.h b/src/sst/elements/balar/cuda_runtime_api.h index 16e0cefdea..7dec4d126b 100644 --- a/src/sst/elements/balar/cuda_runtime_api.h +++ b/src/sst/elements/balar/cuda_runtime_api.h @@ -21,6 +21,7 @@ #define __dv(v) #endif /* __cplusplus */ #endif /* !__dv */ +#include "balar_consts.h" extern "C"{ @@ -29,12 +30,12 @@ uint64_t cudaMallocSST(void **devPtr, size_t size); // `addr` is passed by value __host__ cudaError_t CUDARTAPI cudaMallocHostSST(void *addr, size_t size); -unsigned CUDARTAPI __cudaRegisterFatBinarySST(char file_name[256]); +unsigned CUDARTAPI __cudaRegisterFatBinarySST(char file_name[BALAR_CUDA_MAX_FILE_NAME]); void CUDARTAPI __cudaRegisterFunctionSST( uint64_t fatCubinHandle, uint64_t hostFun, - char deviceFun[256] + char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME] ); __host__ cudaError_t CUDARTAPI cudaMemcpySST(uint64_t dst, uint64_t src, size_t count, enum cudaMemcpyKind kind, uint8_t *payload); @@ -49,7 +50,7 @@ __host__ cudaError_t CUDARTAPI cudaConfigureCallSST(dim3 gridDim, dim3 blockDim, __host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream ); -__host__ cudaError_t CUDARTAPI cudaSetupArgumentSST(uint64_t arg, uint8_t value[200], size_t size, size_t offset); +__host__ cudaError_t CUDARTAPI cudaSetupArgumentSST(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], size_t size, size_t offset); __host__ cudaError_t CUDARTAPI cudaLaunchSST(uint64_t func); diff --git a/src/sst/elements/balar/tests/testBalar_testsuite_util.py b/src/sst/elements/balar/tests/testBalar_testsuite_util.py index b6e1a9678e..c63ca7ac45 100644 --- a/src/sst/elements/balar/tests/testBalar_testsuite_util.py +++ b/src/sst/elements/balar/tests/testBalar_testsuite_util.py @@ -387,6 +387,7 @@ def _setupbalarTestFiles(self): os_symlink_file(test_path, self.testbalarDir, "vanadisOS.py") # Copy the shared packet definition files from balar src os_symlink_file(self.balarElementDir, tmpdir, "balar_packet.h") + os_symlink_file(self.balarElementDir, tmpdir, "balar_consts.h") # Create a simlink of each file in the balar/tests/vectorAdd directory for f in os.listdir(self.balarElementVectorAddTestDir): diff --git a/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.c b/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.c index a744869a79..f8f7089106 100644 --- a/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.c +++ b/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.c @@ -14,6 +14,7 @@ // distribution. #include "cuda_runtime_api.h" +#include "../../balar_consts.h" #include #include @@ -149,7 +150,7 @@ cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, uint64_t sharedMem) { } // Cuda Setup argument -cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[200], uint64_t size, uint64_t offset) { +cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], uint64_t size, uint64_t offset) { if (g_debug_level >= LOG_LEVEL_DEBUG) { printf("Start setup argument:\n"); printf("Size: %d offset: %d\n", size, offset); @@ -238,7 +239,7 @@ void __vanadisMapBalar() { } } -unsigned int __cudaRegisterFatBinary(char file_name[256]) { +unsigned int __cudaRegisterFatBinary(char file_name[BALAR_CUDA_MAX_FILE_NAME]) { if (g_debug_level >= LOG_LEVEL_DEBUG) { printf("Registering fat binary: %s\n", file_name); fflush(stdout); @@ -274,7 +275,7 @@ unsigned int __cudaRegisterFatBinary(char file_name[256]) { void __cudaRegisterFunction( uint64_t fatCubinHandle, uint64_t hostFun, - char deviceFun[256] + char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME] ) { if (g_debug_level >= LOG_LEVEL_DEBUG) { printf("Registering kernel function:\n"); diff --git a/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.h b/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.h index 05eb0f0a85..69a53d7764 100644 --- a/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.h +++ b/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.h @@ -21,6 +21,7 @@ #include #include #include +#include "../../balar_consts.h" #define LOG_LEVEL_INFO 20 #define LOG_LEVEL_DEBUG 15 @@ -140,13 +141,13 @@ typedef struct BalarCudaCallPacket { } cuda_malloc; struct { - char file_name[256]; + char file_name[BALAR_CUDA_MAX_FILE_NAME]; } register_fatbin; struct { uint64_t fatCubinHandle; uint64_t hostFun; - char deviceFun[256]; + char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]; } register_function; struct { @@ -172,7 +173,7 @@ typedef struct BalarCudaCallPacket { uint64_t arg; uint64_t size; uint64_t offset; - uint8_t value[200]; + uint8_t value[BALAR_CUDA_MAX_ARG_SIZE]; } setup_argument; struct { @@ -239,19 +240,19 @@ cudaError_t cudaMemcpy(uint64_t dst, uint64_t src, uint64_t count, enum cudaMemc cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, uint64_t sharedMem); // Cuda Setup argument -cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[200], uint64_t size, uint64_t offset); +cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], uint64_t size, uint64_t offset); cudaError_t cudaLaunch(uint64_t func); // Use syscall to map balar to virtual memory space in vanadis void __vanadisMapBalar(); -unsigned int __cudaRegisterFatBinary(char file_name[256]); +unsigned int __cudaRegisterFatBinary(char file_name[BALAR_CUDA_MAX_FILE_NAME]); void __cudaRegisterFunction( uint64_t fatCubinHandle, uint64_t hostFun, - char deviceFun[256] + char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME] ); diff --git a/src/sst/elements/balar/tests/vanadisLLVMRISCV/cuda_runtime_api_vanadis.cc b/src/sst/elements/balar/tests/vanadisLLVMRISCV/cuda_runtime_api_vanadis.cc index 30d50d2cf4..77f7316c89 100644 --- a/src/sst/elements/balar/tests/vanadisLLVMRISCV/cuda_runtime_api_vanadis.cc +++ b/src/sst/elements/balar/tests/vanadisLLVMRISCV/cuda_runtime_api_vanadis.cc @@ -16,6 +16,7 @@ #include "cuda_runtime_api.h" #include "balar_vanadis.h" #include "../../balar_packet.h" +#include "../../balar_consts.h" using namespace SST::BalarComponent; @@ -374,7 +375,7 @@ unsigned CUDARTAPI __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim, } // Cuda Setup argument -cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[200], uint64_t size, uint64_t offset) { +cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], uint64_t size, uint64_t offset) { if (g_debug_level >= LOG_LEVEL_DEBUG) { printf("Start setup argument:\n"); printf("Size: %d offset: %d\n", size, offset); @@ -459,9 +460,9 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *hostFun, // How to pass argument? All use the value[8] // as cudaSetupArgument will make a copy of its content // so that GPGPU-Sim will know both the constant and pointer pass to the kernel - uint8_t value[200]; - if (ret.cudaparamconfig.size > 200) { - printf("CUDA function argument size(%d) exceeds %d bytes limit!\n", ret.cudaparamconfig.size, 200); + uint8_t value[BALAR_CUDA_MAX_ARG_SIZE]; + if (ret.cudaparamconfig.size > BALAR_CUDA_MAX_ARG_SIZE) { + printf("CUDA function argument size(%d) exceeds %d bytes limit!\n", ret.cudaparamconfig.size, BALAR_CUDA_MAX_ARG_SIZE); } memcpy(value, args[index], ret.cudaparamconfig.size); cudaSetupArgument((uint64_t) NULL, value, ret.cudaparamconfig.size, ret.cudaparamconfig.alignment); @@ -520,7 +521,7 @@ unsigned int __cudaRegisterFatBinary(void *fatCubin) { void __cudaRegisterFunction( uint64_t fatCubinHandle, uint64_t hostFun, - char deviceFun[256] + char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME] ) { if (g_debug_level >= LOG_LEVEL_DEBUG) { printf("Registering kernel function:\n");