balar-mmio: create constant macros to keep things consistent

sstsimulator · Jan 12, 2025 · ef89fe0 · ef89fe0
1 parent 3aef532
commit ef89fe0
Show file tree

Hide file tree

Showing 8 changed files with 40 additions and 22 deletions.
diff --git a/src/sst/elements/balar/Makefile.am b/src/sst/elements/balar/Makefile.am
@@ -16,6 +16,7 @@ libbalar_la_SOURCES = \
 	cuda_runtime_api.h \
 	util.cc \
 	util.h \
+	balar_consts.h \
 	balarMMIO.cc \
 	balarMMIO.h \
 	dmaEngine.cc \

diff --git a/src/sst/elements/balar/balar_consts.h b/src/sst/elements/balar/balar_consts.h
@@ -0,0 +1,11 @@
+#ifndef BALAR_CONSTS_H
+#define BALAR_CONSTS_H
+// Some constant macros to keep things consistent across both
+// balar component and test example
+
+#define BALAR_CUDA_MAX_ARG_SIZE 256
+#define BALAR_CUDA_MAX_FILE_NAME 256
+#define BALAR_CUDA_MAX_KERNEL_NAME 512
+#define BALAR_CUDA_MAX_DEV_VAR_NAME 256
+
+#endif
diff --git a/src/sst/elements/balar/balar_packet.h b/src/sst/elements/balar/balar_packet.h
@@ -21,6 +21,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdbool.h>
+#include "balar_consts.h"
 
 namespace SST {
 namespace BalarComponent {
@@ -92,13 +93,13 @@ namespace BalarComponent {
             } cuda_malloc;
 
             struct {
-                char file_name[256];
+                char file_name[BALAR_CUDA_MAX_FILE_NAME];
             } register_fatbin;
 
             struct {
                 uint64_t fatCubinHandle;
                 uint64_t hostFun;
-                char deviceFun[256];
+                char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME];
             } register_function;
 
             struct {
@@ -140,7 +141,7 @@ namespace BalarComponent {
                 uint64_t arg;
                 uint64_t size;
                 uint64_t offset;
-                uint8_t value[200];
+                uint8_t value[BALAR_CUDA_MAX_ARG_SIZE];
             } setup_argument;
 
             struct {
@@ -155,7 +156,7 @@ namespace BalarComponent {
                 void **fatCubinHandle;
                 char *hostVar; //pointer to...something
                 char *deviceAddress; //name of variable
-                char deviceName[256]; //name of variable
+                char deviceName[BALAR_CUDA_MAX_DEV_VAR_NAME]; //name of variable
                 int32_t ext;
                 int32_t size;
                 int32_t constant;
@@ -188,7 +189,7 @@ namespace BalarComponent {
                 uint64_t hostVar_ptr;
                 struct textureReference texRef;
                 const void **deviceAddress;
-                char deviceName[256];
+                char deviceName[BALAR_CUDA_MAX_DEV_VAR_NAME];
                 int dim;
                 int norm;
                 int ext;

diff --git a/src/sst/elements/balar/cuda_runtime_api.h b/src/sst/elements/balar/cuda_runtime_api.h
@@ -21,6 +21,7 @@
 #define __dv(v)
 #endif /* __cplusplus */
 #endif /* !__dv */
+#include "balar_consts.h"
 
 extern "C"{
 
@@ -29,12 +30,12 @@ uint64_t cudaMallocSST(void **devPtr, size_t size);
 // `addr` is passed by value
 __host__ cudaError_t CUDARTAPI cudaMallocHostSST(void *addr, size_t size);
 
-unsigned CUDARTAPI __cudaRegisterFatBinarySST(char file_name[256]);
+unsigned CUDARTAPI __cudaRegisterFatBinarySST(char file_name[BALAR_CUDA_MAX_FILE_NAME]);
 
 void CUDARTAPI __cudaRegisterFunctionSST(
     uint64_t fatCubinHandle,
     uint64_t hostFun,
-    char deviceFun[256]
+    char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]
 );
 
 __host__ cudaError_t CUDARTAPI cudaMemcpySST(uint64_t dst, uint64_t src, size_t count, enum cudaMemcpyKind kind, uint8_t *payload);
@@ -49,7 +50,7 @@ __host__ cudaError_t CUDARTAPI cudaConfigureCallSST(dim3 gridDim, dim3 blockDim,
 
 __host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream );
 
-__host__ cudaError_t CUDARTAPI cudaSetupArgumentSST(uint64_t arg, uint8_t value[200], size_t size, size_t offset);
+__host__ cudaError_t CUDARTAPI cudaSetupArgumentSST(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], size_t size, size_t offset);
 
 __host__ cudaError_t CUDARTAPI cudaLaunchSST(uint64_t func);
 

diff --git a/src/sst/elements/balar/tests/testBalar_testsuite_util.py b/src/sst/elements/balar/tests/testBalar_testsuite_util.py
@@ -387,6 +387,7 @@ def _setupbalarTestFiles(self):
         os_symlink_file(test_path, self.testbalarDir, "vanadisOS.py")
         # Copy the shared packet definition files from balar src
         os_symlink_file(self.balarElementDir, tmpdir, "balar_packet.h")
+        os_symlink_file(self.balarElementDir, tmpdir, "balar_consts.h")
 
         # Create a simlink of each file in the balar/tests/vectorAdd directory
         for f in os.listdir(self.balarElementVectorAddTestDir):

diff --git a/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.c b/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.c
@@ -14,6 +14,7 @@
 // distribution.
 
 #include "cuda_runtime_api.h"
+#include "../../balar_consts.h"
 #include <sys/syscall.h>
 #include <sys/mman.h>
 
@@ -149,7 +150,7 @@ cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, uint64_t sharedMem) {
 }
 
 // Cuda Setup argument
-cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[200], uint64_t size, uint64_t offset) {
+cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], uint64_t size, uint64_t offset) {
     if (g_debug_level >= LOG_LEVEL_DEBUG) {
         printf("Start setup argument:\n");
         printf("Size: %d offset: %d\n", size, offset);
@@ -238,7 +239,7 @@ void __vanadisMapBalar() {
     }
 }
 
-unsigned int __cudaRegisterFatBinary(char file_name[256]) {
+unsigned int __cudaRegisterFatBinary(char file_name[BALAR_CUDA_MAX_FILE_NAME]) {
     if (g_debug_level >= LOG_LEVEL_DEBUG) {
         printf("Registering fat binary: %s\n", file_name);
         fflush(stdout);
@@ -274,7 +275,7 @@ unsigned int __cudaRegisterFatBinary(char file_name[256]) {
 void __cudaRegisterFunction(
     uint64_t fatCubinHandle,
     uint64_t hostFun,
-    char deviceFun[256]
+    char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]
 ) {
     if (g_debug_level >= LOG_LEVEL_DEBUG) {
         printf("Registering kernel function:\n");

diff --git a/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.h b/src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.h
@@ -21,6 +21,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdbool.h>
+#include "../../balar_consts.h"
 
 #define LOG_LEVEL_INFO 20
 #define LOG_LEVEL_DEBUG 15
@@ -140,13 +141,13 @@ typedef struct BalarCudaCallPacket {
         } cuda_malloc;
 
         struct {
-            char file_name[256];
+            char file_name[BALAR_CUDA_MAX_FILE_NAME];
         } register_fatbin;
 
         struct {
             uint64_t fatCubinHandle;
             uint64_t hostFun;
-            char deviceFun[256];
+            char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME];
         } register_function;
 
         struct {
@@ -172,7 +173,7 @@ typedef struct BalarCudaCallPacket {
             uint64_t arg;
             uint64_t size;
             uint64_t offset;
-            uint8_t value[200];
+            uint8_t value[BALAR_CUDA_MAX_ARG_SIZE];
         } setup_argument;
 
         struct {
@@ -239,19 +240,19 @@ cudaError_t cudaMemcpy(uint64_t dst, uint64_t src, uint64_t count, enum cudaMemc
 cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, uint64_t sharedMem);
 
 // Cuda Setup argument
-cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[200], uint64_t size, uint64_t offset);
+cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], uint64_t size, uint64_t offset);
 
 cudaError_t cudaLaunch(uint64_t func);
 
 // Use syscall to map balar to virtual memory space in vanadis
 void __vanadisMapBalar();
 
-unsigned int __cudaRegisterFatBinary(char file_name[256]);
+unsigned int __cudaRegisterFatBinary(char file_name[BALAR_CUDA_MAX_FILE_NAME]);
 
 void __cudaRegisterFunction(
     uint64_t fatCubinHandle,
     uint64_t hostFun,
-    char deviceFun[256]
+    char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]
 );
 
 

diff --git a/src/sst/elements/balar/tests/vanadisLLVMRISCV/cuda_runtime_api_vanadis.cc b/src/sst/elements/balar/tests/vanadisLLVMRISCV/cuda_runtime_api_vanadis.cc
@@ -16,6 +16,7 @@
 #include "cuda_runtime_api.h"
 #include "balar_vanadis.h"
 #include "../../balar_packet.h"
+#include "../../balar_consts.h"
 
 using namespace SST::BalarComponent;
 
@@ -374,7 +375,7 @@ unsigned CUDARTAPI __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim,
 }
 
 // Cuda Setup argument
-cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[200], uint64_t size, uint64_t offset) {
+cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], uint64_t size, uint64_t offset) {
     if (g_debug_level >= LOG_LEVEL_DEBUG) {
         printf("Start setup argument:\n");
         printf("Size: %d offset: %d\n", size, offset);
@@ -459,9 +460,9 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *hostFun,
             // How to pass argument? All use the value[8]
             // as cudaSetupArgument will make a copy of its content
             // so that GPGPU-Sim will know both the constant and pointer pass to the kernel
-            uint8_t value[200];
-            if (ret.cudaparamconfig.size > 200) {
-                printf("CUDA function argument size(%d) exceeds %d bytes limit!\n", ret.cudaparamconfig.size, 200);
+            uint8_t value[BALAR_CUDA_MAX_ARG_SIZE];
+            if (ret.cudaparamconfig.size > BALAR_CUDA_MAX_ARG_SIZE) {
+                printf("CUDA function argument size(%d) exceeds %d bytes limit!\n", ret.cudaparamconfig.size, BALAR_CUDA_MAX_ARG_SIZE);
             }
             memcpy(value, args[index], ret.cudaparamconfig.size);
             cudaSetupArgument((uint64_t) NULL, value, ret.cudaparamconfig.size, ret.cudaparamconfig.alignment);
@@ -520,7 +521,7 @@ unsigned int __cudaRegisterFatBinary(void *fatCubin) {
 void __cudaRegisterFunction(
     uint64_t fatCubinHandle,
     uint64_t hostFun,
-    char deviceFun[256]
+    char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]
 ) {
     if (g_debug_level >= LOG_LEVEL_DEBUG) {
         printf("Registering kernel function:\n");