Skip to content

Commit

Permalink
balar-mmio: create constant macros to keep things consistent
Browse files Browse the repository at this point in the history
  • Loading branch information
William-An committed Jan 12, 2025
1 parent 3aef532 commit ef89fe0
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 22 deletions.
1 change: 1 addition & 0 deletions src/sst/elements/balar/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ libbalar_la_SOURCES = \
cuda_runtime_api.h \
util.cc \
util.h \
balar_consts.h \
balarMMIO.cc \
balarMMIO.h \
dmaEngine.cc \
Expand Down
11 changes: 11 additions & 0 deletions src/sst/elements/balar/balar_consts.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef BALAR_CONSTS_H
#define BALAR_CONSTS_H
// Some constant macros to keep things consistent across both
// balar component and test example

#define BALAR_CUDA_MAX_ARG_SIZE 256
#define BALAR_CUDA_MAX_FILE_NAME 256
#define BALAR_CUDA_MAX_KERNEL_NAME 512
#define BALAR_CUDA_MAX_DEV_VAR_NAME 256

#endif
11 changes: 6 additions & 5 deletions src/sst/elements/balar/balar_packet.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include "balar_consts.h"

namespace SST {
namespace BalarComponent {
Expand Down Expand Up @@ -92,13 +93,13 @@ namespace BalarComponent {
} cuda_malloc;

struct {
char file_name[256];
char file_name[BALAR_CUDA_MAX_FILE_NAME];
} register_fatbin;

struct {
uint64_t fatCubinHandle;
uint64_t hostFun;
char deviceFun[256];
char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME];
} register_function;

struct {
Expand Down Expand Up @@ -140,7 +141,7 @@ namespace BalarComponent {
uint64_t arg;
uint64_t size;
uint64_t offset;
uint8_t value[200];
uint8_t value[BALAR_CUDA_MAX_ARG_SIZE];
} setup_argument;

struct {
Expand All @@ -155,7 +156,7 @@ namespace BalarComponent {
void **fatCubinHandle;
char *hostVar; //pointer to...something
char *deviceAddress; //name of variable
char deviceName[256]; //name of variable
char deviceName[BALAR_CUDA_MAX_DEV_VAR_NAME]; //name of variable
int32_t ext;
int32_t size;
int32_t constant;
Expand Down Expand Up @@ -188,7 +189,7 @@ namespace BalarComponent {
uint64_t hostVar_ptr;
struct textureReference texRef;
const void **deviceAddress;
char deviceName[256];
char deviceName[BALAR_CUDA_MAX_DEV_VAR_NAME];
int dim;
int norm;
int ext;
Expand Down
7 changes: 4 additions & 3 deletions src/sst/elements/balar/cuda_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define __dv(v)
#endif /* __cplusplus */
#endif /* !__dv */
#include "balar_consts.h"

extern "C"{

Expand All @@ -29,12 +30,12 @@ uint64_t cudaMallocSST(void **devPtr, size_t size);
// `addr` is passed by value
__host__ cudaError_t CUDARTAPI cudaMallocHostSST(void *addr, size_t size);

unsigned CUDARTAPI __cudaRegisterFatBinarySST(char file_name[256]);
unsigned CUDARTAPI __cudaRegisterFatBinarySST(char file_name[BALAR_CUDA_MAX_FILE_NAME]);

void CUDARTAPI __cudaRegisterFunctionSST(
uint64_t fatCubinHandle,
uint64_t hostFun,
char deviceFun[256]
char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]
);

__host__ cudaError_t CUDARTAPI cudaMemcpySST(uint64_t dst, uint64_t src, size_t count, enum cudaMemcpyKind kind, uint8_t *payload);
Expand All @@ -49,7 +50,7 @@ __host__ cudaError_t CUDARTAPI cudaConfigureCallSST(dim3 gridDim, dim3 blockDim,

__host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream );

__host__ cudaError_t CUDARTAPI cudaSetupArgumentSST(uint64_t arg, uint8_t value[200], size_t size, size_t offset);
__host__ cudaError_t CUDARTAPI cudaSetupArgumentSST(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], size_t size, size_t offset);

__host__ cudaError_t CUDARTAPI cudaLaunchSST(uint64_t func);

Expand Down
1 change: 1 addition & 0 deletions src/sst/elements/balar/tests/testBalar_testsuite_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ def _setupbalarTestFiles(self):
os_symlink_file(test_path, self.testbalarDir, "vanadisOS.py")
# Copy the shared packet definition files from balar src
os_symlink_file(self.balarElementDir, tmpdir, "balar_packet.h")
os_symlink_file(self.balarElementDir, tmpdir, "balar_consts.h")

# Create a simlink of each file in the balar/tests/vectorAdd directory
for f in os.listdir(self.balarElementVectorAddTestDir):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// distribution.

#include "cuda_runtime_api.h"
#include "../../balar_consts.h"
#include <sys/syscall.h>
#include <sys/mman.h>

Expand Down Expand Up @@ -149,7 +150,7 @@ cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, uint64_t sharedMem) {
}

// Cuda Setup argument
cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[200], uint64_t size, uint64_t offset) {
cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], uint64_t size, uint64_t offset) {
if (g_debug_level >= LOG_LEVEL_DEBUG) {
printf("Start setup argument:\n");
printf("Size: %d offset: %d\n", size, offset);
Expand Down Expand Up @@ -238,7 +239,7 @@ void __vanadisMapBalar() {
}
}

unsigned int __cudaRegisterFatBinary(char file_name[256]) {
unsigned int __cudaRegisterFatBinary(char file_name[BALAR_CUDA_MAX_FILE_NAME]) {
if (g_debug_level >= LOG_LEVEL_DEBUG) {
printf("Registering fat binary: %s\n", file_name);
fflush(stdout);
Expand Down Expand Up @@ -274,7 +275,7 @@ unsigned int __cudaRegisterFatBinary(char file_name[256]) {
void __cudaRegisterFunction(
uint64_t fatCubinHandle,
uint64_t hostFun,
char deviceFun[256]
char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]
) {
if (g_debug_level >= LOG_LEVEL_DEBUG) {
printf("Registering kernel function:\n");
Expand Down
13 changes: 7 additions & 6 deletions src/sst/elements/balar/tests/vanadisHandshake/cuda_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include "../../balar_consts.h"

#define LOG_LEVEL_INFO 20
#define LOG_LEVEL_DEBUG 15
Expand Down Expand Up @@ -140,13 +141,13 @@ typedef struct BalarCudaCallPacket {
} cuda_malloc;

struct {
char file_name[256];
char file_name[BALAR_CUDA_MAX_FILE_NAME];
} register_fatbin;

struct {
uint64_t fatCubinHandle;
uint64_t hostFun;
char deviceFun[256];
char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME];
} register_function;

struct {
Expand All @@ -172,7 +173,7 @@ typedef struct BalarCudaCallPacket {
uint64_t arg;
uint64_t size;
uint64_t offset;
uint8_t value[200];
uint8_t value[BALAR_CUDA_MAX_ARG_SIZE];
} setup_argument;

struct {
Expand Down Expand Up @@ -239,19 +240,19 @@ cudaError_t cudaMemcpy(uint64_t dst, uint64_t src, uint64_t count, enum cudaMemc
cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, uint64_t sharedMem);

// Cuda Setup argument
cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[200], uint64_t size, uint64_t offset);
cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], uint64_t size, uint64_t offset);

cudaError_t cudaLaunch(uint64_t func);

// Use syscall to map balar to virtual memory space in vanadis
void __vanadisMapBalar();

unsigned int __cudaRegisterFatBinary(char file_name[256]);
unsigned int __cudaRegisterFatBinary(char file_name[BALAR_CUDA_MAX_FILE_NAME]);

void __cudaRegisterFunction(
uint64_t fatCubinHandle,
uint64_t hostFun,
char deviceFun[256]
char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]
);


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "cuda_runtime_api.h"
#include "balar_vanadis.h"
#include "../../balar_packet.h"
#include "../../balar_consts.h"

using namespace SST::BalarComponent;

Expand Down Expand Up @@ -374,7 +375,7 @@ unsigned CUDARTAPI __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim,
}

// Cuda Setup argument
cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[200], uint64_t size, uint64_t offset) {
cudaError_t cudaSetupArgument(uint64_t arg, uint8_t value[BALAR_CUDA_MAX_ARG_SIZE], uint64_t size, uint64_t offset) {
if (g_debug_level >= LOG_LEVEL_DEBUG) {
printf("Start setup argument:\n");
printf("Size: %d offset: %d\n", size, offset);
Expand Down Expand Up @@ -459,9 +460,9 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *hostFun,
// How to pass argument? All use the value[8]
// as cudaSetupArgument will make a copy of its content
// so that GPGPU-Sim will know both the constant and pointer pass to the kernel
uint8_t value[200];
if (ret.cudaparamconfig.size > 200) {
printf("CUDA function argument size(%d) exceeds %d bytes limit!\n", ret.cudaparamconfig.size, 200);
uint8_t value[BALAR_CUDA_MAX_ARG_SIZE];
if (ret.cudaparamconfig.size > BALAR_CUDA_MAX_ARG_SIZE) {
printf("CUDA function argument size(%d) exceeds %d bytes limit!\n", ret.cudaparamconfig.size, BALAR_CUDA_MAX_ARG_SIZE);
}
memcpy(value, args[index], ret.cudaparamconfig.size);
cudaSetupArgument((uint64_t) NULL, value, ret.cudaparamconfig.size, ret.cudaparamconfig.alignment);
Expand Down Expand Up @@ -520,7 +521,7 @@ unsigned int __cudaRegisterFatBinary(void *fatCubin) {
void __cudaRegisterFunction(
uint64_t fatCubinHandle,
uint64_t hostFun,
char deviceFun[256]
char deviceFun[BALAR_CUDA_MAX_KERNEL_NAME]
) {
if (g_debug_level >= LOG_LEVEL_DEBUG) {
printf("Registering kernel function:\n");
Expand Down

0 comments on commit ef89fe0

Please sign in to comment.