Skip to content

Commit

Permalink
Introduced callback to Pthread, Win32 and OpenMP backend
Browse files Browse the repository at this point in the history
  • Loading branch information
shivammonaka committed Apr 1, 2024
1 parent 87f83eb commit 66d4ca1
Show file tree
Hide file tree
Showing 10 changed files with 524 additions and 191 deletions.
193 changes: 193 additions & 0 deletions a, *sb
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
commit 3db0922de7600746113d79773d645b1b1740127c (HEAD -> Threading_Callback)
Author: shivammonaka <[email protected]>
Date: Mon Apr 1 16:02:03 2024 +0530

Changes to win32

diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c
index 49d876fc7..9a4916cae 100644
--- a/driver/others/blas_server_win32.c
+++ b/driver/others/blas_server_win32.c
@@ -214,7 +214,6 @@ static DWORD WINAPI blas_thread_server(void *arg) {

/* Thread identifier */
BLASLONG cpu = (BLASLONG)arg;
-
blas_queue_t *queue;

MT_TRACE("Server[%2ld] Thread is started!\n", cpu);
@@ -243,11 +242,8 @@ static DWORD WINAPI blas_thread_server(void *arg) {

LeaveCriticalSection(&queue_lock);

- if (queue) {
-
- exec_threads(cpu, queue, 0);
- }
-
+ exec_threads(cpu, queue, 0);
+ 
MT_TRACE("Server[%2ld] Finished!\n", cpu);

queue->finished = 1;
@@ -579,84 +575,90 @@ static void adjust_thread_buffers() {
static void exec_threads(int cpu, blas_queue_t *queue, int buf_index)
{

- void *buffer, *sa, *sb;
+ if (queue) {
+ void *buffer, *sa, *sb;
+ 
+ buffer = blas_thread_buffer[cpu];
+ sa = queue -> sa;
+ sb = queue -> sb;

- buffer = blas_thread_buffer[cpu];
- sa = queue -> sa;
- sb = queue -> sb;
+ int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;

- int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
+ #ifdef CONSISTENT_FPCSR
+ __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
+ __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
+ #endif

- #ifdef CONSISTENT_FPCSR
- __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
- __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
- #endif
+ MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
+ cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);

- MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
- cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
+ // fprintf(stderr, "queue start[%ld]!!!\n", cpu);

- // fprintf(stderr, "queue start[%ld]!!!\n", cpu);
+ #ifdef MONITOR
+ main_status[cpu] = MAIN_RUNNING1;
+ #endif

- #ifdef MONITOR
- main_status[cpu] = MAIN_RUNNING1;
- #endif
+ if (sa == NULL) 
+ sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);

- if (sa == NULL) 
- sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
+ if (sb == NULL) {
+ if (!(queue -> mode & BLAS_COMPLEX)) {
+ #ifdef EXPRECISION
+ if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) {
+ sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble)
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+ } else
+ #endif
+ if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
+ #ifdef BUILD_DOUBLE
+ sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+ #endif
+ } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
+ #ifdef BUILD_SINGLE
+ sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+ #endif
+ } else {
+ /* Other types in future */
+ }
+ } else {
+ #ifdef EXPRECISION
+ if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
+ sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+ } else
+ #endif
+ if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
+ #ifdef BUILD_COMPLEX16
+ sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+ #endif
+ } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
+ #ifdef BUILD_COMPLEX
+ sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+ #endif
+ } else {
+ /* Other types in future */
+ }
+ }
+ queue->sb=sb;
+ }

- if (sb == NULL) {
- if (!(queue -> mode & BLAS_COMPLEX)) {
-#ifdef EXPRECISION
-if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) {
- sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble)
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
-} else
-#endif
- if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
-#ifdef BUILD_DOUBLE
- sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
-#endif
- } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
-#ifdef BUILD_SINGLE
- sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
-#endif
- } else {
- /* Other types in future */
- }
-} else {
-#ifdef EXPRECISION
-if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
- sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
-} else
-#endif
- if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
-#ifdef BUILD_COMPLEX16
- sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
-#endif
- } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
-#ifdef BUILD_COMPLEX
- sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
-#endif
- } else {
- /* Other types in future */
- }
-}
- queue->sb=sb;
- }
+ #ifdef MONITOR
+ main_status[cpu] = MAIN_RUNNING2;
+ #endif
+
+ if (!(queue -> mode & BLAS_LEGACY)) {
+ (routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
+ } else {
+ legacy_exec(routine, queue -> mode, queue -> args, sb);
+ }

- #ifdef MONITOR
- main_status[cpu] = MAIN_RUNNING2;
- #endif
+ } else {
+  continue; //if queue == NULL

- if (!(queue -> mode & BLAS_LEGACY)) {
- (routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
- } else {
- legacy_exec(routine, queue -> mode, queue -> args, sb);
- }
+  }

}
\ No newline at end of file
5 changes: 5 additions & 0 deletions cblas.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ char* openblas_get_config(void);
/*Get the CPU corename on runtime.*/
char* openblas_get_corename(void);

/*Set the threading backend to a custom callback.*/
/* Job function type: returns nothing and receives a thread number, an opaque
   job-data pointer and an integer argument.
   NOTE(review): the precise meaning of `dojob_data` is not visible from this
   header — confirm against the implementation. */
typedef void (*openblas_dojob_callback)(int thread_num, void *jobdata, int dojob_data);
/* Threading-backend callback type: receives a `sync` flag, the job function
   to run, a job count, the size of one job-data element, the job-data pointer
   and the integer argument that is passed in alongside `dojob`.
   NOTE(review): presumably `jobdata` is an array of `numjobs` elements of
   `jobdata_elsize` bytes each, and `sync` selects blocking vs. non-blocking
   dispatch — both are inferred from the names only; verify. */
typedef void (*openblas_threads_callback)(int sync, openblas_dojob_callback dojob, int numjobs, size_t jobdata_elsize, void *jobdata, int dojob_data);
/* Registers `callback` as the custom threading backend (see the comment at
   the top of this group of declarations). */
void openblas_set_threads_callback_function(openblas_threads_callback callback);

#ifdef OPENBLAS_OS_LINUX
/* Sets thread affinity for OpenBLAS threads. `thread_idx` is in [0, openblas_get_num_threads()-1]. */
int openblas_setaffinity(int thread_idx, size_t cpusetsize, cpu_set_t* cpu_set);
Expand Down
5 changes: 5 additions & 0 deletions common_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ int BLASFUNC(xerbla)(char *, blasint *info, blasint);

void openblas_set_num_threads_(int *);

/*Set the threading backend to a custom callback.*/
/* Job function type: returns nothing and receives a thread number, an opaque
   job-data pointer and an integer argument. */
typedef void (*openblas_dojob_callback)(int thread_num, void *jobdata, int dojob_data);
/* Threading-backend callback type: receives a `sync` flag, the job function,
   a job count, the per-element job-data size, the job-data pointer and an
   integer argument.
   NOTE(review): cblas.h declares these same two typedefs. Repeating an
   identical typedef is only permitted from C11 onwards — confirm that both
   headers are never included in one translation unit under an older
   standard, or move the typedefs to a single shared header. */
typedef void (*openblas_threads_callback)(int sync, openblas_dojob_callback dojob, int numjobs, size_t jobdata_elsize, void *jobdata, int dojob_data);
/* Declaration only (`extern`): the variable holding the callback is defined
   in some other translation unit.
   NOTE(review): presumably NULL when no custom backend is installed — not
   visible here; verify at the definition. */
extern openblas_threads_callback openblas_threads_callback_;

FLOATRET BLASFUNC(sdot) (blasint *, float *, blasint *, float *, blasint *);
FLOATRET BLASFUNC(sdsdot)(blasint *, float *, float *, blasint *, float *, blasint *);

Expand Down
1 change: 1 addition & 0 deletions driver/others/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ if (USE_THREAD)
${BLAS_SERVER}
divtable.c # TODO: Makefile has -UDOUBLE
blas_l1_thread.c
blas_server_callback.c
)

if (NOT NO_AFFINITY)
Expand Down
5 changes: 4 additions & 1 deletion driver/others/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX)
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)

ifdef SMP
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) blas_server_callback.$(SUFFIX)
ifneq ($(NO_AFFINITY), 1)
COMMONOBJS += init.$(SUFFIX)
endif
Expand Down Expand Up @@ -140,6 +140,9 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h
blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h
$(CC) $(CFLAGS) -c $< -o $(@F)

blas_server_callback.$(SUFFIX) : blas_server_callback.c ../../common.h
$(CC) $(CFLAGS) -c $< -o $(@F)

openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
$(CC) $(CFLAGS) -c $< -o $(@F)

Expand Down
Loading

0 comments on commit 66d4ca1

Please sign in to comment.