Skip to content

Commit

Permalink
Restructured and updated blas_server.c
Browse files Browse the repository at this point in the history
  • Loading branch information
shivammonaka committed Mar 25, 2024
1 parent 8c34180 commit afd62a2
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 95 deletions.
22 changes: 13 additions & 9 deletions driver/others/blas_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -462,10 +462,21 @@ blas_queue_t *tscq;
if (queue) {

exec_threads(cpu, queue, 0);

#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Calculation finished!\n", cpu);
#endif

#ifdef MONITOR
main_status[cpu] = MAIN_FINISH;
#endif

// arm: make sure all results are written out _before_
// thread is marked as done and other threads use them
MB;
atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)0);


}

#ifdef MONITOR
Expand Down Expand Up @@ -1055,6 +1066,7 @@ static void exec_threads(int cpu, blas_queue_t *queue, int buf_index)
void *buffer, *sa, *sb;

buffer = blas_thread_buffer[cpu];

int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = (int (*)(blas_arg_t *, void *, void *, void *, void *, BLASLONG))queue -> routine;

atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)1);
Expand All @@ -1068,6 +1080,7 @@ sb = queue -> sb;
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
}
#endif

#ifdef CONSISTENT_FPCSR
#ifdef __aarch64__
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
Expand All @@ -1077,7 +1090,6 @@ sb = queue -> sb;
#endif
#endif


#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING1;
#endif
Expand Down Expand Up @@ -1142,14 +1154,6 @@ sb = queue -> sb;
} else
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);

#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Calculation finished!\n", cpu);
#endif

#ifdef MONITOR
main_status[cpu] = MAIN_FINISH;
#endif

}

#endif
172 changes: 86 additions & 86 deletions driver/others/blas_server_win32.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
Expand Down Expand Up @@ -68,6 +69,7 @@ int blas_server_avail = 0;
int blas_omp_threads_local = 1;

static void * blas_thread_buffer[MAX_CPU_NUMBER];

/* Local Variables */
static BLASULONG server_lock = 0;

Expand Down Expand Up @@ -205,54 +207,46 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb) {
}
}

/* This is the main routine of the worker threads. Each thread waits */
/* until a job is queued. */

static DWORD WINAPI blas_thread_server(void *arg){
//
// This is the main routine of the worker threads. Each thread waits until a job is queued.
//
static DWORD WINAPI blas_thread_server(void *arg) {

/* Thread identifier */
#ifdef SMP_DEBUG
BLASLONG cpu = (BLASLONG)arg;
#endif

DWORD action;
HANDLE handles[] = {pool.filled, pool.killed};

blas_queue_t *queue;

#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Thread is started!\n", cpu);
#endif
MT_TRACE("Server[%2ld] Thread is started!\n", cpu);

while (1){
while (1) {

/* Waiting for Queue */

#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Waiting for Queue.\n", cpu);
#endif
MT_TRACE("Server[%2ld] Waiting for Queue.\n", cpu);

do {
action = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
} while ((action != WAIT_OBJECT_0) && (action != WAIT_OBJECT_0 + 1));
// event raised when work is added to the queue
WaitForSingleObject(kickoff_event, INFINITE);

if (action == WAIT_OBJECT_0 + 1) break;
if (cpu > thread_target - 2) {
//MT_TRACE("thread [%d] exiting.\n", cpu);
break; // excess thread, so worker thread exits
}

#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Got it.\n", cpu);
#endif
MT_TRACE("Server[%2ld] Got it.\n", cpu);

EnterCriticalSection(&pool.lock);
EnterCriticalSection(&queue_lock);

queue = pool.queue;
if (queue) pool.queue = queue->next;
queue = work_queue;
if (queue)
work_queue = work_queue->next;

LeaveCriticalSection(&pool.lock);
LeaveCriticalSection(&queue_lock);

if (queue) {
if (queue) {

if (pool.queue) SetEvent(pool.filled);
exec_threads(cpu, queue, 0);
}
}

MT_TRACE("Server[%2ld] Finished!\n", cpu);

Expand Down Expand Up @@ -288,7 +282,9 @@ int blas_thread_init(void) {

InitializeCriticalSection(&queue_lock);

for(i = 0; i < blas_cpu_number - 1; i++){
for(i = 0; i < blas_cpu_number - 1; i++) {
//MT_TRACE("thread_init: creating thread [%d]\n", i);

blas_threads[i] = CreateThread(NULL, 0,
blas_thread_server, (void *)i,
0, &blas_threads_id[i]);
Expand Down Expand Up @@ -404,8 +400,7 @@ int exec_blas(BLASLONG num, blas_queue_t *queue) {

if ((num <= 0) || (queue == NULL)) return 0;


//Redirect to caller's callback routine
//Redirect to caller's callback routine
if (openblas_threads_callback_) {
int buf_index = 0;
#ifndef USE_SIMPLE_THREADED_LEVEL3
Expand All @@ -416,7 +411,8 @@ int exec_blas(BLASLONG num, blas_queue_t *queue) {
return 0;
}

if ((num > 1) && queue -> next) exec_blas_async(1, queue -> next);
if ((num > 1) && queue -> next)
exec_blas_async(1, queue -> next);

routine = queue -> routine;

Expand Down Expand Up @@ -449,7 +445,7 @@ int BLASFUNC(blas_thread_shutdown)(void) {

LOCK_COMMAND(&server_lock);

//Free buffers allocated for threads
//Free buffers allocated for threads
for(i=0; i<MAX_CPU_NUMBER; i++){
if(blas_thread_buffer[i]!=NULL){
blas_memory_free(blas_thread_buffer[i]);
Expand Down Expand Up @@ -505,9 +501,12 @@ void goto_set_num_threads(int num_threads)
SetEvent(kickoff_event);

for (i = num_threads - 1; i < blas_num_threads - 1; i++) {
//MT_TRACE("set_num_threads: waiting on thread [%d] to quit.\n", i);

WaitForSingleObject(blas_threads[i], INFINITE);

//MT_TRACE("set_num_threads: thread [%d] has quit.\n", i);

CloseHandle(blas_threads[i]);
}

Expand All @@ -534,7 +533,9 @@ void goto_set_num_threads(int num_threads)
blas_server_avail = 1;
}

for(i = (blas_num_threads > 0) ? blas_num_threads - 1 : 0; i < num_threads - 1; i++){
for (i = (blas_num_threads > 0) ? blas_num_threads - 1 : 0; i < num_threads - 1; i++) {
//MT_TRACE("set_num_threads: creating thread [%d]\n", i);

blas_threads[i] = CreateThread(NULL, 0,
blas_thread_server, (void *)i,
0, &blas_threads_id[i]);
Expand Down Expand Up @@ -587,76 +588,75 @@ static void exec_threads(int cpu, blas_queue_t *queue, int buf_index)
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;

#ifdef CONSISTENT_FPCSR
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
#endif

MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);

// fprintf(stderr, "queue start[%ld]!!!\n", cpu);
// fprintf(stderr, "queue start[%ld]!!!\n", cpu);

#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING1;
#endif
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING1;
#endif

if (sa == NULL)
sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
if (sa == NULL)
sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);

if (sb == NULL) {
if (!(queue -> mode & BLAS_COMPLEX)) {
if (sb == NULL) {
if (!(queue -> mode & BLAS_COMPLEX)) {
#ifdef EXPRECISION
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) {
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
} else
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) {
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
} else
#endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
#ifdef BUILD_DOUBLE
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_SINGLE
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else {
/* Other types in future */
}
} else {
} else {
/* Other types in future */
}
} else {
#ifdef EXPRECISION
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
} else
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
} else
#endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
#ifdef BUILD_COMPLEX16
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_COMPLEX
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else {
/* Other types in future */
}
}
queue->sb=sb;
}

#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING2;
#endif
} else {
/* Other types in future */
}
}
queue->sb=sb;
}

if (!(queue -> mode & BLAS_LEGACY)) {
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING2;
#endif

if (!(queue -> mode & BLAS_LEGACY)) {
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
} else {
} else {
legacy_exec(routine, queue -> mode, queue -> args, sb);
}
}

}
}

0 comments on commit afd62a2

Please sign in to comment.