forked from OpenMathLib/OpenBLAS
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduced callback to Pthread, Win32 and OpenMP backend
- Loading branch information
1 parent
87f83eb
commit 66d4ca1
Showing
10 changed files
with
524 additions
and
191 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
commit 3db0922de7600746113d79773d645b1b1740127c (HEAD -> Threading_Callback) | ||
Author: shivammonaka <[email protected]> | ||
Date: Mon Apr 1 16:02:03 2024 +0530 | ||
|
||
Changes to win32 | ||
|
||
diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c | ||
index 49d876fc7..9a4916cae 100644 | ||
--- a/driver/others/blas_server_win32.c | ||
+++ b/driver/others/blas_server_win32.c | ||
@@ -214,7 +214,6 @@ static DWORD WINAPI blas_thread_server(void *arg) { | ||
 | ||
 /* Thread identifier */ | ||
 BLASLONG cpu = (BLASLONG)arg; | ||
- | ||
 blas_queue_t *queue; | ||
 | ||
 MT_TRACE("Server[%2ld] Thread is started!\n", cpu); | ||
@@ -243,11 +242,8 @@ static DWORD WINAPI blas_thread_server(void *arg) { | ||
 | ||
 LeaveCriticalSection(&queue_lock); | ||
 | ||
- if (queue) { | ||
- | ||
- exec_threads(cpu, queue, 0); | ||
- } | ||
- | ||
+ exec_threads(cpu, queue, 0); | ||
+  | ||
 MT_TRACE("Server[%2ld] Finished!\n", cpu); | ||
 | ||
 queue->finished = 1; | ||
@@ -579,84 +575,90 @@ static void adjust_thread_buffers() { | ||
static void exec_threads(int cpu, blas_queue_t *queue, int buf_index) | ||
{ | ||
 | ||
- void *buffer, *sa, *sb; | ||
+ if (queue) { | ||
+ void *buffer, *sa, *sb; | ||
+  | ||
+ buffer = blas_thread_buffer[cpu]; | ||
+ sa = queue -> sa; | ||
+ sb = queue -> sb; | ||
 | ||
- buffer = blas_thread_buffer[cpu]; | ||
- sa = queue -> sa; | ||
- sb = queue -> sb; | ||
+ int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine; | ||
 | ||
- int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine; | ||
+ #ifdef CONSISTENT_FPCSR | ||
+ __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode)); | ||
+ __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode)); | ||
+ #endif | ||
 | ||
- #ifdef CONSISTENT_FPCSR | ||
- __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode)); | ||
- __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode)); | ||
- #endif | ||
+ MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n", | ||
+ cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k); | ||
 | ||
- MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n", | ||
- cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k); | ||
+ // fprintf(stderr, "queue start[%ld]!!!\n", cpu); | ||
 | ||
- // fprintf(stderr, "queue start[%ld]!!!\n", cpu); | ||
+ #ifdef MONITOR | ||
+ main_status[cpu] = MAIN_RUNNING1; | ||
+ #endif | ||
 | ||
- #ifdef MONITOR | ||
- main_status[cpu] = MAIN_RUNNING1; | ||
- #endif | ||
+ if (sa == NULL)  | ||
+ sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A); | ||
 | ||
- if (sa == NULL)  | ||
- sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A); | ||
+ if (sb == NULL) { | ||
+ if (!(queue -> mode & BLAS_COMPLEX)) { | ||
+ #ifdef EXPRECISION | ||
+ if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) { | ||
+ sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble) | ||
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
+ } else | ||
+ #endif | ||
+ if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) { | ||
+ #ifdef BUILD_DOUBLE | ||
+ sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) | ||
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
+ #endif | ||
+ } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | ||
+ #ifdef BUILD_SINGLE | ||
+ sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) | ||
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
+ #endif | ||
+ } else { | ||
+ /* Other types in future */ | ||
+ } | ||
+ } else { | ||
+ #ifdef EXPRECISION | ||
+ if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){ | ||
+ sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble) | ||
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
+ } else | ||
+ #endif | ||
+ if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ | ||
+ #ifdef BUILD_COMPLEX16 | ||
+ sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) | ||
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
+ #endif | ||
+ } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | ||
+ #ifdef BUILD_COMPLEX | ||
+ sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) | ||
+ + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
+ #endif | ||
+ } else { | ||
+ /* Other types in future */ | ||
+ } | ||
+ } | ||
+ queue->sb=sb; | ||
+ } | ||
 | ||
- if (sb == NULL) { | ||
- if (!(queue -> mode & BLAS_COMPLEX)) { | ||
-#ifdef EXPRECISION | ||
-if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) { | ||
- sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble) | ||
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
-} else | ||
-#endif | ||
- if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) { | ||
-#ifdef BUILD_DOUBLE | ||
- sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) | ||
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
-#endif | ||
- } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | ||
-#ifdef BUILD_SINGLE | ||
- sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) | ||
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
-#endif | ||
- } else { | ||
- /* Other types in future */ | ||
- } | ||
-} else { | ||
-#ifdef EXPRECISION | ||
-if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){ | ||
- sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble) | ||
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
-} else | ||
-#endif | ||
- if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ | ||
-#ifdef BUILD_COMPLEX16 | ||
- sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) | ||
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
-#endif | ||
- } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | ||
-#ifdef BUILD_COMPLEX | ||
- sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) | ||
- + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | ||
-#endif | ||
- } else { | ||
- /* Other types in future */ | ||
- } | ||
-} | ||
- queue->sb=sb; | ||
- } | ||
+ #ifdef MONITOR | ||
+ main_status[cpu] = MAIN_RUNNING2; | ||
+ #endif | ||
+ | ||
+ if (!(queue -> mode & BLAS_LEGACY)) { | ||
+ (routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position); | ||
+ } else { | ||
+ legacy_exec(routine, queue -> mode, queue -> args, sb); | ||
+ } | ||
 | ||
- #ifdef MONITOR | ||
- main_status[cpu] = MAIN_RUNNING2; | ||
- #endif | ||
+ } else { | ||
+  continue; //if queue == NULL | ||
 | ||
- if (!(queue -> mode & BLAS_LEGACY)) { | ||
- (routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position); | ||
- } else { | ||
- legacy_exec(routine, queue -> mode, queue -> args, sb); | ||
- } | ||
+  } | ||
 | ||
} | ||
\ No newline at end of file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.