Skip to content

Commit

Permalink
OpenMP locks instead of busy-waiting with NUM_PARALLEL
Browse files (browse the repository at this point in the history)
  • Loading branch information
shivammonaka committed Feb 23, 2024
1 parent b1ae777 commit 4f7cf07
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 30 deletions.
2 changes: 1 addition & 1 deletion Makefile.rule
Original file line number Diff line number Diff line change
Expand Up @@ -307,4 +307,4 @@ COMMON_PROF = -pg
# BUILD_COMPLEX16 = 1
#
# End of user configuration
#
#
6 changes: 1 addition & 5 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -1707,11 +1707,7 @@ endif

LIBDLLNAME = $(LIBPREFIX).dll
IMPLIBNAME = lib$(LIBNAMEBASE).dll.a
ifneq ($(OSNAME), AIX)
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
else
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a)
endif
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
Expand Down Expand Up @@ -1859,4 +1855,4 @@ ifneq ($(OSNAME), SunOS)
SUNPATH = /opt/sunstudio12.1
else
SUNPATH = /opt/SUNWspro
endif
endif
2 changes: 1 addition & 1 deletion cmake/system.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -799,4 +799,4 @@ endif ()
# export CUCC
# export CUFLAGS
# export CULIB
#endif
#endif
2 changes: 1 addition & 1 deletion common.h
Original file line number Diff line number Diff line change
Expand Up @@ -940,4 +940,4 @@ extern int gotoblas_profile;

#endif /* __cplusplus */

#endif
#endif
66 changes: 49 additions & 17 deletions driver/level3/level3_thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,18 +540,34 @@ static int round_up(int remainder, int width, int multiple)
return width;
}


static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
*range_n, IFLOAT *sa, IFLOAT *sb,
BLASLONG nthreads_m, BLASLONG nthreads_n) {

#ifndef USE_OPENMP
#ifndef OS_WINDOWS
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
#ifdef USE_OPENMP
static omp_lock_t level3_lock, critical_section_lock;
static volatile BLASLONG init_lock = 0, omp_lock_initialized = 0,
parallel_section_left = MAX_PARALLEL_NUMBER;

// Lock initialization. TODO: this part could possibly be moved to blas_init() in blas_server_omp.c
while(omp_lock_initialized == 0)
{
blas_lock(&init_lock);
{
if(omp_lock_initialized == 0)
{
omp_init_lock(&level3_lock);
omp_init_lock(&critical_section_lock);
omp_lock_initialized = 1;
}
blas_unlock(&init_lock);
}
}
#elif defined(OS_WINDOWS)
CRITICAL_SECTION level3_lock;
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
CRITICAL_SECTION level3_lock;
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
#endif

blas_arg_t newarg;
Expand Down Expand Up @@ -599,12 +615,21 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
#endif

#ifndef USE_OPENMP
#ifndef OS_WINDOWS
pthread_mutex_lock(&level3_lock);
#ifdef USE_OPENMP
omp_set_lock(&level3_lock);
omp_set_lock(&critical_section_lock);

parallel_section_left--;

if(parallel_section_left != 0)
omp_unset_lock(&level3_lock);

omp_unset_lock(&critical_section_lock);

#elif defined(OS_WINDOWS)
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
pthread_mutex_lock(&level3_lock);
#endif

#ifdef USE_ALLOC_HEAP
Expand Down Expand Up @@ -732,12 +757,19 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
free(job);
#endif

#ifndef USE_OPENMP
#ifndef OS_WINDOWS
pthread_mutex_unlock(&level3_lock);
#else
#ifdef USE_OPENMP
omp_set_lock(&critical_section_lock);
parallel_section_left++;

if(parallel_section_left == 1)
omp_unset_lock(&level3_lock);

omp_unset_lock(&critical_section_lock);

#elif defined(OS_WINDOWS)
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
#else
pthread_mutex_unlock(&level3_lock);
#endif

return 0;
Expand Down
6 changes: 1 addition & 5 deletions driver/others/blas_server_omp.c
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
}
#endif

while(true) {
for(i=0; i < MAX_PARALLEL_NUMBER; i++) {
#ifdef HAVE_C11
_Bool inuse = false;
Expand All @@ -420,9 +419,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
break;
}
}
if(i != MAX_PARALLEL_NUMBER)
break;
}

if (openblas_omp_adaptive_env() != 0) {
#pragma omp parallel for num_threads(num) schedule(OMP_SCHED)
Expand Down Expand Up @@ -455,4 +451,4 @@ if (openblas_omp_adaptive_env() != 0) {
return 0;
}

#endif
#endif

0 comments on commit 4f7cf07

Please sign in to comment.