Skip to content

Commit

Permalink
Using OpenMP locks with NUM_PARALLEL
Browse files Browse the repository at this point in the history
  • Loading branch information
shivammonaka committed Feb 29, 2024
1 parent 394a9fb commit bc19101
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 20 deletions.
78 changes: 62 additions & 16 deletions driver/level3/level3_thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -548,13 +548,31 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
*range_n, IFLOAT *sa, IFLOAT *sb,
BLASLONG nthreads_m, BLASLONG nthreads_n) {

#ifndef USE_OPENMP
#ifndef OS_WINDOWS
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
#ifdef USE_OPENMP
static omp_lock_t level3_lock, critical_section_lock;
static volatile BLASLONG init_lock = 0, omp_lock_initialized = 0,
parallel_section_left = MAX_PARALLEL_NUMBER;

// Lock initialization. TODO: maybe this part can be moved to blas_init() in blas_server_omp.c
while(omp_lock_initialized == 0)
{
blas_lock(&init_lock);
{
if(omp_lock_initialized == 0)
{
omp_init_lock(&level3_lock);
omp_init_lock(&critical_section_lock);
omp_lock_initialized = 1;
WMB;
}
blas_unlock(&init_lock);
}
}
#elif defined(OS_WINDOWS)
CRITICAL_SECTION level3_lock;
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
CRITICAL_SECTION level3_lock;
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
#endif

blas_arg_t newarg;
Expand Down Expand Up @@ -597,12 +615,28 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
#endif

#ifndef USE_OPENMP
#ifndef OS_WINDOWS
pthread_mutex_lock(&level3_lock);
#ifdef USE_OPENMP
omp_set_lock(&level3_lock);
omp_set_lock(&critical_section_lock);

parallel_section_left--;

/*
How OpenMP locks work with NUM_PARALLEL
1) parallel_section_left = number of concurrent OpenBLAS executions available - number of OpenBLAS executions currently running
2) level3_lock acts like a master lock or barrier that stops new OpenBLAS calls while all the parallel sections are busy executing other OpenBLAS calls
3) critical_section_lock is used when updating variables shared between threads executing OpenBLAS calls concurrently, and when unlocking the master lock whenever required
4) Unlock the master lock only when the parallel sections are not yet exhausted, allowing another thread with an OpenBLAS call to enter
*/
if(parallel_section_left != 0)
omp_unset_lock(&level3_lock);

omp_unset_lock(&critical_section_lock);

#elif defined(OS_WINDOWS)
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
pthread_mutex_lock(&level3_lock);
#endif

#ifdef USE_ALLOC_HEAP
Expand Down Expand Up @@ -730,12 +764,24 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
free(job);
#endif

#ifndef USE_OPENMP
#ifndef OS_WINDOWS
pthread_mutex_unlock(&level3_lock);
#else
#ifdef USE_OPENMP
omp_set_lock(&critical_section_lock);
parallel_section_left++;

/*
Unlock the master lock only when all the parallel sections were exhausted and one of the threads has completed its OpenBLAS call;
otherwise just increment parallel_section_left.
The master lock is held only when all the parallel sections are exhausted, so unlock it only then; otherwise just increment the count.
*/
if(parallel_section_left == 1)
omp_unset_lock(&level3_lock);

omp_unset_lock(&critical_section_lock);

#elif defined(OS_WINDOWS)
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
#else
pthread_mutex_unlock(&level3_lock);
#endif

return 0;
Expand Down
4 changes: 0 additions & 4 deletions driver/others/blas_server_omp.c
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
}
#endif

while(true) {
for(i=0; i < MAX_PARALLEL_NUMBER; i++) {
#ifdef HAVE_C11
_Bool inuse = false;
Expand All @@ -409,9 +408,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
break;
}
}
if(i != MAX_PARALLEL_NUMBER)
break;
}

if (openblas_omp_adaptive_env() != 0) {
#pragma omp parallel for num_threads(num) schedule(OMP_SCHED)
Expand Down

0 comments on commit bc19101

Please sign in to comment.