diff --git a/Makefile.rule b/Makefile.rule index 8dbf5eab63..fcc826c490 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -307,4 +307,4 @@ COMMON_PROF = -pg # BUILD_COMPLEX16 = 1 # # End of user configuration -# +# \ No newline at end of file diff --git a/Makefile.system b/Makefile.system index 0088eaff5e..ac0a0e252b 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1707,11 +1707,7 @@ endif LIBDLLNAME = $(LIBPREFIX).dll IMPLIBNAME = lib$(LIBNAMEBASE).dll.a -ifneq ($(OSNAME), AIX) LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so) -else -LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a) -endif LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib) LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def) LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp) @@ -1859,4 +1855,4 @@ ifneq ($(OSNAME), SunOS) SUNPATH = /opt/sunstudio12.1 else SUNPATH = /opt/SUNWspro -endif +endif \ No newline at end of file diff --git a/cmake/system.cmake b/cmake/system.cmake index bc87f7b448..60121b3226 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -799,4 +799,4 @@ endif () # export CUCC # export CUFLAGS # export CULIB -#endif +#endif \ No newline at end of file diff --git a/common.h b/common.h index 1f0b9e533a..00695b2b63 100644 --- a/common.h +++ b/common.h @@ -940,4 +940,4 @@ extern int gotoblas_profile; #endif /* __cplusplus */ -#endif +#endif \ No newline at end of file diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index c9ecf73e8a..fcb141a46e 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -540,18 +540,34 @@ static int round_up(int remainder, int width, int multiple) return width; } - static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IFLOAT *sb, BLASLONG nthreads_m, BLASLONG nthreads_n) { -#ifndef USE_OPENMP -#ifndef OS_WINDOWS -static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef USE_OPENMP + static omp_lock_t level3_lock, critical_section_lock; + static volatile BLASLONG init_lock = 0, omp_lock_initialized = 0, + parallel_section_left = MAX_PARALLEL_NUMBER; + + // Lock initialization; Todo : Maybe this part can be moved to blas_init() in blas_server_omp.c + while(omp_lock_initialized == 0) + { + blas_lock(&init_lock); + { + if(omp_lock_initialized == 0) + { + omp_init_lock(&level3_lock); + omp_init_lock(&critical_section_lock); + omp_lock_initialized = 1; + } + blas_unlock(&init_lock); + } + } +#elif defined(OS_WINDOWS) + CRITICAL_SECTION level3_lock; + InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); #else -CRITICAL_SECTION level3_lock; -InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); -#endif + static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER; #endif blas_arg_t newarg; @@ -599,12 +615,21 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); #endif #endif -#ifndef USE_OPENMP -#ifndef OS_WINDOWS -pthread_mutex_lock(&level3_lock); +#ifdef USE_OPENMP + omp_set_lock(&level3_lock); + omp_set_lock(&critical_section_lock); + + parallel_section_left--; + + if(parallel_section_left != 0) + omp_unset_lock(&level3_lock); + + omp_unset_lock(&critical_section_lock); + +#elif defined(OS_WINDOWS) + EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); #else -EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); -#endif + pthread_mutex_lock(&level3_lock); #endif #ifdef USE_ALLOC_HEAP @@ -732,12 +757,19 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); free(job); #endif -#ifndef USE_OPENMP -#ifndef OS_WINDOWS - pthread_mutex_unlock(&level3_lock); -#else +#ifdef USE_OPENMP + omp_set_lock(&critical_section_lock); + parallel_section_left++; + + if(parallel_section_left == 1) + omp_unset_lock(&level3_lock); + + omp_unset_lock(&critical_section_lock); + +#elif defined(OS_WINDOWS) LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock); -#endif +#else + pthread_mutex_unlock(&level3_lock); #endif return 0; diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 4d3d1e0de1..67581144bb 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -407,7 +407,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){ } #endif - while(true) { for(i=0; i < MAX_PARALLEL_NUMBER; i++) { #ifdef HAVE_C11 _Bool inuse = false; @@ -420,9 +419,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){ break; } } - if(i != MAX_PARALLEL_NUMBER) - break; - } if (openblas_omp_adaptive_env() != 0) { #pragma omp parallel for num_threads(num) schedule(OMP_SCHED) @@ -455,4 +451,4 @@ if (openblas_omp_adaptive_env() != 0) { return 0; } -#endif +#endif \ No newline at end of file