Skip to content

Commit

Permalink
Merge branch 'OpenMathLib:develop' into m3m_exprec
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-frbg authored Dec 30, 2024
2 parents d04686a + 36b0fb3 commit c125866
Show file tree
Hide file tree
Showing 76 changed files with 1,125 additions and 264 deletions.
17 changes: 1 addition & 16 deletions .github/workflows/dynamic_arch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ jobs:
strategy:
fail-fast: false
matrix:
msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
msystem: [UCRT64, MINGW32, CLANG64]
idx: [int32, int64]
build-type: [Release]
include:
Expand All @@ -174,14 +174,6 @@ jobs:
idx: int32
target-prefix: mingw-w64-clang-x86_64
fc-pkg: fc
# Compiling with Flang 16 seems to cause test errors on machines
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
no-avx512-flags: -DNO_AVX512=1
- msystem: CLANG32
idx: int32
target-prefix: mingw-w64-clang-i686
fc-pkg: cc
c-lapack-flags: -DC_LAPACK=ON
- msystem: UCRT64
idx: int64
idx64-flags: -DBINARY=64 -DINTERFACE64=1
Expand All @@ -192,9 +184,6 @@ jobs:
idx64-flags: -DBINARY=64 -DINTERFACE64=1
target-prefix: mingw-w64-clang-x86_64
fc-pkg: fc
# Compiling with Flang 16 seems to cause test errors on machines
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
no-avx512-flags: -DNO_AVX512=1
- msystem: UCRT64
idx: int32
target-prefix: mingw-w64-ucrt-x86_64
Expand All @@ -203,8 +192,6 @@ jobs:
exclude:
- msystem: MINGW32
idx: int64
- msystem: CLANG32
idx: int64

defaults:
run:
Expand Down Expand Up @@ -280,8 +267,6 @@ jobs:
-DNUM_THREADS=64 \
-DTARGET=CORE2 \
${{ matrix.idx64-flags }} \
${{ matrix.c-lapack-flags }} \
${{ matrix.no-avx512-flags }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

cmake_minimum_required(VERSION 3.16.0)

# Limit the file extensions CMake treats as ASM sources to "S".
# NOTE(review): presumably this must be set ahead of project(), since that call
# is what enables the ASM language - confirm.
set (CMAKE_ASM_SOURCE_FILE_EXTENSIONS "S")
project(OpenBLAS C ASM)

set(OpenBLAS_MAJOR_VERSION 0)
Expand Down
3 changes: 3 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,6 @@ In chronological order:

* Christopher Daley <https://github.com/cdaley>
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems

* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32>
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE
27 changes: 27 additions & 0 deletions Makefile.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -351,4 +351,31 @@ endif

endif

else
# NVIDIA HPC options necessary to enable SVE in the compiler
# Each CORE value handled below appends the same -tp=<cpu> option to both the
# C flags (CCOMMON_OPT) and the Fortran flags (FCOMMON_OPT).
# NOTE(review): the condition that selects this branch lies outside this hunk;
# presumably it is the NVIDIA HPC compiler case - confirm against the full file.
ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -tp=thunderx2t99
FCOMMON_OPT += -tp=thunderx2t99
endif
ifeq ($(CORE), NEOVERSEN1)
CCOMMON_OPT += -tp=neoverse-n1
FCOMMON_OPT += -tp=neoverse-n1
endif
ifeq ($(CORE), NEOVERSEV1)
CCOMMON_OPT += -tp=neoverse-v1
FCOMMON_OPT += -tp=neoverse-v1
endif
ifeq ($(CORE), NEOVERSEV2)
CCOMMON_OPT += -tp=neoverse-v2
FCOMMON_OPT += -tp=neoverse-v2
endif
# ARMV8SVE and ARMV9SVE get no -tp value of their own here; both reuse -tp=neoverse-v2.
ifeq ($(CORE), ARMV8SVE)
CCOMMON_OPT += -tp=neoverse-v2
FCOMMON_OPT += -tp=neoverse-v2
endif
ifeq ($(CORE), ARMV9SVE)
CCOMMON_OPT += -tp=neoverse-v2
FCOMMON_OPT += -tp=neoverse-v2
endif

endif
17 changes: 12 additions & 5 deletions Makefile.install
Original file line number Diff line number Diff line change
Expand Up @@ -191,22 +191,29 @@ endif
#Generating OpenBLASConfig.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
# Append with >> instead of truncating with >, so the OpenBLAS_VERSION line written just above stays in the generated file.
# _OpenBLAS_ROOT_DIR is resolved relative to the directory holding the config file (three levels up).
@echo "file(REAL_PATH \"../../..\" _OpenBLAS_ROOT_DIR BASE_DIRECTORY \$${CMAKE_CURRENT_LIST_DIR} )" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_INCLUDE_DIRS \$${_OpenBLAS_ROOT_DIR}/include)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"

ifneq ($(NO_SHARED),1)
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/bin/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
ifeq ($(OSNAME), Darwin)
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
# Emit the OpenBLAS::OpenBLAS imported target into the generated config file.
# Every echo is appended to the file; without the redirection the text was only
# printed on the terminal and never reached OpenBLASConfig.cmake.
# NOTE(review): IMPORTED_LOCATION is only written for WINNT/CYGWIN_NT; other
# platforms get a SHARED IMPORTED target without a location - confirm whether
# that is intended.
@echo "add_library(OpenBLAS::OpenBLAS SHARED IMPORTED)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "target_include_directories(OpenBLAS::OpenBLAS INTERFACE \$${OpenBLAS_INCLUDE_DIRS})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "set_property(TARGET OpenBLAS::OpenBLAS PROPERTY IMPORTED_LOCATION \$${OpenBLAS_LIBRARIES})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "set_property(TARGET OpenBLAS::OpenBLAS PROPERTY IMPORTED_IMPLIB \$${_OpenBLAS_ROOT_DIR}/lib/libopenblas.lib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
else
#only static
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
#Generating OpenBLASConfigVersion.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
Expand Down
7 changes: 7 additions & 0 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -1613,6 +1613,13 @@ NO_AFFINITY = 1
endif
endif

# Debug builds on POWER: strip every flag that starts with -O from the common
# C and Fortran options and compile with -O0 instead.
ifeq ($(DEBUG), 1)
ifeq ($(ARCH), POWER)
CCOMMON_OPT := $(filter-out -O%, $(CCOMMON_OPT)) -O0
FCOMMON_OPT := $(filter-out -O%, $(FCOMMON_OPT)) -O0
endif
endif

ifdef NO_AFFINITY
ifeq ($(NO_AFFINITY), 0)
override undefine NO_AFFINITY
Expand Down
44 changes: 30 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,36 +15,44 @@ OSUOSL IBMZ-CI [![Build Status](http://ibmz-ci.osuosl.org/buildStatus/icon?job=O

OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version.

Please read the documentation in the OpenBLAS folder: <https://github.com/OpenMathLib/OpenBLAS/docs>.
For more information about OpenBLAS, please see:

- The documentation at [openmathlib.org/OpenBLAS/docs/](http://www.openmathlib.org/OpenBLAS/docs),
- The home page at [openmathlib.org/OpenBLAS/](http://www.openmathlib.org/OpenBLAS).

For a general introduction to the BLAS routines, please refer to the extensive documentation of their reference implementation hosted at netlib:
<https://www.netlib.org/blas>. On that site you will likewise find documentation for the reference implementation of the higher-level library LAPACK - the **L**inear **A**lgebra **Pack**age that comes included with OpenBLAS. If you are looking for a general primer or refresher on Linear Algebra, the set of six
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare <https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/> or Youtube <https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek> may be helpful.
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare [here](https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/) or YouTube [here](https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek) may be helpful.

## Binary Packages

We provide official binary packages for the following platform:

* Windows x86/x86_64

You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the Releases section of the github project page, [https://github.com/OpenMathLib/OpenBLAS/releases](https://github.com/OpenMathLib/OpenBLAS/releases).
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the [Releases section of the GitHub project page](https://github.com/OpenMathLib/OpenBLAS/releases).

OpenBLAS is also packaged for many package managers - see [the installation section of the docs](http://www.openmathlib.org/OpenBLAS/docs/install/) for details.

## Installation from Source

Download from project homepage, https://github.com/OpenMathLib/OpenBLAS/, or check out the code
using Git from https://github.com/OpenMathLib/OpenBLAS.git. (If you want the most up to date version, be
sure to use the develop branch - master is several years out of date due to a change of maintainership.)
Buildtime parameters can be chosen in Makefile.rule, see there for a short description of each option.
Most can also be given directly on the make or cmake command line.
Obtain the source code from https://github.com/OpenMathLib/OpenBLAS/. Note that the default branch
is `develop` (a `master` branch is still present, but far out of date).

Build-time parameters can be chosen in `Makefile.rule`, see there for a short description of each option.
Most options can also be given directly on the command line as parameters to your `make` or `cmake` invocation.

### Dependencies

Building OpenBLAS requires the following to be installed:

* GNU Make or CMake
* A C compiler, e.g. GCC or Clang
* A C compiler, e.g. GCC or Clang
* A Fortran compiler (optional, for LAPACK)

In general, using a recent version of the compiler is strongly recommended.
If a Fortran compiler is not available, it is possible to compile an older version of the included LAPACK
that has been machine-translated to C.

### Normal compile

Expand All @@ -60,6 +68,9 @@ For building with `cmake`, the usual conventions apply, i.e. create a build dire
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any
build options you plan to set.

For more details, see the [Building from source](http://www.openmathlib.org/OpenBLAS/docs/install/#building-from-source)
section in the docs.

### Cross compile

Set `CC` and `FC` to point to the cross toolchains, and if you use `make`, also set `HOSTCC` to your host C compiler.
Expand All @@ -76,10 +87,12 @@ Examples:
make CC="i686-w64-mingw32-gcc -Bstatic" FC="i686-w64-mingw32-gfortran -static-libgfortran" TARGET=HASWELL BINARY=32 CROSS=1 NUM_THREADS=20 CONSISTENT_FPCSR=1 HOSTCC=gcc
```

You can find instructions for other cases both in the "Supported Systems" section below and in the docs folder. The .yml scripts included with the sources (which contain the
You can find instructions for other cases both in the "Supported Systems" section below and in
the [Building from source docs](http://www.openmathlib.org/OpenBLAS/docs/install).
The `.yml` scripts included with the sources (which contain the
build scripts for the "continuous integration" (CI) build tests automatically run on every proposed change to the sources) may also provide additional hints.

When compiling for a more modern CPU TARGET of the same architecture, e.g. TARGET=SKYLAKEX on a HASWELL host, option "CROSS=1" can be used to suppress the automatic invocation of the tests at the end of the build.
When compiling for a more modern CPU target of the same architecture, e.g. `TARGET=SKYLAKEX` on a `HASWELL` host, option `CROSS=1` can be used to suppress the automatic invocation of the tests at the end of the build.

### Debug version

Expand Down Expand Up @@ -325,11 +338,14 @@ Please see Changelog.txt.
## Troubleshooting
* Please read the [FAQ](https://github.com/OpenMathLib/OpenBLAS/docs/faq,md) in the docs folder first.
* Please read the [FAQ](http://www.openmathlib.org/OpenBLAS/docs/faq) section of the docs first.
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
Clang 3.0 will generate the wrong AVX binary code.
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake AVX512 kernels.
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake/CooperLake AVX512 kernels.
* Please use LLVM version 18 and above (version 19 and above on Windows) if you plan to use
its new flang compiler for Fortran.
* Please use GCC version 11 and above to compile OpenBLAS on the POWER architecture.
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`),
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
the library with `BIGNUMA=1`.
Expand All @@ -350,4 +366,4 @@ Please see Changelog.txt.
## Donation
Please read [this wiki page](https://github.com/xianyi/OpenBLAS/wiki/Donation).
Please see [the donations section](http://www.openmathlib.org/OpenBLAS/docs/about/#donations) in the docs.
12 changes: 11 additions & 1 deletion benchmark/pybench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,17 @@ have all what it takes to build OpenBLAS from source, plus `python` and
$ python -mpip install numpy meson ninja pytest pytest-benchmark
```

The benchmark syntax is consistent with that of `pytest-benchmark` framework. The incantation to run the suite locally is `$ pytest benchmark/pybench/benchmarks/test_blas.py`.
The Meson build system looks for the installed OpenBLAS using pkg-config, so the `openblas.pc` file created during the OpenBLAS build needs
to be somewhere on the pkg-config search path or in a folder pointed to by the environment variable `PKG_CONFIG_PATH`.

If you want to build the benchmark suite using flang (or flang-new) instead of gfortran for the Fortran parts, you currently need
to edit the meson.build file and change the line `'fortran_std=legacy'` to `'fortran_std=none'` to work around an incompatibility
between Meson and flang.

If you are building and running the benchmark under MS Windows, it may be necessary to copy the generated openblas_wrap module from
your build folder to the `benchmarks` folder.

The benchmark syntax is consistent with that of the `pytest-benchmark` framework. The incantation to run the suite locally is `$ pytest benchmark/pybench/benchmarks/bench_blas.py`.

An ASV compatible benchmark suite is planned but currently not implemented.

3 changes: 3 additions & 0 deletions c_check
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ hostarch=`uname -m | sed -e 's/i.86/x86/'`
if [ "$hostos" = "AIX" ] || [ "$hostos" = "SunOS" ]; then
hostarch=`uname -p`
fi
# When hostarch is "evbarm", replace it with what `uname -p` reports.
# Backticks are kept on purpose so legacy Bourne shells can still run this.
case "$hostarch" in
evbarm) hostarch=`uname -p` ;;
esac
case "$hostarch" in
amd64) hostarch=x86_64 ;;
arm*) [ "$hostarch" = "arm64" ] || hostarch='arm' ;;
Expand Down
6 changes: 4 additions & 2 deletions cmake/f_check.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,15 @@ if (NOT ONLY_CBLAS)

# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
# TODO: set FEXTRALIB flags a la f_check?

# Fortran symbol naming.
# With the IntelLLVM Fortran compiler on Windows, pass options asking for
# lowercase names without an underscore (plus /fp:precise and /recursive).
# In every other case use the trailing-underscore convention: set BU and record
# the matching defines in the temporary target configuration file.
if (${CMAKE_SYSTEM_NAME} MATCHES "Windows" AND x${CMAKE_Fortran_COMPILER_ID} MATCHES "IntelLLVM")
  string(APPEND FCOMMON_OPT " /fp:precise /recursive /names:lowercase /assume:nounderscore")
else ()
  file(APPEND ${TARGET_CONF_TEMP}
    "#define BUNDERSCORE _\n"
    "#define NEEDBUNDERSCORE 1\n"
    "#define NEED2UNDERSCORES 0\n")
  set(BU "_")
endif()
else ()

#When we only build CBLAS, we set NOFORTRAN=2
Expand Down
25 changes: 25 additions & 0 deletions cmake/fc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,31 @@ if (${F_COMPILER} STREQUAL "CRAY")
endif ()
endif ()

# Flags for the NAG Fortran compiler (F_COMPILER == "NAGFOR").
# The comparison is quoted so that an empty or unset F_COMPILER simply fails the
# test instead of leaving if() with a missing left-hand operand.
if ("${F_COMPILER}" STREQUAL "NAGFOR")
  # Tell the C sources which Fortran interface convention is in use.
  string(APPEND CCOMMON_OPT " -DF_INTERFACE_NAG")
  if (INTERFACE64)
    string(APPEND FCOMMON_OPT " -i8")
  endif ()
  # Options from Makefile.system
  # -dcfuns: Enable non-standard double precision complex intrinsic functions
  # -ieee=full: enables all IEEE arithmetic facilities including non-stop arithmetic.
  # -w=obs: Suppress warning messages about obsolescent features
  # -thread_safe: Compile code for safe execution in a multi-threaded environment.
  # -recursive: Specifies that procedures are RECURSIVE by default.
  string(APPEND FCOMMON_OPT " -dcfuns -recursive -ieee=full -w=obs -thread_safe")
  # Options from Reference-LAPACK
  # Suppress compiler banner and summary
  string(APPEND FCOMMON_OPT " -quiet")
  # Disable other common warnings
  # -w=x77: Suppress warning messages about Fortran 77 features
  # -w=ques: Suppress warning messages about questionable usage
  # -w=unused: Suppress warning messages about unused variables
  string(APPEND FCOMMON_OPT " -w=x77 -w=ques -w=unused")
  if (USE_OPENMP)
    string(APPEND FCOMMON_OPT " -openmp")
  endif ()
endif ()

# from the root Makefile - this is for lapack-netlib to compile the correct secnd file.
if (${F_COMPILER} STREQUAL "GFORTRAN")
set(TIMER "INT_ETIME")
Expand Down
7 changes: 6 additions & 1 deletion cmake/lapack.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1018,7 +1018,12 @@ foreach (LA_FILE ${LA_GEN_SRC})
endforeach ()

if (NOT C_LAPACK)
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
# Attaching LAPACK_FFLAGS to each source file duplicates Fortran flags, and
# NAG has a few flags that cannot be specified twice. The per-file property
# may not be needed for any compiler, but to be safe it is only skipped
# for NAG.
if (NOT ${F_COMPILER} STREQUAL "NAGFOR")
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
endif ()
if (${F_COMPILER} STREQUAL "GFORTRAN")
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS} -fno-tree-vectorize")
endif()
Expand Down
6 changes: 4 additions & 2 deletions cmake/prebuild.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ set(TARGET_CONF_TEMP "${PROJECT_BINARY_DIR}/${TARGET_CONF}.tmp")

# c_check
set(FU "")
if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang"))
if (APPLE OR (MSVC AND NOT (${CMAKE_C_COMPILER_ID} MATCHES "Clang" OR ${CMAKE_C_COMPILER_ID} MATCHES "IntelLLVM")))
set(FU "_")
endif()
if(MINGW AND NOT MINGW64)
Expand Down Expand Up @@ -1433,7 +1433,9 @@ else(NOT CMAKE_CROSSCOMPILING)
message(STATUS "MSVC")
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
else()
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
endif()
if (DEFINED TARGET_CORE)
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_${TARGET_CORE})
endif ()
Expand Down
Loading

0 comments on commit c125866

Please sign in to comment.