Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[rocmlibs] Use tensile while building rocblas #1128

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions bin/rocmlibs/build_hipblas.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ for _arch in $GFXLIST ; do
fi
_sep=";"
done
#export CC=$AOMP_INSTALL_DIR/bin/clang
#export CXX=$AOMP_INSTALL_DIR/bin/clang++
export CC=$LLVM_INSTALL_LOC/bin/clang
export CXX=$LLVM_INSTALL_LOC/bin/clang++
export FC=$LLVM_INSTALL_LOC/bin/flang
export ROCM_DIR=$AOMP_INSTALL_DIR
export ROCM_PATH=$AOMP_INSTALL_DIR
export PATH=$AOMP_SUPP/cmake/bin:$AOMP_INSTALL_DIR/bin:$PATH
Expand Down
22 changes: 19 additions & 3 deletions bin/rocmlibs/build_powerinfer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ export CXXFLAGS="-I$AOMP_INSTALL_DIR/include -D__HIP_PLATFORM_AMD__=1"

MYCMAKEOPTS="-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DLLAMA_HIPBLAS=ON \
-DCMAKE_PREFIX_PATH=$AOMP_INSTALL_DIR/lib/cmake \
-DCMAKE_PREFIX_PATH=$LLVM_INSTALL_LOC/lib/cmake \
-DCMAKE_INSTALL_PREFIX=$AOMP_INSTALL_DIR/PowerInfer \
-DCMAKE_C_COMPILER=$AOMP/bin/clang \
-DCMAKE_CXX_COMPILER=$AOMP/bin/clang++ \
-DCMAKE_C_COMPILER=$LLVM_INSTALL_LOC/bin/clang \
-DCMAKE_CXX_COMPILER=$LLVM_INSTALL_LOC/bin/clang++ \
-DHIP_PLATFORM=amd \
-DAMDGPU_TARGETS="\'$GFXSEMICOLONS\'" \
"
Expand Down Expand Up @@ -115,6 +115,22 @@ if [ "$1" == "install" ] ; then
echo
echo "SUCCESSFUL INSTALL to $AOMP_INSTALL_DIR/PowerInfer"
echo
pushd $_repo_dir
cd gguf-py
echo "Installing gguf python package"
pip install .
if [ $? != 0 ] ; then
echo "ERROR pip install failed for PowerInfer/gguf-py package"
exit 1
fi
cd ../powerinfer-py
echo "Installing powerinfer python package"
pip install .
if [ $? != 0 ] ; then
echo "ERROR pip install failed for PowerInfer/powerinfer-py package"
exit 1
fi
popd
removepatch $_repo_dir
else
echo
Expand Down
38 changes: 14 additions & 24 deletions bin/rocmlibs/build_rocblas.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,17 @@ thisdir=`dirname $realpath`
_repo_dir=$AOMP_REPOS/rocmlibs/rocBLAS
_build_dir=$_repo_dir/build

AOMP_BUILD_TENSILE=${AOMP_BUILD_TENSILE:-0}

if [ $AOMP_BUILD_TENSILE == 0 ] ; then
echo
echo "WARNING: Building rocblas without Tensile"
_local_tensile_opt=""
else
_tensile_repo_dir=$AOMP_REPOS/rocmlibs/Tensile
_cwd=$PWD
cd $_tensile_repo_dir
git checkout release/rocm-rel-6.2
git pull
# FIXME: We should get the Tensile hash from rocBLAS/tensile_tag.txt
#git checkout 97e0cfc2c8cb87a1e38901d99c39090dc4181652
git checkout 66ab453c3fcfc3f3816e3383cb0eccb528a1b5a9
cd $_cwd
_local_tensile_opt="--test_local_path=$_tensile_repo_dir"
fi
_tensile_repo_dir=$AOMP_REPOS/rocmlibs/Tensile
_cwd=$PWD
cd $_tensile_repo_dir
git checkout release/rocm-rel-6.2
git pull
# Read the commit SHA from the file rocBLAS/tensile_tag.txt
_tensile_commit_sha=$(cat $_repo_dir/tensile_tag.txt)
# Checkout the specific commit SHA
git checkout $_tensile_commit_sha
cd $_cwd
_local_tensile_opt="--test_local_path=$_tensile_repo_dir"

patchrepo $_repo_dir

Expand Down Expand Up @@ -100,11 +93,9 @@ if [ "$1" != "install" ] ; then
echo rm -rf $_build_dir
rm -rf $_build_dir
mkdir -p $_build_dir
if [ $AOMP_BUILD_TENSILE != 0 ] ; then
# Cleanup possible old tensile build area
echo rm -rf $_tensile_repo_dir/build
rm -rf $_tensile_repo_dir/build
fi
# Cleanup possible old tensile build area
echo rm -rf $_tensile_repo_dir/build
rm -rf $_tensile_repo_dir/build
else
if [ ! -d $_build_dir ] ; then
echo "ERROR: The build directory $_build_dir"
Expand All @@ -126,7 +117,6 @@ $_local_tensile_opt \
--install_invoked \
--build_dir $_build_dir \
--src_path=$_repo_dir \
--no_tensile \
--jobs=$AOMP_JOB_THREADS \
--architecture="""$_gfxlist""" \
"
Expand Down
10 changes: 5 additions & 5 deletions bin/rocmlibs/build_rocprim.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ export PATH=$AOMP_SUPP/cmake/bin:$AOMP/bin:$PATH
export USE_PERL_SCRIPTS=1
export NUM_PROC=$AOMP_JOB_THREADS
export AMDGPU_TARGETS="$GFXSEMICOLONS"
export CXXFLAGS="-I$AOMP_INSTALL_DIR/include -D__HIP_PLATFORM_AMD__=1"
export CXXFLAGS="-I$LLVM_INSTALL_LOC/include -D__HIP_PLATFORM_AMD__=1"
export LDFLAGS="-fPIC"
export CMAKE_PREFIX_PATH="$AOMP_INSTALL_DIR/lib/cmake"
export CMAKE_PREFIX_PATH="$LLVM_INSTALL_LOC/lib/cmake"
MYCMAKEOPTS="-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DCMAKE_CXX_COMPILER=$AOMP_INSTALL_DIR/bin/clang++ \
-DHIP_COMPILER=$AOMP_INSTALL_DIR/bin/clang \
-DCMAKE_CXX_COMPILER=$LLVM_INSTALL_LOC/bin/clang++ \
-DHIP_COMPILER=$LLVM_INSTALL_LOC/bin/clang \
-DHIP_CXX_COMPILER=$AOMP_INSTALL_DIR/bin/hipcc \
-DCMAKE_PREFIX_PATH=$AOMP_INSTALL_DIR/lib/cmake \
-DCMAKE_PREFIX_PATH=$LLVM_INSTALL_LOC/lib/cmake \
-DCMAKE_INSTALL_PREFIX=$AOMP_INSTALL_DIR \
-DAMDGPU_TARGETS="\'$GFXSEMICOLONS\'"
-DROCM_DIR=$ROCM_DIR \
Expand Down
4 changes: 2 additions & 2 deletions bin/rocmlibs/build_rocsolver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ for _arch in $GFXLIST ; do
fi
_sep=";"
done
export CC=$AOMP_INSTALL_DIR/bin/clang
export CXX=$AOMP_INSTALL_DIR/bin/clang++
export CC=$LLVM_INSTALL_LOC/bin/clang
export CXX=$LLVM_INSTALL_LOC/bin/clang++
export ROCM_DIR=$AOMP_INSTALL_DIR
export ROCM_PATH=$AOMP_INSTALL_DIR
export PATH=$AOMP_SUPP/cmake/bin:$AOMP_INSTALL_DIR/bin:$PATH
Expand Down
6 changes: 3 additions & 3 deletions bin/rocmlibs/build_rocsparse.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ for _arch in $GFXLIST ; do
fi
_sep=";"
done
export CC=$AOMP_INSTALL_DIR/bin/clang
export CXX=$AOMP_INSTALL_DIR/bin/clang++
export CC=$LLVM_INSTALL_LOC/bin/clang
export CXX=$LLVM_INSTALL_LOC/bin/clang++
export ROCM_DIR=$AOMP_INSTALL_DIR
export ROCM_PATH=$AOMP_INSTALL_DIR
export PATH=$AOMP_SUPP/cmake/bin:$AOMP_INSTALL_DIR/bin:$PATH
export HIP_USE_PERL_SCRIPTS=1
export USE_PERL_SCRIPTS=1
export NUM_PROC=$AOMP_JOB_THREADS
export CXXFLAGS="-I$AOMP_INSTALL_DIR/include -D__HIP_PLATFORM_AMD__=1"
export CXXFLAGS="-I$LLVM_INSTALL_LOC/include -D__HIP_PLATFORM_AMD__=1"
export LDFLAGS="-fPIC"
if [ "$AOMP_USE_CCACHE" != 0 ] ; then
_ccache_bin=`which ccache`
Expand Down
49 changes: 28 additions & 21 deletions bin/rocmlibs/patches/rocblas.patch
Original file line number Diff line number Diff line change
@@ -1,60 +1,67 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9424c65b..e454bc07 100644
index f70de1cf..5eedfcc3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -111,7 +111,7 @@ list( APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/llvm ${ROCM_PATH} ${ROCM_PATH}/hip /
set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
-set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
+set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1103;gfx90c")

if(ROCM_PLATFORM_VERSION)
if(${ROCM_PLATFORM_VERSION} VERSION_LESS 5.7.0)
@@ -112,7 +112,7 @@ if (NOT BUILD_ADDRESS_SANITIZER)
set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
- set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201")
+ set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201;gfx1103;gfx90c")
else()
set( TARGET_LIST_ROCM_5.6 "gfx908:xnack+;gfx90a:xnack+")
set( TARGET_LIST_ROCM_5.7 "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+")
diff --git a/library/src/handle.cpp b/library/src/handle.cpp
index 44be1ee1..8f4af5c5 100644
index ea1fa1a7..cb14874c 100644
--- a/library/src/handle.cpp
+++ b/library/src/handle.cpp
@@ -101,6 +101,14 @@ static Processor getActiveArch(int deviceId)
@@ -106,6 +106,10 @@ static Processor getActiveArch(int deviceId)
{
return Processor::gfx90a;
}
+ else if(deviceString.find("gfx90c") != std::string::npos)
+ {
+ return Processor::gfx90c;
+ }
else if(deviceString.find("gfx940") != std::string::npos)
{
return Processor::gfx940;
@@ -146,6 +150,10 @@ static Processor getActiveArch(int deviceId)
{
return Processor::gfx1102;
}
+ else if(deviceString.find("gfx1103") != std::string::npos)
+ {
+ return Processor::gfx1103;
+ }
else if(deviceString.find("gfx940") != std::string::npos)
else if(deviceString.find("gfx1151") != std::string::npos)
{
return Processor::gfx940;
return Processor::gfx1151;
diff --git a/library/src/include/handle.hpp b/library/src/include/handle.hpp
index b0717b5d..8df7f0f5 100644
index 94d18c7b..c47cefe8 100644
--- a/library/src/include/handle.hpp
+++ b/library/src/include/handle.hpp
@@ -79,6 +79,7 @@ enum class Processor : int
@@ -82,6 +82,7 @@ enum class Processor : int
gfx906 = 906,
gfx908 = 908,
gfx90a = 910,
+ gfx90c = 912,
gfx940 = 940,
gfx941 = 941,
gfx942 = 942,
@@ -93,6 +94,7 @@ enum class Processor : int
@@ -96,6 +97,7 @@ enum class Processor : int
gfx1100 = 1100,
gfx1101 = 1101,
gfx1102 = 1102,
+ gfx1103 = 1103,
gfx1151 = 1151,
gfx1200 = 1200,
gfx1201 = 1201
};
diff --git a/library/src/tensile_host.cpp b/library/src/tensile_host.cpp
index 53bebba1..f1367429 100644
index f7d4bdb1..47d69c0d 100644
--- a/library/src/tensile_host.cpp
+++ b/library/src/tensile_host.cpp
@@ -266,6 +266,10 @@ namespace
@@ -271,6 +271,10 @@ namespace
{
return Tensile::LazyLoadingInit::gfx90a;
}
Expand All @@ -65,7 +72,7 @@ index 53bebba1..f1367429 100644
else if(deviceString.find("gfx940") != std::string::npos)
{
return Tensile::LazyLoadingInit::gfx940;
@@ -306,6 +310,10 @@ namespace
@@ -311,6 +315,10 @@ namespace
{
return Tensile::LazyLoadingInit::gfx1102;
}
Expand Down
6 changes: 3 additions & 3 deletions bin/rocmlibs/patches/rocprim.patch
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bdff5831..55c05947 100644
index 68e9b7bf..049ed365 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -91,7 +91,7 @@ if(NOT USE_HIP_CPU)

if(GPU_TARGETS STREQUAL "all")
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
- TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102"
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx90c;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103"
- TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151"
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx90c;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1151"
)
set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE)
endif()
Expand Down
4 changes: 2 additions & 2 deletions bin/rocmlibs/patches/rocsolver.patch
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f857f1e..de6b11e 100644
index 0f88788..95d5ac6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -159,12 +159,14 @@ if(NOT DEFINED AMDGPU_TARGETS)
Expand All @@ -14,6 +14,6 @@ index f857f1e..de6b11e 100644
gfx1101
gfx1102
+ gfx1103
gfx1151
)
set(AMDGPU_TARGETS_INIT
gfx900
14 changes: 7 additions & 7 deletions bin/rocmlibs/patches/rocsparse.patch
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index afcba556..7dc9bf14 100644
index 6a4f9d21..ff9d3635 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -148,14 +148,14 @@ if(COMMAND rocm_check_target_ids)
@@ -162,14 +162,14 @@ if(COMMAND rocm_check_target_ids)
TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+")
else()
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
- TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1103;gfx90c")
- TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201")
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx90c;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1151;gfx1200;gfx1201")
endif()
else()
# Use target ID syntax if supported for AMDGPU_TARGETS
if(TARGET_ID_SUPPORT)
- set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
+ set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1103;gfx90c")
- set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201")
+ set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90c;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1151;gfx1200;gfx1201")
else()
- set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908")
+ set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908;gfx1103;gfx90c")
+ set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908;gfx90c;gfx1103")
endif()
endif()

5 changes: 3 additions & 2 deletions bin/rocmlibs/test_powerinfer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ if [ ! -d $MODEL_DIR ]; then
fi
pushd $MODEL_DIR

pushd $MODEL_DIR

# Install huggingface-cli to download the PowerInfer GGUF models
pip install -U "huggingface_hub[cli]"
export PATH=$PATH:$HOME/.local/bin
Expand Down Expand Up @@ -64,7 +66,7 @@ popd
PREDICTION_LENGTH=128
THREAD_COUNT=8
PROMPT="Once upon a time"
VRAM_BUDGET=4
VRAM_BUDGET=16

MODEL_NAME_BASE=$(basename $MODEL_NAME)

Expand All @@ -83,7 +85,6 @@ fi
$POWERINFER_INSTALL_DIR/bin/main -m $MODEL_DIR/$MODEL_NAME -n $PREDICTION_LENGTH -t $THREAD_COUNT -p "$PROMPT" --vram-budget $VRAM_BUDGET 2>&1 | tee $LOG_FILE_NAME.log

set +x

# Print the log file name and parameters with which the model was run
echo "Inference via PowerInfer completed for the following:"
echo "Model: $MODEL_NAME"
Expand Down