-
Notifications
You must be signed in to change notification settings - Fork 139
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* initial * Cmake file * successful compilation but validation failed * Cmake * update * gpu validation * gemm universal * gemm universal sk update * sk bf16 universal instance * gemm_universal_streamk.hpp * only build for gfx94 * Cmakelist * profiler update, bf16 sk only works at gfx42 * clang * clang * clang all * no need flags * cmake script * delete comment * gemm universal sk fix * clang * profiler fix * clang * update * update * delete comment * code formatting * cmake * fix instance * clang * argument supported * argument supported and clang * update * fix * removing unnecessary comments * clang formatting * Update library/src/tensor_operation_instance/gpu/CMakeLists.txt Co-authored-by: afagaj <[email protected]> * Copyright comment 2025 * clang reformatting * copyright 2025 --------- Co-authored-by: Emin Ozturk <[email protected]> Co-authored-by: root <[email protected]> Co-authored-by: Muhammed Emin Ozturk <[email protected]> Co-authored-by: root <[email protected]> Co-authored-by: Muhammed Emin Ozturk <[email protected]> Co-authored-by: Muhammed Emin Ozturk <[email protected]> Co-authored-by: Muhammed Emin Ozturk <[email protected]> Co-authored-by: Emin Ozturk <[email protected]> Co-authored-by: Muhammed Emin Ozturk <[email protected]> Co-authored-by: afagaj <[email protected]>
- Loading branch information
1 parent
1d8e4ec
commit 9e95d54
Showing
51 changed files
with
2,101 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved | ||
|
||
#include "common.hpp" | ||
|
||
#include "ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_streamk_v3.hpp" | ||
|
||
// Element types used by this example: the A, B and C operands and the | ||
// CShuffle stage all use ck::bhalf_t, while AccDataType is float. | ||
// These aliases are forwarded as template arguments to the device and | ||
// reference GEMM instances declared further down in this file. | ||
using ADataType = ck::bhalf_t; | ||
using BDataType = ck::bhalf_t; | ||
using CDataType = ck::bhalf_t; | ||
using AccDataType = float; | ||
using CShuffleDataType = ck::bhalf_t; | ||
|
||
// Layout tags for the three matrices: A and C use Row, B uses Col. | ||
// Row and Col are not declared in this file; presumably they come from | ||
// common.hpp -- TODO confirm. | ||
using ALayout = Row; | ||
using BLayout = Col; | ||
using CLayout = Row; | ||
|
||
// Element-wise operation aliases for A, B and C; all three are PassThrough. | ||
// PassThrough is declared outside this file (presumably via common.hpp). | ||
// These aliases are used by the reference GEMM instances below; the device | ||
// instance spells PassThrough directly instead of using them. | ||
using AElementOp = PassThrough; | ||
using BElementOp = PassThrough; | ||
using CElementOp = PassThrough; | ||
|
||
// GEMM specialization passed to the device instance below: | ||
// GemmSpecialization::Default. | ||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default; | ||
|
||
// clang-format off | ||
// Device-side GEMM type exercised by this example: | ||
// DeviceGemm_Xdl_CShuffle_Streamk_V3 instantiated with the layout and data | ||
// type aliases above, PassThrough for all three element-wise ops, the | ||
// GemmDefault specialization, the Intrawave block-GEMM pipeline scheduler | ||
// and pipeline version v3. | ||
// | ||
// The integer and S<...> arguments are positional tuning parameters whose | ||
// meaning is fixed by the template's parameter list in | ||
// device_gemm_xdl_cshuffle_streamk_v3.hpp, which is not visible here. | ||
// NOTE(review): 256 is presumably the block size and 128, 128, 64 the | ||
// per-block M/N/K tile sizes -- confirm against that header before changing | ||
// any value; argument order matters. | ||
using DeviceGemmV2_Streamk_Instance = | ||
ck::tensor_operation::device::DeviceGemm_Xdl_CShuffle_Streamk_V3< | ||
ALayout, BLayout, CLayout, | ||
ADataType, BDataType, CDataType, AccDataType, CShuffleDataType, | ||
PassThrough, PassThrough, PassThrough, GemmDefault, | ||
256, | ||
128, 128, | ||
64, 8, 8, | ||
16, 16, | ||
4, 4, | ||
S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, | ||
2, 8, 8, 0, | ||
S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, | ||
2, 8, 8, 0, | ||
1, 2, S<1, 32, 1, 8>, 8, | ||
ck::BlockGemmPipelineScheduler::Intrawave,ck::BlockGemmPipelineVersion::v3>; | ||
// clang-format on | ||
|
||
// Host-namespace reference GEMM type (ck::tensor_operation::host::ReferenceGemm) | ||
// built from the same data types and element-wise ops as the device instance. | ||
// Presumably consumed by the .inc included below to check results -- TODO confirm. | ||
using ReferenceGemmInstance = ck::tensor_operation::host:: | ||
ReferenceGemm<ADataType, BDataType, CDataType, AccDataType, AElementOp, BElementOp, CElementOp>; | ||
|
||
// Device-namespace reference GEMM type (ck::tensor_operation::device::ReferenceGemm). | ||
// Unlike the host reference above, it also takes the three layout tags in | ||
// addition to the data types and element-wise ops. | ||
// Presumably consumed by the .inc included below for GPU-side validation -- TODO confirm. | ||
using ReferenceGemmInstanceGPU = ck::tensor_operation::device::ReferenceGemm<ALayout, | ||
BLayout, | ||
CLayout, | ||
ADataType, | ||
BDataType, | ||
CDataType, | ||
AccDataType, | ||
AElementOp, | ||
BElementOp, | ||
CElementOp>; | ||
|
||
#include "run_gemm_example_streamk_v2.inc" | ||
|
||
// Program entry point. Forwards argc/argv to run_gemm_universal_streamk_example | ||
// (not declared in this file; expected to come from the .inc included above) | ||
// and returns the logical negation of its result, so a truthy result becomes | ||
// exit code 0 and a falsy result becomes exit code 1. | ||
int main(int argc, char* argv[]) { return !run_gemm_universal_streamk_example(argc, argv); } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
500 changes: 500 additions & 0 deletions
500
library/include/ck/library/tensor_operation_instance/gpu/gemm_universal_streamk.hpp
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.