-
Notifications
You must be signed in to change notification settings - Fork 139
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Universal streamk with atomics (#1360)
* Universal stream-k with atomics, with ckProfiler support. grid_size and the stream-k strategy are tunable. A grid_size of -1 leads to #WGs = maximum occupancy × num_CUs. The implementation supports several stream-k policies: 1-tile, 2-tile, 3-tile and 4-tile. A stream-k strategy of -1 selects the default stream-k policy (4-tile). * Update README.md * fixing clang-format issues * removed conflicts in struct members between streamk and universal streamk * corrected arg parsing for streamk and universal streamk * added stream-k policies for 3 tile and 4 tile * fixed argument type issue with parsing cmd args * changes suggested in PR review are made: removing comments and correcting copyright * file permissions updated * added default value support for grid_size and streamk-policy selection set to -1 * print messages for arguments * print messages for arguments * print messages for arguments1
- Loading branch information
Showing
61 changed files
with
5,846 additions
and
2 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include "common.hpp" | ||
|
||
#include "ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_streamk_v3.hpp" | ||
|
||
// Element types used by this example. A, B, C and the CShuffle type are all | ||
// ck::half_t; AccDataType (passed as the accumulator type below) is float. | ||
// NOTE(review): ck::half_t is presumably a 16-bit floating-point type - confirm in the CK headers. | ||
using ADataType = ck::half_t; | ||
using BDataType = ck::half_t; | ||
using AccDataType = float; | ||
using CShuffleDataType = ck::half_t; | ||
using CDataType = ck::half_t; | ||
|
||
// Layout tags for the A, B and C tensors; all three use Row in this example. | ||
// NOTE(review): Row is not declared in this file - presumably a row-major layout tag from "common.hpp"; confirm. | ||
using ALayout = Row; | ||
using BLayout = Row; | ||
using CLayout = Row; | ||
|
||
// Element-wise operator types associated with A, B and C; all are PassThrough. | ||
// These aliases are consumed by ReferenceGemmInstance further down. | ||
// NOTE(review): PassThrough is not declared in this file - presumably an identity functor from "common.hpp"; confirm. | ||
using AElementOp = PassThrough; | ||
using BElementOp = PassThrough; | ||
using CElementOp = PassThrough; | ||
|
||
// GEMM specialization handed to the device instance below. Despite the name | ||
// "GemmDefault", the selected enumerator is GemmSpecialization::MNPadding. | ||
// NOTE(review): presumably this enables padding of the M and N dimensions - confirm against GemmSpecialization. | ||
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::MNPadding; | ||
|
||
// Device-side GEMM type exercised by this example: one concrete instantiation of | ||
// ck::tensor_operation::device::DeviceGemm_Xdl_CShuffle_Streamk_V3. | ||
// clang-format off | ||
using DeviceGemmV2_Streamk_Instance = | ||
ck::tensor_operation::device::DeviceGemm_Xdl_CShuffle_Streamk_V3< | ||
// Tensor layouts, then element / accumulator / CShuffle data types. | ||
ALayout, BLayout, CLayout, | ||
ADataType, BDataType, CDataType, AccDataType, CShuffleDataType, | ||
// Element-wise operators go through the file-level aliases (the same ones | ||
// ReferenceGemmInstance uses) so device and host reference cannot drift apart; | ||
// followed by the GEMM specialization. | ||
AElementOp, BElementOp, CElementOp, GemmDefault, | ||
// NOTE(review): everything below is a positional tuning argument whose meaning is | ||
// defined by the template declaration in device_gemm_xdl_cshuffle_streamk_v3.hpp. | ||
// Presumed order (confirm against that header): block size; M/N per block; | ||
// K per block, AK1, BK1; M/N per XDL; M/N XDL per wave; A block-transfer | ||
// settings; B block-transfer settings; CShuffle settings. | ||
256, | ||
224, 256, | ||
64, 8, 2, | ||
16, 16, | ||
7, 8, | ||
S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, | ||
2, 8, 8, 0, | ||
S<8, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, | ||
1, 8, 2, 0, | ||
1, 2, S<1, 32, 1, 8>, 8, | ||
// Block-GEMM pipeline scheduler and pipeline version selected for this instance. | ||
ck::BlockGemmPipelineScheduler::Intrawave,ck::BlockGemmPipelineVersion::v3>; | ||
// clang-format on | ||
|
||
// Host-side ReferenceGemm instantiated with the same data types and element-wise | ||
// operator aliases declared above. | ||
// NOTE(review): presumably used by the included run_gemm_example_streamk_v2.inc to | ||
// validate the device result - confirm in that file. | ||
using ReferenceGemmInstance = ck::tensor_operation::host:: | ||
ReferenceGemm<ADataType, BDataType, CDataType, AccDataType, AElementOp, BElementOp, CElementOp>; | ||
|
||
#include "run_gemm_example_streamk_v2.inc" | ||
|
||
int main(int argc, char* argv[]) { return !run_gemm_universal_streamk_example(argc, argv); } |
Oops, something went wrong.