Skip to content

Commit

Permalink
MI300 rhel and sles roofline binaries (#480)
Browse files Browse the repository at this point in the history
* Roofline binaries for MI300 on RHEL and SLES distributions
Built from rocm-amdgpu-bench and tested on systems running each respective distribution with MI300 hardware.

Signed-off-by: Carrie Fallows <[email protected]>

* Minor modifications that remove hard-coded values (such as the L2_BANKS constant) from the roofline files.

Signed-off-by: Carrie Fallows <[email protected]>

---------

Signed-off-by: Carrie Fallows <[email protected]>
  • Loading branch information
cfallows-amd authored Nov 26, 2024
1 parent 1bc0c8d commit c90b6a4
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 21 deletions.
2 changes: 1 addition & 1 deletion src/roofline.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def empirical_roofline(
"""Generate a set of empirical roofline plots given a directory containing required profiling and benchmarking data"""
# Create arithmetic intensity data that will populate the roofline model
console_debug("roofline", "Path: %s" % self.__run_parameters["workload_dir"])
self.__ai_data = calc_ai(self.__run_parameters["sort_type"], ret_df)
self.__ai_data = calc_ai(self.__mspec, self.__run_parameters["sort_type"], ret_df)

msg = "AI at each mem level:"
for i in self.__ai_data:
Expand Down
39 changes: 19 additions & 20 deletions src/utils/roofline_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@

IMGNAME = "empirRoof"

L2_BANKS = 32 # default assuming mi200, mi300

XMIN = 0.01
XMAX = 1000

Expand Down Expand Up @@ -188,7 +186,7 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
# Overlay application performance
# -------------------------------------------------------------------------------------
# Calculate relevant metrics for ai calculation
def calc_ai(sort_type, ret_df):
def calc_ai(mspec, sort_type, ret_df):
"""Given counter data, calculate arithmetic intensity for each kernel in the application."""
df = ret_df["pmc_perf"]
# Sort by top kernels or top dispatches?
Expand Down Expand Up @@ -306,8 +304,8 @@ def calc_ai(sort_type, ret_df):
lds_data += (
(df["SQ_LDS_IDX_ACTIVE"][idx] - df["SQ_LDS_BANK_CONFLICT"][idx])
* 4
* L2_BANKS
) # L2_BANKS = 32 (since assuming mi200 or mi300)
* (mspec.lds_banks_per_cu)
)
except KeyError:
console_debug(
"roofline",
Expand Down Expand Up @@ -338,37 +336,38 @@ def calc_ai(sort_type, ret_df):
)
pass
try:
if df.keys().str.contains("TCC_BUBBLE").sum() > 0:
# MI300 uses TCC_BUBBLE_sum to calculate hbm_data
if mspec.gpu_model == "MI200":
hbm_data += (
(df["TCC_BUBBLE_sum"][idx] * 128)
+ (df["TCC_EA0_RDREQ_32B_sum"][idx] * 32)
(df["TCC_EA_RDREQ_32B_sum"][idx] * 32)
+ (
(
df["TCC_EA0_RDREQ_sum"][idx]
- df["TCC_BUBBLE_sum"][idx]
- df["TCC_EA0_RDREQ_32B_sum"][idx]
)
(df["TCC_EA_RDREQ_sum"][idx] - df["TCC_EA_RDREQ_32B_sum"][idx])
* 64
)
+ (df["TCC_EA_WRREQ_64B_sum"][idx] * 64)
+ (
(df["TCC_EA0_WRREQ_sum"][idx] - df["TCC_EA0_WRREQ_64B_sum"][idx])
(df["TCC_EA_WRREQ_sum"][idx] - df["TCC_EA_WRREQ_64B_sum"][idx])
* 32
)
+ (df["TCC_EA0_WRREQ_64B_sum"][idx] * 64)
)

else:
# Use TCC_BUBBLE_sum to calculate hbm_data
hbm_data += (
(df["TCC_EA_RDREQ_32B_sum"][idx] * 32)
(df["TCC_BUBBLE_sum"][idx] * 128)
+ (df["TCC_EA0_RDREQ_32B_sum"][idx] * 32)
+ (
(df["TCC_EA_RDREQ_sum"][idx] - df["TCC_EA_RDREQ_32B_sum"][idx])
(
df["TCC_EA0_RDREQ_sum"][idx]
- df["TCC_BUBBLE_sum"][idx]
- df["TCC_EA0_RDREQ_32B_sum"][idx]
)
* 64
)
+ (df["TCC_EA_WRREQ_64B_sum"][idx] * 64)
+ (
(df["TCC_EA_WRREQ_sum"][idx] - df["TCC_EA_WRREQ_64B_sum"][idx])
(df["TCC_EA0_WRREQ_sum"][idx] - df["TCC_EA0_WRREQ_64B_sum"][idx])
* 32
)
+ (df["TCC_EA0_WRREQ_64B_sum"][idx] * 64)
)
except KeyError:
console_debug(
Expand Down
Binary file added src/utils/rooflines/roofline-rhel8-mi300-rocm6
Binary file not shown.
Binary file not shown.

0 comments on commit c90b6a4

Please sign in to comment.