Skip to content

Commit

Permalink
Move roofline plot generation to analyze mode
Browse files Browse the repository at this point in the history
Signed-off-by: Sohaib Nadeem <[email protected]>
  • Loading branch information
Sohaib Nadeem committed Nov 25, 2024
1 parent 3f90e7a commit 2e97622
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 78 deletions.
94 changes: 51 additions & 43 deletions src/argparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ def omniarg_parser(

add_general_group(profile_parser, rocprof_compute_version)
profile_group = profile_parser.add_argument_group("Profile Options")
roofline_group = profile_parser.add_argument_group("Standalone Roofline Options")

profile_group.add_argument(
"-n",
Expand Down Expand Up @@ -220,61 +219,27 @@ def omniarg_parser(
help="\t\t\tProfile without collecting roofline data.",
)
profile_group.add_argument(
"remaining",
metavar="-- [ ...]",
default=None,
nargs=argparse.REMAINDER,
help="\t\t\tProvide command for profiling after double dash.",
)

## Roofline Command Line Options
roofline_group.add_argument(
"--roof-only",
required=False,
default=False,
action="store_true",
help="\t\t\tProfile roofline data only.",
)
roofline_group.add_argument(
"--sort",
required=False,
metavar="",
type=str,
default="kernels",
choices=["kernels", "dispatches"],
help="\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n\t\t\t kernels\n\t\t\t dispatches",
)
roofline_group.add_argument(
"-m",
"--mem-level",
required=False,
choices=["HBM", "L2", "vL1D", "LDS"],
metavar="",
nargs="+",
type=str,
default="ALL",
help="\t\t\tFilter by memory level: (DEFAULT: ALL)\n\t\t\t HBM\n\t\t\t L2\n\t\t\t vL1D\n\t\t\t LDS",
profile_group.add_argument(
"remaining",
metavar="-- [ ...]",
default=None,
nargs=argparse.REMAINDER,
help="\t\t\tProvide command for profiling after double dash.",
)
roofline_group.add_argument(
"--device",
profile_group.add_argument(
"--roofline-device",
metavar="",
required=False,
default=-1,
type=int,
help="\t\t\tTarget GPU device ID. (DEFAULT: ALL)",
)
roofline_group.add_argument(
"--kernel-names",
required=False,
default=False,
action="store_true",
help="\t\t\tInclude kernel names in roofline plot.",
)
# roofline_group.add_argument('-w', '--workgroups', required=False, default=-1, type=int, help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)")
# roofline_group.add_argument('--wsize', required=False, default=-1, type=int, help="\t\t\tWorkgroup size (DEFAULT: 256)")
# roofline_group.add_argument('--dataset', required=False, default = -1, type=int, help="\t\t\tDataset size (DEFAULT: 536M)")
# roofline_group.add_argument('-e', '--experiments', required=False, default=-1, type=int, help="\t\t\tNumber of experiments (DEFAULT: 100)")
# roofline_group.add_argument('--iter', required=False, default=-1, type=int, help="\t\t\tNumber of iterations (DEFAULT: 10)")

## Database Command Line Options
## ----------------------------
Expand Down Expand Up @@ -393,6 +358,7 @@ def omniarg_parser(

add_general_group(analyze_parser, rocprof_compute_version)
analyze_group = analyze_parser.add_argument_group("Analyze Options")
roofline_group = analyze_parser.add_argument_group("Standalone Roofline Options")
analyze_advanced_group = analyze_parser.add_argument_group("Advanced Options")

analyze_group.add_argument(
Expand Down Expand Up @@ -464,6 +430,48 @@ def omniarg_parser(
const=8050,
help="\t\tActivate a GUI to interate with rocprofiler-compute metrics.\n\t\tOptionally, specify port to launch application (DEFAULT: 8050)",
)

## Roofline Command Line Options
roofline_group.add_argument(
"--roofline",
required=False,
default=False,
action="store_true",
help="\t\t\tGenerate roofline plot as pdf.",
)
roofline_group.add_argument(
"--sort",
required=False,
metavar="",
type=str,
default="kernels",
choices=["kernels", "dispatches"],
help="\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n\t\t\t kernels\n\t\t\t dispatches",
)
roofline_group.add_argument(
"-m",
"--mem-level",
required=False,
choices=["HBM", "L2", "vL1D", "LDS"],
metavar="",
nargs="+",
type=str,
default="ALL",
help="\t\t\tFilter by memory level: (DEFAULT: ALL)\n\t\t\t HBM\n\t\t\t L2\n\t\t\t vL1D\n\t\t\t LDS",
)
roofline_group.add_argument(
"--kernel-names",
required=False,
default=False,
action="store_true",
help="\t\t\tInclude kernel names in roofline plot.",
)
# roofline_group.add_argument('-w', '--workgroups', required=False, default=-1, type=int, help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)")
# roofline_group.add_argument('--wsize', required=False, default=-1, type=int, help="\t\t\tWorkgroup size (DEFAULT: 256)")
# roofline_group.add_argument('--dataset', required=False, default = -1, type=int, help="\t\t\tDataset size (DEFAULT: 536M)")
# roofline_group.add_argument('-e', '--experiments', required=False, default=-1, type=int, help="\t\t\tNumber of experiments (DEFAULT: 100)")
# roofline_group.add_argument('--iter', required=False, default=-1, type=int, help="\t\t\tNumber of iterations (DEFAULT: 10)")

analyze_advanced_group.add_argument(
"--random-port",
action="store_true",
Expand Down
33 changes: 33 additions & 0 deletions src/rocprof_compute_analyze/analysis_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
# SOFTWARE.
##############################################################################el

import os

from rocprof_compute_analyze.analysis_base import OmniAnalyze_Base
from utils import file_io, parser, tty
from utils.kernel_name_shortener import kernel_name_shortener
Expand Down Expand Up @@ -87,3 +89,34 @@ def run_analysis(self):
],
self._output,
)

if self.get_args().roofline:
self.generate_roofline()

@demarcate
def generate_roofline(self):
    """Generate the roofline plot for the first workload path given to analyze mode.

    The workload directory is taken from the first entry of the parsed
    ``path`` argument and the GPU architecture from the ``gpu_arch`` field
    of the first row of that workload's ``sys_info``.

    The plot is only produced when ``roofline.csv`` exists in the workload
    directory and the SoC object for that architecture exposes a
    ``roofline_obj`` attribute; otherwise an error is reported through
    ``console_error``.
    """
    workload_dir = self.get_args().path[0][0]
    workload = self._runs[workload_dir]
    arch = workload.sys_info.iloc[0]["gpu_arch"]
    roofline_csv = os.path.join(workload_dir, "roofline.csv")

    # The file check comes first so the SoC lookup is skipped when the
    # roofline data is missing.
    if not (
        os.path.isfile(roofline_csv)
        and hasattr(self.get_socs()[arch], "roofline_obj")
    ):
        console_error("Roofline unsupported for gpu or roofline.csv is missing.")
        return

    # "roofline_cli" selects the CLI output path of the roofline object
    # instead of the HTML returned for the GUI.
    roofline_parameters = {
        "workload_dir": workload_dir,
        "device_id": 0,
        "sort_type": "kernels",
        "mem_level": "ALL",
        "include_kernel_names": False,
        "roofline_cli": True,
    }
    self.get_socs()[arch].analysis_setup(roofline_parameters=roofline_parameters)

    # Fetch the roofline object only after analysis_setup() has run.
    roof_obj = self.get_socs()[arch].roofline_obj
    filtered_data = parser.apply_filters(
        workload=workload,
        dir=workload_dir,
        is_gui=True,
        debug=self.get_args().debug,
    )
    roof_obj.empirical_roofline(ret_df=filtered_data)
2 changes: 1 addition & 1 deletion src/rocprof_compute_analyze/analysis_webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def generate_from_filter(
"sort_type": "kernels",
"mem_level": "ALL",
"include_kernel_names": False,
"is_standalone": False,
"roofline_cli": False,
}
)
roof_obj = self.get_socs()[self.arch].roofline_obj
Expand Down
50 changes: 17 additions & 33 deletions src/roofline.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(self, args, mspec, run_parameters=None):
"sort_type": "kernels",
"mem_level": "ALL",
"include_kernel_names": False,
"is_standalone": False,
"roofline_cli": False,
}
)
self.__ai_data = None
Expand All @@ -65,8 +65,8 @@ def __init__(self, args, mspec, run_parameters=None):
# Set roofline run parameters from args
if hasattr(self.__args, "path") and not run_parameters:
self.__run_parameters["workload_dir"] = self.__args.path
if hasattr(self.__args, "roof_only") and self.__args.roof_only == True:
self.__run_parameters["is_standalone"] = True
if hasattr(self.__args, "roofline") and self.__args.roofline == True:
self.__run_parameters["roofline_cli"] = True
if hasattr(self.__args, "kernel_names") and self.__args.kernel_names == True:
self.__run_parameters["include_kernel_names"] = True
if hasattr(self.__args, "mem_level") and self.__args.mem_level != "ALL":
Expand All @@ -84,9 +84,14 @@ def __init__(self, args, mspec, run_parameters=None):

def validate_parameters(self):
if self.__run_parameters["include_kernel_names"] and (
not self.__run_parameters["is_standalone"]
not self.__run_parameters["roofline_cli"]
):
console_error("--roof-only is required for --kernel-names")
console_error("--roofline is required for --kernel-names")

# Change vL1D to a interpretable str, if required
if "vL1D" in self.__run_parameters["mem_level"]:
self.__run_parameters["mem_level"].remove("vL1D")
self.__run_parameters["mem_level"].append("L1")

def roof_setup(self):
# set default workload path if not specified
Expand Down Expand Up @@ -115,7 +120,6 @@ def empirical_roofline(
msg = "AI at each mem level:"
for i in self.__ai_data:
msg += "\n\t%s -> %s" % (i, self.__ai_data[i])
console_debug(msg)

# Generate a roofline figure for each data type
fp32_fig = self.generate_plot(dtype="FP32")
Expand Down Expand Up @@ -145,8 +149,8 @@ def empirical_roofline(
)
self.__figure.update_xaxes(dtick=1)
# Output will be different depending on interaction type:
# Save PDFs if we're in "standalone roofline" mode, otherwise return HTML to be used in GUI output
if self.__run_parameters["is_standalone"]:
# Save PDFs if we're in "roofline cli" mode, otherwise return HTML to be used in GUI output
if self.__run_parameters["roofline_cli"]:
dev_id = str(self.__run_parameters["device_id"])

fp32_fig.write_image(
Expand Down Expand Up @@ -212,7 +216,7 @@ def generate_plot(self, dtype, fig=None) -> go.Figure():
"""Create graph object from ai_data (coordinate points) and ceiling_data (peak FLOP and BW) data."""
if fig is None:
fig = go.Figure()
plot_mode = "lines+text" if self.__run_parameters["is_standalone"] else "lines"
plot_mode = "lines+text" if self.__run_parameters["roofline_cli"] else "lines"
self.__ceiling_data = constuct_roof(
roofline_parameters=self.__run_parameters,
dtype=dtype,
Expand Down Expand Up @@ -242,7 +246,7 @@ def generate_plot(self, dtype, fig=None) -> go.Figure():
),
(
None
if self.__run_parameters["is_standalone"]
if self.__run_parameters["roofline_cli"]
else "{} GB/s".format(
to_int(self.__ceiling_data[cache_level.lower()][2])
)
Expand All @@ -265,7 +269,7 @@ def generate_plot(self, dtype, fig=None) -> go.Figure():
text=[
(
None
if self.__run_parameters["is_standalone"]
if self.__run_parameters["roofline_cli"]
else "{} GFLOP/s".format(
to_int(self.__ceiling_data["valu"][2])
)
Expand All @@ -291,7 +295,7 @@ def generate_plot(self, dtype, fig=None) -> go.Figure():
text=[
(
None
if self.__run_parameters["is_standalone"]
if self.__run_parameters["roofline_cli"]
else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2]))
),
"{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])),
Expand Down Expand Up @@ -354,25 +358,6 @@ def generate_plot(self, dtype, fig=None) -> go.Figure():

return fig

@demarcate
def standalone_roofline(self):
    """Build the empirical roofline directly from a workload's ``pmc_perf.csv``.

    If the ``mem_level`` run parameter contains ``"vL1D"``, that entry is
    replaced with ``"L1"`` first. ``pmc_perf.csv`` is then read from the
    ``workload_dir`` run parameter and handed to ``empirical_roofline`` as a
    one-entry ordered mapping keyed ``"pmc_perf"``. A missing file is
    reported through ``console_error``.
    """
    from collections import OrderedDict

    import pandas as pd

    run_params = self.__run_parameters

    # Swap the user-facing "vL1D" label for "L1", if it was requested.
    mem_levels = run_params["mem_level"]
    if "vL1D" in mem_levels:
        mem_levels.remove("vL1D")
        mem_levels.append("L1")

    perf_csv = os.path.join(run_params["workload_dir"], "pmc_perf.csv")
    if not os.path.isfile(perf_csv):
        console_error("roofline", "{} does not exist".format(perf_csv))

    frames = OrderedDict([("pmc_perf", pd.read_csv(perf_csv))])
    self.empirical_roofline(ret_df=frames)

# Main methods
@abstractmethod
def pre_processing(self):
Expand Down Expand Up @@ -433,8 +418,7 @@ def profile(self):
# we include pre_processing() and profile() methods for those who wish to borrow the roofline module
@abstractmethod
def post_processing(self):
if self.__run_parameters["is_standalone"]:
self.standalone_roofline()
return


def to_int(a):
Expand Down
2 changes: 1 addition & 1 deletion src/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ def mibench(args, mspec):
"-o",
args.path + "/" + "roofline.csv",
"-d",
str(args.device),
str(args.roofline_device),
]
if args.quiet:
my_args += "--quiet"
Expand Down

0 comments on commit 2e97622

Please sign in to comment.