From 4aa33848de02b684ade6e2bb67a83bff59f24993 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 15 Aug 2023 14:00:36 -0500 Subject: [PATCH] Fixing several bugs on original PR Signed-off-by: coleramos425 --- src/omniperf | 65 ++------ src/omniperf_analyze/omniperf_analyze.py | 13 +- src/parser.py | 17 +-- src/utils/csv_converter.py | 185 ++++++++++------------- 4 files changed, 95 insertions(+), 185 deletions(-) diff --git a/src/omniperf b/src/omniperf index 4195ca64a..4689b02ac 100755 --- a/src/omniperf +++ b/src/omniperf @@ -53,8 +53,6 @@ from common import ( from common import getVersion -cache = dict() - ################################################ # Helper Functions ################################################ @@ -443,26 +441,12 @@ def characterize_app(args, VER): # Update timestamps replace_timestamps(workload_dir, log) - # Manually join each pmc_perf*.csv output if args.use_rocscope == False: + # Manually join each pmc_perf*.csv output join_prof(workload_dir, args.join_type, log, args.verbose) - #demangle - for filename in os.listdir(workload_dir): - try: - # fileName = file[0 : file.find(".")] - # Only shorten KernelNames if instructed to - if args.kernelVerbose < 5: - t1 = pd.read_csv( - os.path.join(workload_dir, filename), - on_bad_lines="skip", - engine="python", - ) - t2 = csv_converter.kernel_name_shortener(t1, cache, level=args.kernelVerbose) - t2.to_csv(fname, index=False) - except pd.errors.EmptyDataError: - print("Skipping empty csv " + filename) - - # Close log + # Demangle and overwrite original KernelNames + csv_converter.kernel_name_shortener(workload_dir, args.kernelVerbose) + log.close() @@ -559,6 +543,10 @@ def omniperf_profile(args, VER): print("IP Blocks: All") else: print("IP Blocks: ", args.ipblocks) + if args.kernelVerbose > 5: + print("KernelName verbose level: DISABLED") + else: + print("KernelName verbose level: ", str(args.kernelVerbose)) # Set up directories workload_dir = args.path + "/" + args.name + "/" + 
args.target @@ -680,25 +668,11 @@ def omniperf_profile(args, VER): # Update timestamps replace_timestamps(workload_dir, log) - # Manually join each pmc_perf*.csv output if args.use_rocscope == False: + # Manually join each pmc_perf*.csv output join_prof(workload_dir, args.join_type, log, args.verbose) - #demangle - for filename in os.listdir(workload_dir): - if filename.endswith('.csv'): - try: - # fileName = file[0 : file.find(".")] - # Only shorten KernelNames if instructed to - if args.kernelVerbose < 5: - t1 = pd.read_csv( - os.path.join(workload_dir, filename), - on_bad_lines="skip", - engine="python", - ) - t2 = csv_converter.kernel_name_shortener(t1, cache, level=args.kernelVerbose) - t2.to_csv(os.path.join(workload_dir, filename), index=False) - except pd.errors.EmptyDataError: - print("Skipping empty csv " + filename) + # Demangle and overwrite original KernelNames + csv_converter.kernel_name_shortener(workload_dir, args.kernelVerbose) # Generate sysinfo gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof) @@ -814,23 +788,6 @@ def main(): else: print("\n-------------\nProfile only\n-------------\n") omniperf_profile(args, VER) - workload_dir = args.path - #demangle - for filename in os.listdir(workload_dir): - if filename.endswith('.csv'): - try: - # fileName = file[0 : file.find(".")] - # Only shorten KernelNames if instructed to - if args.kernelVerbose < 5: - t1 = pd.read_csv( - os.path.join(workload_dir, filename), - on_bad_lines="skip", - engine="python", - ) - t2 = csv_converter.kernel_name_shortener(t1, cache, level=args.kernelVerbose) - t2.to_csv(os.path.join(workload_dir, filename), index=False) - except pd.errors.EmptyDataError: - print("Skipping empty csv " + filename) ############## # DATABASE MODE diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py index 123bdd15a..099618e8a 100644 --- a/src/omniperf_analyze/omniperf_analyze.py +++ 
b/src/omniperf_analyze/omniperf_analyze.py @@ -46,11 +46,9 @@ from omniperf_analyze.utils import parser, file_io from omniperf_analyze.utils.gui_components.roofline import get_roofline from utils import csv_converter -import pandas as pd archConfigs = {} - ################################################ # Helper Functions ################################################ @@ -222,16 +220,9 @@ def run_cli(args, runs): # If we assume the panel layout for all archs are similar, it doesn't matter # which archConfig passed into show_all function. # After decide to how to manage kernels display patterns, we can revisit it. - cache = dict() for d in args.path: - # demangle - for filename in os.listdir(d[0]): - if filename.endswith(".csv"): - df = pd.read_csv(os.path.join(d[0], filename)) - new_df = csv_converter.kernel_name_shortener( - df, cache, args.kernelVerbose - ) - new_df.to_csv(os.path.join(d[0], filename), index=False) + # Demangle and overwrite original KernelNames + csv_converter.kernel_name_shortener(d[0], args.kernelVerbose) file_io.create_df_kernel_top_stats( d[0], diff --git a/src/parser.py b/src/parser.py index 8de09542f..e8eb28940 100644 --- a/src/parser.py +++ b/src/parser.py @@ -205,11 +205,10 @@ def parse(my_parser): help="\t\t\tProvide command for profiling after double dash.", ) profile_group.add_argument( - "-f", "--kernelVerbose", required=False, metavar="", - help="\t\t\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)", + help="\t\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)", default=2, type=int, ) @@ -351,15 +350,6 @@ def parse(my_parser): dest="workload", help="\t\t\t\tSpecify name of workload (to remove) or path to workload (to import)", ) - connection_group.add_argument( - "-k", - "--kernelVerbose", - required=False, - metavar="", - help="\t\t\t\tSpecify Kernel Name verbose level 1-5. 
Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)", - default=2, - type=int, - ) ## Analyze Command Line Options ## ---------------------------- @@ -524,11 +514,10 @@ def parse(my_parser): help="\t\tRandomly generate a port to launch GUI application.\n\t\tRegistered Ports range inclusive (1024-49151).", ) analyze_group.add_argument( - "-f", "--kernelVerbose", required=False, metavar="", - help="\t\t\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)", - default=2, + help="\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 5) (DISABLE: 5)", + default=5, type=int, ) diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py index bffe56cc5..428b4de91 100644 --- a/src/utils/csv_converter.py +++ b/src/utils/csv_converter.py @@ -32,90 +32,106 @@ import getpass from pymongo import MongoClient from tqdm import tqdm -import shutil - +import glob cache = dict() + supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"} MAX_SERVER_SEL_DELAY = 5000 # 5 sec connection timeout -def kernel_name_shortener(df, cache, level): - if level >= 5: - return df +def kernel_name_shortener(workload_dir, level): - columnName = "" - if "KernelName" in df: - columnName = "KernelName" - if "Name" in df: - columnName = "Name" + def shorten_file(df, level): + global cache - if columnName == "KernelName" or columnName == "Name": - # loop through all indices - for index in df.index: - original_name = df.loc[index, columnName] - if original_name in cache: - continue + columnName = "" + if "KernelName" in df: + columnName = "KernelName" + if "Name" in df: + columnName = "Name" - cmd = ["/opt/rocm/llvm/bin/llvm-cxxfilt", original_name] + if columnName == "KernelName" or columnName == "Name": + # loop through all indices + for index in df.index: + original_name = df.loc[index, columnName] + if original_name in cache: + continue - proc = 
subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + cmd = ["/opt/rocm/llvm/bin/llvm-cxxfilt", original_name] - demangled_name, e = proc.communicate() - demangled_name = str(demangled_name, "UTF-8").strip() + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - # cache miss, add the shortened name to the dictionary - new_name = "" - matches = "" + demangled_name, e = proc.communicate() + demangled_name = str(demangled_name, "UTF-8").strip() - names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?") + # cache miss, add the shortened name to the dictionary + new_name = "" + matches = "" - # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd] - if names_and_args.search(demangled_name): - matches = names_and_args.findall(demangled_name) - else: - # Works for first case '__amd_rocclr_fillBuffer.kd' - cache[original_name] = new_name - if new_name == None or new_name == "": - cache[original_name] = demangled_name - continue + names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?") - current_level = 0 - for name in matches: - ##can cause errors if a function name or argument is equal to 'clone' - if name[0] == "clone": + # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd] + if names_and_args.search(demangled_name): + matches = names_and_args.findall(demangled_name) + else: + # Works for first case '__amd_rocclr_fillBuffer.kd' + cache[original_name] = new_name + if new_name == None or new_name == "": + cache[original_name] = demangled_name continue - if len(name) == 3: - if name[2] == "::": - continue - if current_level < level: - new_name += name[0] - # closing '>' is to be taken account by the while loop - if name[1].count(">") == 0: - if current_level < level: - if not (current_level == level - 1 and name[1].count("<") > 0): - new_name += name[1] - current_level += name[1].count("<") + current_level = 0 + for name in matches: + ##can
cause errors if a function name or argument is equal to 'clone' + if name[0] == "clone": + continue + if len(name) == 3: + if name[2] == "::": + continue - curr_index = 0 - # cases include '>' '> >, ' have to go in depth here to not lose account of commas and current level - while name[1].count(">") > 0 and curr_index < len(name[1]): if current_level < level: - new_name += name[1][curr_index:] - current_level -= name[1][curr_index:].count(">") - curr_index = len(name[1]) - elif name[1][curr_index] == (">"): - current_level -= 1 - curr_index += 1 + new_name += name[0] + # closing '>' is to be taken account by the while loop + if name[1].count(">") == 0: + if current_level < level: + if not (current_level == level - 1 and name[1].count("<") > 0): + new_name += name[1] + current_level += name[1].count("<") + + curr_index = 0 + # cases include '>' '> >, ' have to go in depth here to not lose account of commas and current level + while name[1].count(">") > 0 and curr_index < len(name[1]): + if current_level < level: + new_name += name[1][curr_index:] + current_level -= name[1][curr_index:].count(">") + curr_index = len(name[1]) + elif name[1][curr_index] == (">"): + current_level -= 1 + curr_index += 1 - cache[original_name] = new_name - if new_name == None or new_name == "": - cache[original_name] = demangled_name + cache[original_name] = new_name + if new_name == None or new_name == "": + cache[original_name] = demangled_name - df[columnName] = df[columnName].map(cache) + df[columnName] = df[columnName].map(cache) - return df + return df + + # Only shorten if valid shortening level + if level < 5: + for fpath in glob.glob(workload_dir + "/*.csv"): + try: + orig_df = pd.read_csv( + fpath, + on_bad_lines="skip", + engine="python", + ) + modified_df = shorten_file(orig_df, level) + modified_df.to_csv(fpath, index=False) + except pd.errors.EmptyDataError: + print("Skipping empty csv " + str(fpath)) # Verify target directory and setup connection @@ -152,13 
+168,6 @@ def parse(args, profileAndExport): db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc - # if Extractionlvl >= 5: - # print("KernelName shortening disabled") - # else: - # print("KernelName shortening enabled") - - # print("Kernel name verbose level:", Extractionlvl) - if args.password == "": try: password = getpass.getpass() @@ -203,16 +212,7 @@ def convert_folder(connectionInfo, Extractionlvl): except: print("ERROR: Unable to connect to the server") sys.exit(1) - # Set up directories - # if Extractionlvl < 5: - # newfilepath = connectionInfo["workload"] - # newfilepath_h = newfilepath + "/renamedFiles/" - # if not os.path.exists(newfilepath_h): - # os.mkdir(newfilepath_h) - # newfilepath = newfilepath_h + connectionInfo["db"] + "/" - # if not os.path.exists(newfilepath): - # os.mkdir(newfilepath) - # Upload files + i = 0 file = "blank" for file in tqdm(os.listdir(connectionInfo["workload"])): @@ -220,30 +220,6 @@ def convert_folder(connectionInfo, Extractionlvl): print(connectionInfo["workload"] + "/" + file) try: fileName = file[0 : file.find(".")] - # Only shorten KernelNames if instructed to - # if Extractionlvl < 5: - # t1 = pd.read_csv( - # connectionInfo["workload"] + "/" + file, - # on_bad_lines="skip", - # engine="python", - # ) - - # t2 = kernel_name_shortener(t1, cache, level=Extractionlvl) - # df_saved_file = t2.to_csv(newfilepath + file) - - # cmd = ( - # "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline" - # ).format( - # connectionInfo["username"], - # connectionInfo["password"], - # connectionInfo["host"], - # connectionInfo["port"], - # connectionInfo["db"], - # newfilepath + file, - # fileName, - # ) - # os.system(cmd) - # else: cmd = ( "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline" ).format( @@ -265,8 +241,5 @@ def convert_folder(connectionInfo, Extractionlvl): value = {"name": 
connectionInfo["db"]} newValue = {"name": connectionInfo["db"]} mycol.replace_one(value, newValue, upsert=True) - # Remove tmp directory if we shortened KernelNames - # if Extractionlvl < 5: - # shutil.rmtree(newfilepath_h) print("{} collections added.".format(i)) print("Workload name uploaded")