From 23682d86ca8542b86786a952b60405f7982abcaa Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Wed, 4 Sep 2024 13:50:21 +0000 Subject: [PATCH 01/17] experimental changes to detect error and print in the end in image test suite --- utilities/test_suite/HIP/runTests.py | 24 ++++++++++++++++++------ utilities/test_suite/HOST/runTests.py | 21 ++++++++++++++++++--- utilities/test_suite/common.py | 26 ++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index 8857e6ac5..14a03b64c 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -41,6 +41,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 92 +errorLog = [] # Get a list of log files based on a flag for preserving output def get_log_file_list(preserveOutput): @@ -66,16 +67,18 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo if case == "40" or case == "41" or case == "49" or case == "54": for kernelSize in range(3, 10, 2): print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + 
log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) elif case == "8": # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise for noiseType in range(3): print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 @@ -83,22 +86,25 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo interpolationRange = 2 for interpolationType in range(interpolationRange): print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType)) - result = 
subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) else: print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) - + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) 
print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): with open(loggingFolder + "/Tensor_hip_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam)) - process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) + _, stderr_data = process.communicate() + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n") @@ -486,3 +492,9 @@ def rpp_test_suite_parser_and_validator(): if checkFile: print("---------------------------------- Results of QA Test - Tensor_hip ----------------------------------\n") print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_hip") + +if errorLog: + 
print("\n---------------------------------- Error log - Tensor_hip ----------------------------------\n") + for error in errorLog: + print(error) + print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 2f2d49d94..73c303b32 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -41,6 +41,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 92 +errorLog = [] # Get a list of log files based on a flag for preserving output def get_log_file_list(preserveOutput): @@ -69,6 +70,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 @@ -79,24 +81,31 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + 
[scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) else: print("\n./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0") - result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): if qaMode == 1: with open(loggingFolder + "/BatchPD_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: - process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), 
str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + _, stderr_data = process.communicate() + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n") - process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) + _, stderr_data = process.communicate() + log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) def 
run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n") @@ -473,3 +482,9 @@ def rpp_test_suite_parser_and_validator(): for logFile in logFileList: print_performance_tests_summary(logFile, functionalityGroupList, numRuns) + +if errorLog: + print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n") + for error in errorLog: + print(error) + print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index e24ee73f6..30b877159 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -35,6 +35,8 @@ # Python 2 compatibility FileExistsError = OSError +bitDepthDict = {0 : "_u8_", 1 : "_f16_", 2 : "_f32_", 3: "_u8_f16", 4: "_u8_f32_", 5: "_i8_", 6: "_u8_i8_"} + imageAugmentationMap = { 0: ["brightness", "HOST", "HIP"], 1: ["gamma_correction", "HOST", "HIP"], @@ -388,3 +390,27 @@ def dataframe_to_markdown(df): md += '| ' + ' | '.join([str(value).ljust(column_widths[df.columns[j]]) for j, value in enumerate(row.values)]) + ' |\n' return md + +def get_image_layout_type(layout, outputFormatToggle, backend): + result = "Tensor_" + backend + if layout == 0: + result += "_PKD3" + if outputFormatToggle: + result += "_toPLN3" + else: + result += "_toPKD3" + elif layout == 1: + result += "_PLN3" + if outputFormatToggle: + result += "_toPKD3" + else: + result += "_toPLN3" + else: + result += "_PLN1" + result += "_toPLN1" + return result + +def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName): + if errorData.decode(): + msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. 
Getting below error\n" + errorData.decode() + errorLog.append(msg) \ No newline at end of file From 2aabd274cb741f2f4855adbf4f4f924be9cf16c4 Mon Sep 17 00:00:00 2001 From: dineshbabu-ravichandran Date: Mon, 30 Sep 2024 14:24:16 +0000 Subject: [PATCH 02/17] Error handling added for host side --- utilities/test_suite/HIP/runTests.py | 44 +++++++++++++++-- utilities/test_suite/HOST/runAudioTests.py | 25 +++++++++- utilities/test_suite/HOST/runMiscTests.py | 25 +++++++++- utilities/test_suite/HOST/runTests.py | 55 ++++++++++++++++++++-- utilities/test_suite/HOST/runVoxelTests.py | 26 +++++++++- utilities/test_suite/common.py | 40 ++++++++++++++-- 6 files changed, 196 insertions(+), 19 deletions(-) diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index 14a03b64c..e4c73a9c6 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -24,6 +24,7 @@ import os import sys +import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' 
)) from common import * @@ -70,7 +71,12 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) elif case == "8": # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise for noiseType in range(3): @@ -78,7 +84,12 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) - log_detected_errors(stderr_data, errorLog, 
imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 @@ -89,13 +100,23 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + else: + log_detected_errors("Returned non-zero exit status : "+ 
str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) else: print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): @@ -104,7 +125,12 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, 
str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) _, stderr_data = process.communicate() - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n") @@ -148,6 +174,14 @@ def run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, s output_str = output.decode('utf-8') logFile.write(output_str) + stdout_data, stderr_data = result.communicate() + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + # 
Parse and validate command-line arguments for the RPP test suite def rpp_test_suite_parser_and_validator(): parser = argparse.ArgumentParser() diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py index 7e9305176..e62a3c306 100644 --- a/utilities/test_suite/HOST/runAudioTests.py +++ b/utilities/test_suite/HOST/runAudioTests.py @@ -24,6 +24,7 @@ import os import sys +import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) from common import * @@ -37,6 +38,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 7 +errorLog = [] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -46,16 +48,29 @@ def get_log_file_list(): def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath): print("\n./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST") + else: + log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, audioAugmentationMapAugmentationMap[int(case)][0], "_HOST") print(stdout_data.decode()) 
print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): with open(loggingFolder + "/Tensor_audio_host_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize) + "\n") - process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) + stdout_data, stderr_data = process.communicate() + exit_code = process.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST") + else: + log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST") print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -210,3 +225,9 @@ def rpp_test_suite_parser_and_validator(): for log_file in log_file_list: print_performance_tests_summary(log_file, "", numRuns) +if errorLog: + print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n") + for error in errorLog: + print(error) + 
print("-----------------------------------------------------------------------------------------------") + diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py index d7425f287..ae16443e9 100644 --- a/utilities/test_suite/HOST/runMiscTests.py +++ b/utilities/test_suite/HOST/runMiscTests.py @@ -28,6 +28,7 @@ import datetime import shutil import sys +import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) from common import * @@ -39,6 +40,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 2 +errorLog = [] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -48,16 +50,29 @@ def get_log_file_list(): def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): print("\n./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg)) + else: + log_detected_errors("Returned non-zero exit status : "+ 
str(exit_code) + stderr_data.decode(), errorLog, micsAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): with open(loggingFolder + "/Tensor_misc_host_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n") - process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) + stdout_data, stderr_data = process.communicate() + exit_code = process.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non- exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg)) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg)) def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg = ""): if testType == 0: @@ -201,3 +216,9 @@ def rpp_test_suite_parser_and_validator(): for 
logFile in logFileList: print_performance_tests_summary(logFile, functionalityGroupList, numRuns) + +if errorLog: + print("\n---------------------------------- Error log - Tensor_misc_host ----------------------------------\n") + for error in errorLog: + print(error) + print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 73c303b32..2f5bad89c 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -23,6 +23,7 @@ """ import os import sys +import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) from common import * @@ -70,7 +71,12 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, 
outputFormatToggle, "HOST")) elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 @@ -81,13 +87,42 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) else: print("\n./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + 
roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + + # result = subprocess.check_output([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath]) # nosec + # print("Result :",result.decode('utf-8')) + # except subprocess.CalledProcessError as e: + # if(e.returncode < 0): + # # print(f"Error: Command '{e.cmd}' returned non-zero exit status {e.returncode}.Signal : Process died with signal: {signal.Signals(-e.returncode).name} ({-e.returncode})") + # log_detected_errors("Returned non-zero exit status : {e.returncode}.: "+ str({signal.Signals(-e.returncode).name})+str({-e.returncode}), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + # else : + # # print(f"Error: Command '{e.cmd}' returned non-zero exit status {e.returncode}.") + + # except FileNotFoundError as e: + # print(f"Error: {e}") + # log_detected_errors(e, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, 
outputFormatToggle, "HOST")) + + # stdout_data, stderr_data = result.communicate() + # print(stdout_data.decode()) + # print("Error",stderr.decode()) + + # log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) print("------------------------------------------------------------------------------------------") @@ -98,14 +133,24 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d read_from_subprocess_and_write_to_log(process, logFile) log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) _, stderr_data = process.communicate() - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), 
str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) _, stderr_data = process.communicate() - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n") diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py index 768f6bdb0..3e7829021 100644 --- a/utilities/test_suite/HOST/runVoxelTests.py +++ b/utilities/test_suite/HOST/runVoxelTests.py @@ -24,6 +24,7 @@ import os import sys +import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' 
)) from common import * @@ -39,6 +40,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 6 +errorLog = [] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -59,15 +61,21 @@ def func_group_finder(case_number): def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + exit_code = result.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST")) print("\n------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): with open(loggingFolder + 
"/Tensor_voxel_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth) + "\n") - process = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec while True: output = process.stdout.readline() if not output and process.poll() is not None: @@ -82,6 +90,14 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, logFile.write(cleanedOutput + '\n') if "max,min,avg wall times" in output: logFile.write("\n") + + stdout_data, stderr_data = process.communicate() + exit_code = process.returncode + if(exit_code != 0): + if(exit_code < 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST")) + else: + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST")) print("\n------------------------------------------------------------------------------------------") def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -251,3 +267,9 @@ 
def rpp_test_suite_parser_and_validator(): for logFile in logFileList: print_performance_tests_summary(logFile, functionalityGroupList, numRuns) + +if errorLog: + print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n") + for error in errorLog: + print(error) + print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index 30b877159..e322c59dd 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -410,7 +410,41 @@ def get_image_layout_type(layout, outputFormatToggle, backend): result += "_toPLN1" return result +def get_misc_func_name(testCase, nDim, additionalArg): + axisMaskCase = 0 + permOrderCase = 0 + if testCase == 1: + axisMaskCase = 1 + elif testCase == 0: + permOrderCase = 1 + additionalParam = 1 + if axisMaskCase or permOrderCase: + additionalParam = additionalArg + axisMask = additionalParam + permOrder = additionalParam + result = "" + if (axisMaskCase): + result = result + "_" + str(nDim) + "d" + "_axisMask" + str(axisMask) + if (permOrderCase): + result =result + "_" + str(nDim) + "d" + "_permOrder" + str(permOrder) + return result + +def get_voxel_layout_type(layout, backend): + result = "Tensor_" + backend + if layout == 0: + result += "_PKD3_toPKD3" + elif layout == 1: + result += "_PLN3_toPLN3" + else: + result += "_PLN1_toPLN1" + return result +# def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName): +# if errorData.decode(): +# msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. 
Getting below error\n" + errorData.decode() +# errorLog.append(msg) def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName): - if errorData.decode(): - msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData.decode() - errorLog.append(msg) \ No newline at end of file + msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + errorLog.append(msg) +def log_detected_errors(errorData, errorLog, caseName, functionSpecificName): + msg = caseName + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + errorLog.append(msg) \ No newline at end of file From 94abc0f8fee14ccf6c2322c9395430077248323b Mon Sep 17 00:00:00 2001 From: dineshbabu-ravichandran Date: Tue, 1 Oct 2024 04:13:25 -0700 Subject: [PATCH 03/17] Error detection implemented on HIP SIDE --- utilities/test_suite/HIP/runAudioTests.py | 26 +++++++++-- utilities/test_suite/HIP/runMiscTests.py | 24 +++++++++-- utilities/test_suite/HIP/runTests.py | 39 +++++------------ utilities/test_suite/HIP/runVoxelTests.py | 23 +++++++++- utilities/test_suite/HOST/runAudioTests.py | 15 ++----- utilities/test_suite/HOST/runMiscTests.py | 11 +---- utilities/test_suite/HOST/runTests.py | 50 +++------------------- utilities/test_suite/HOST/runVoxelTests.py | 14 ++---- utilities/test_suite/common.py | 29 +++++++++---- 9 files changed, 111 insertions(+), 120 deletions(-) diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py index 408f7b683..2f17a1a4a 100644 --- a/utilities/test_suite/HIP/runAudioTests.py +++ b/utilities/test_suite/HIP/runAudioTests.py @@ -37,7 +37,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 6 - +errorLog = [] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -74,16 +74,23 @@ def 
generate_performance_reports(RESULTS_DIR): def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath): print("\n./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + exit_code = result.returncode + if(exit_code != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): with open(loggingFolder + "/Tensor_audio_hip_raw_performance_log.txt", "a") as logFile: print("./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) - process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) + stdout_data, stderr_data = 
process.communicate() + exit_code = process.returncode + if(exit_code != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " "+ stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -91,7 +98,7 @@ def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns os.mkdir(outFilePath + "/case_" + case) with open(loggingFolder + "/Tensor_audio_hip_raw_performance_log.txt", "a") as logFile: print("\nrocprof --basenames on --timestamp on --stats -o " + outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv ./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) - process = subprocess.Popen([ 'rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + case + "/output_case" + case + ".csv", "./Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([ 'rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + case + "/output_case" + case + ".csv", "./Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec while True: output = process.stdout.readline() if not output and process.poll() is not None: @@ -99,6 +106,11 @@ def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns print(output.strip()) output_str = output.decode('utf-8') logFile.write(output_str) + + stdout_data, stderr_data = process.communicate() + 
exit_code = process.returncode + if(exit_code != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " "+ stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath, profilingOption = "NO"): @@ -296,3 +308,9 @@ def rpp_test_suite_parser_and_validator(): CONSOLIDATED_FILE + "\n") except IOError: print("Unable to open results in " + CONSOLIDATED_FILE) + +if errorLog: + print("\n---------------------------------- Error log - Tensor_audio_hip ----------------------------------\n") + for error in errorLog: + print(error) + print("-----------------------------------------------------------------------------------------------") diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py index ad1e3bf54..3d397c8f7 100644 --- a/utilities/test_suite/HIP/runMiscTests.py +++ b/utilities/test_suite/HIP/runMiscTests.py @@ -39,6 +39,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 2 +errorLog = [] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -75,16 +76,23 @@ def generate_performance_reports(RESULTS_DIR): def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): print("\n./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), 
str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + exit_code = result.returncode + if(exit_code != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): with open(loggingFolder + "/Tensor_misc_hip_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n") - process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) + stdout_data, stderr_data = process.communicate() + exit_code = process.returncode + if(exit_code != 0): + log_detected_errors("Returned non- exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) def 
run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): if not os.path.exists(outFilePath + "/case_" + str(case)): @@ -92,8 +100,12 @@ def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns with open(loggingFolder + "/Tensor_misc_hip_raw_performance_log.txt", "a") as logFile: logFile.write("\nrocprof --basenames on --timestamp on --stats -o " + outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv ./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n") - process = subprocess.Popen(['rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv", "./Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen(['rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv", "./Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) + stdout_data, stderr_data = process.communicate() + exit_code = process.returncode + if(exit_code != 0): + log_detected_errors("Returned non- exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, 
numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg, profilingOption = 'NO'): @@ -286,3 +298,9 @@ def rpp_test_suite_parser_and_validator(): for logFile in logFileList: print_performance_tests_summary(logFile, functionalityGroupList, numRuns) + +if errorLog: + print("\n---------------------------------- Error log - Tensor_misc_hip ----------------------------------\n") + for error in errorLog: + print(error) + print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index e4c73a9c6..43bcc1f1e 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -24,7 +24,6 @@ import os import sys -import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) from common import * @@ -73,10 +72,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo print(stdout_data.decode()) exit_code = result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) elif case == "8": # Run all variants of noise type functions with additional 
argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise for noiseType in range(3): @@ -86,10 +82,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo print(stdout_data.decode()) exit_code = result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 @@ -102,21 +95,15 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo print(stdout_data.decode()) exit_code = result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, 
"HIP")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) else: print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) exit_code = result.returncode - if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + if(exit_code != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, 
additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): @@ -125,12 +112,9 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) _, stderr_data = process.communicate() - exit_code = result.returncode + exit_code = process.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n") @@ -174,13 +158,10 @@ def run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, s output_str = output.decode('utf-8') logFile.write(output_str) - stdout_data, stderr_data = result.communicate() - exit_code = result.returncode + stdout_data, stderr_data = process.communicate() + exit_code = 
process.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) # Parse and validate command-line arguments for the RPP test suite def rpp_test_suite_parser_and_validator(): diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py index 43cb2f6b3..f71f09476 100644 --- a/utilities/test_suite/HIP/runVoxelTests.py +++ b/utilities/test_suite/HIP/runVoxelTests.py @@ -39,6 +39,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 6 +errorLog = [] def get_log_file_list(preserveOutput): return [ @@ -58,15 +59,18 @@ def func_group_finder(case_number): def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n./Tensor_voxel_hip " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE) # nosec + result = subprocess.Popen([buildFolderPath + 
"/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + exit_code = result.returncode + if(exit_code != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code)) print("\n------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): with open(loggingFolder + "/Tensor_voxel_hip_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_voxel_hip " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth) + "\n") - process = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec + process = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec while True: output = process.stdout.readline() if not output and process.poll() is not None: @@ -81,6 +85,11 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, logFile.write(cleanedOutput + 
'\n') if "max,min,avg wall times" in output: logFile.write("\n") + + stdout_data, stderr_data = process.communicate() + exit_code = process.returncode + if(exit_code != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code)) print("\n------------------------------------------------------------------------------------------") def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -100,6 +109,10 @@ def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerP break print(output.strip()) logFile.write(output.decode('utf-8')) + stdout_data, stderr_data = process.communicate() + exit_code = process.returncode + if(exit_code != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize, profilingOption = 'NO'): @@ -356,3 +369,9 @@ def rpp_test_suite_parser_and_validator(): for logFile in logFileList: print_performance_tests_summary(logFile, functionalityGroupList, numRuns) + +if errorLog: + print("\n---------------------------------- Error log - Tensor_voxel_hip ----------------------------------\n") + for error in errorLog: + print(error) + print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git 
a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py index e62a3c306..d00b9244a 100644 --- a/utilities/test_suite/HOST/runAudioTests.py +++ b/utilities/test_suite/HOST/runAudioTests.py @@ -24,7 +24,6 @@ import os import sys -import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) from common import * @@ -50,13 +49,10 @@ def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath): print("\n./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() + print(stdout_data.decode()) exit_code = result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST") - else: - log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, audioAugmentationMapAugmentationMap[int(case)][0], "_HOST") - print(stdout_data.decode()) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -67,10 +63,7 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba stdout_data, stderr_data = process.communicate() 
exit_code = process.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST") - else: - log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST") + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -226,7 +219,7 @@ def rpp_test_suite_parser_and_validator(): print_performance_tests_summary(log_file, "", numRuns) if errorLog: - print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n") + print("\n---------------------------------- Error log - Tensor_audio_host ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py index ae16443e9..533200a81 100644 --- a/utilities/test_suite/HOST/runMiscTests.py +++ b/utilities/test_suite/HOST/runMiscTests.py @@ -28,7 +28,6 @@ import datetime import shutil import sys -import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' 
)) from common import * @@ -55,10 +54,7 @@ def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFi print(stdout_data.decode()) exit_code = result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg)) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, micsAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg)) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): @@ -69,10 +65,7 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to stdout_data, stderr_data = process.communicate() exit_code = process.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non- exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg)) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg)) + log_detected_errors("Returned non- exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], 
get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg = ""): if testType == 0: diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 2f5bad89c..4881f16f4 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -23,7 +23,6 @@ """ import os import sys -import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) from common import * @@ -73,10 +72,8 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo print(stdout_data.decode()) exit_code = result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) + elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 @@ -89,10 +86,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo print(stdout_data.decode()) exit_code = 
result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) else: print("\n./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec @@ -100,29 +94,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo print(stdout_data.decode()) exit_code = result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], 
bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) - - # result = subprocess.check_output([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath]) # nosec - # print("Result :",result.decode('utf-8')) - # except subprocess.CalledProcessError as e: - # if(e.returncode < 0): - # # print(f"Error: Command '{e.cmd}' returned non-zero exit status {e.returncode}.Signal : Process died with signal: {signal.Signals(-e.returncode).name} ({-e.returncode})") - # log_detected_errors("Returned non-zero exit status : {e.returncode}.: "+ str({signal.Signals(-e.returncode).name})+str({-e.returncode}), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) - # else : - # # print(f"Error: Command '{e.cmd}' returned non-zero exit status {e.returncode}.") - - # except FileNotFoundError as e: - # print(f"Error: {e}") - # log_detected_errors(e, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) - - # stdout_data, stderr_data = result.communicate() - # print(stdout_data.decode()) - # print("Error",stderr.decode()) - - # log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) print("------------------------------------------------------------------------------------------") @@ -131,26 +103,18 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d with 
open(loggingFolder + "/BatchPD_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) _, stderr_data = process.communicate() exit_code = result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) - + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + 
[scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) _, stderr_data = process.communicate() - exit_code = result.returncode + exit_code = process.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n") diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py index 3e7829021..e91c0c694 100644 --- a/utilities/test_suite/HOST/runVoxelTests.py +++ b/utilities/test_suite/HOST/runVoxelTests.py @@ -24,7 +24,6 @@ import os import sys -import signal sys.dont_write_bytecode = True sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' 
)) from common import * @@ -66,10 +65,8 @@ def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, print(stdout_data.decode()) exit_code = result.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exit_code)) + print("\n------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -94,10 +91,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, stdout_data, stderr_data = process.communicate() exit_code = process.returncode if(exit_code != 0): - if(exit_code < 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST")) - else: - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST")) + log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], 
get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exit_code)) print("\n------------------------------------------------------------------------------------------") def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -269,7 +263,7 @@ def rpp_test_suite_parser_and_validator(): print_performance_tests_summary(logFile, functionalityGroupList, numRuns) if errorLog: - print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n") + print("\n---------------------------------- Error log - Tensor_voxel_host ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index e322c59dd..c91084386 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -28,6 +28,7 @@ import datetime import shutil import pandas as pd +import signal try: from errno import FileExistsError @@ -438,13 +439,23 @@ def get_voxel_layout_type(layout, backend): else: result += "_PLN1_toPLN1" return result -# def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName): -# if errorData.decode(): -# msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData.decode() -# errorLog.append(msg) -def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName): - msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. 
Getting below error\n" + errorData + +def get_bit_depth(bitDepth): + result = str(bitDepthDict[bitDepth]) + return result + +def get_signal_name_from_return_code(return_code): + result = "" + if return_code < 0: + signal_num = -return_code + result = result + " Signal = " + for signame, signum in signal.__dict__.items(): + if isinstance(signum, int) and signum == signal_num: + signal_name = signame + break + result = result + signal_name + return result + +def log_detected_errors(errorData, errorLog, caseName, functionBitDepth, functionSpecificName, functionSignalName): + msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + functionSignalName errorLog.append(msg) -def log_detected_errors(errorData, errorLog, caseName, functionSpecificName): - msg = caseName + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData - errorLog.append(msg) \ No newline at end of file From a2792df758d1297235944124ed0c414788d5bf91 Mon Sep 17 00:00:00 2001 From: dineshbabu-ravichandran Date: Wed, 2 Oct 2024 22:45:32 -0700 Subject: [PATCH 04/17] Voxel test suite changes --- utilities/test_suite/HIP/runVoxelTests.py | 2 +- utilities/test_suite/HOST/runVoxelTests.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py index f71f09476..8db1cb73a 100644 --- a/utilities/test_suite/HIP/runVoxelTests.py +++ b/utilities/test_suite/HIP/runVoxelTests.py @@ -77,7 +77,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, break output = output.decode('utf-8') if output: - print(output, end='') + print(output) logFile.write(output) if "Running" in output or "max,min,avg wall times" in output: cleanedOutput = ''.join(char for char in output if 32 <= ord(char) <= 126) # Remove control characters diff --git a/utilities/test_suite/HOST/runVoxelTests.py 
b/utilities/test_suite/HOST/runVoxelTests.py index e91c0c694..1f02d2bc5 100644 --- a/utilities/test_suite/HOST/runVoxelTests.py +++ b/utilities/test_suite/HOST/runVoxelTests.py @@ -79,7 +79,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, break output = output.decode('utf-8') if output: - print(output, end='') + print(output) logFile.write(output) if "Running" in output or "max,min,avg wall times" in output: cleanedOutput = ''.join(char for char in output if 32 <= ord(char) <= 126) # Remove control characters From 36ac096010e342b8bf5e6a48d977f88b26aa7eac Mon Sep 17 00:00:00 2001 From: dineshbabu-ravichandran Date: Thu, 3 Oct 2024 08:20:40 -0400 Subject: [PATCH 05/17] Changed variable name to camelCase --- utilities/test_suite/HIP/runAudioTests.py | 26 +++++----- utilities/test_suite/HIP/runMiscTests.py | 26 +++++----- utilities/test_suite/HIP/runTests.py | 56 +++++++++++----------- utilities/test_suite/HIP/runVoxelTests.py | 26 +++++----- utilities/test_suite/HOST/runAudioTests.py | 18 +++---- utilities/test_suite/HOST/runMiscTests.py | 18 +++---- utilities/test_suite/HOST/runTests.py | 46 +++++++++--------- utilities/test_suite/HOST/runVoxelTests.py | 18 +++---- utilities/test_suite/common.py | 14 +++--- 9 files changed, 124 insertions(+), 124 deletions(-) diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py index 2f17a1a4a..044ad07b9 100644 --- a/utilities/test_suite/HIP/runAudioTests.py +++ b/utilities/test_suite/HIP/runAudioTests.py @@ -75,11 +75,11 @@ def generate_performance_reports(RESULTS_DIR): def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath): print("\n./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, 
scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -87,10 +87,10 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba print("./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdout_data, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " "+ stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " "+ stderrData.decode(), errorLog, "", 
audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -107,10 +107,10 @@ def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns output_str = output.decode('utf-8') logFile.write(output_str) - stdout_data, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " "+ stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " "+ stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath, profilingOption = "NO"): diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py index 3d397c8f7..a71b1ce50 100644 --- a/utilities/test_suite/HIP/runMiscTests.py +++ b/utilities/test_suite/HIP/runMiscTests.py @@ -77,11 +77,11 @@ def generate_performance_reports(RESULTS_DIR): def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): print("\n./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), 
str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): @@ -89,10 +89,10 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to logFile.write("./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdout_data, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non- exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], 
get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non- exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode)) def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): if not os.path.exists(outFilePath + "/case_" + str(case)): @@ -102,10 +102,10 @@ def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns logFile.write("\nrocprof --basenames on --timestamp on --stats -o " + outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv ./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n") process = subprocess.Popen(['rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv", "./Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdout_data, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non- exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non- exit 
status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg, profilingOption = 'NO'): diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index 43bcc1f1e..965839a43 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -68,21 +68,21 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo for kernelSize in range(3, 10, 2): print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], 
get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) elif case == "8": # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise for noiseType in range(3): print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 @@ -91,19 +91,19 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo 
for interpolationType in range(interpolationRange): print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) else: print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - 
print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): @@ -111,10 +111,10 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam)) process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - _, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], 
get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) + _, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n") @@ -158,10 +158,10 @@ def run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, s output_str = output.decode('utf-8') logFile.write(output_str) - stdout_data, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) # Parse and validate command-line arguments for the RPP test suite def rpp_test_suite_parser_and_validator(): diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py index 8db1cb73a..4129cb7c9 100644 --- a/utilities/test_suite/HIP/runVoxelTests.py +++ b/utilities/test_suite/HIP/runVoxelTests.py @@ -60,11 +60,11 @@ def 
func_group_finder(case_number): def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n./Tensor_voxel_hip " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode)) print("\n------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -86,10 +86,10 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, if "max,min,avg wall times" in output: logFile.write("\n") - stdout_data, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ 
str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode)) print("\n------------------------------------------------------------------------------------------") def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -109,10 +109,10 @@ def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerP break print(output.strip()) logFile.write(output.decode('utf-8')) - stdout_data, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize, profilingOption = 'NO'): diff --git 
a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py index d00b9244a..2d3f11686 100644 --- a/utilities/test_suite/HOST/runAudioTests.py +++ b/utilities/test_suite/HOST/runAudioTests.py @@ -48,11 +48,11 @@ def get_log_file_list(): def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath): print("\n./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -60,10 +60,10 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba logFile.write("./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize) + "\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), 
outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdout_data, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py index 533200a81..732d5644d 100644 --- a/utilities/test_suite/HOST/runMiscTests.py +++ b/utilities/test_suite/HOST/runMiscTests.py @@ -50,11 +50,11 @@ def get_log_file_list(): def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): print("\n./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), 
errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): @@ -62,10 +62,10 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to logFile.write("./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdout_data, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non- exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non- exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, 
additionalArg), get_signal_name_from_return_code(exitCode)) def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg = ""): if testType == 0: diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 4881f16f4..7c98009ad 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -68,11 +68,11 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo for noiseType in range(3): print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode)) elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with 
additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular @@ -82,19 +82,19 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo for interpolationType in range(interpolationRange): print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode)) else: print("\n./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), 
"0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode)) print("------------------------------------------------------------------------------------------") @@ -103,18 +103,18 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d with open(loggingFolder + "/BatchPD_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - _, stderr_data = process.communicate() - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) + _, 
stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode)) with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - _, stderr_data = process.communicate() - exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code)) + _, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode)) def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n") diff --git 
a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py index 1f02d2bc5..cf0d7db46 100644 --- a/utilities/test_suite/HOST/runVoxelTests.py +++ b/utilities/test_suite/HOST/runVoxelTests.py @@ -61,11 +61,11 @@ def func_group_finder(case_number): def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdout_data, stderr_data = result.communicate() - print(stdout_data.decode()) - exit_code = result.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exitCode)) print("\n------------------------------------------------------------------------------------------") @@ -88,10 +88,10 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, if "max,min,avg wall times" in output: logFile.write("\n") - stdout_data, stderr_data = process.communicate() - 
exit_code = process.returncode - if(exit_code != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exit_code)) + stdoutData, stderrData = process.communicate() + exitCode = process.returncode + if(exitCode != 0): + log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exitCode)) print("\n------------------------------------------------------------------------------------------") def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index c91084386..77b9e560d 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -427,7 +427,7 @@ def get_misc_func_name(testCase, nDim, additionalArg): if (axisMaskCase): result = result + "_" + str(nDim) + "d" + "_axisMask" + str(axisMask) if (permOrderCase): - result =result + "_" + str(nDim) + "d" + "_permOrder" + str(permOrder) + result = result + "_" + str(nDim) + "d" + "_permOrder" + str(permOrder) return result def get_voxel_layout_type(layout, backend): @@ -444,16 +444,16 @@ def get_bit_depth(bitDepth): result = str(bitDepthDict[bitDepth]) return result -def get_signal_name_from_return_code(return_code): +def get_signal_name_from_return_code(returnCode): result = "" - if return_code < 0: - signal_num = -return_code + if returnCode < 0: + signalNum = -returnCode result = result + " Signal = " for signame, signum in signal.__dict__.items(): - if isinstance(signum, int) and signum == signal_num: - signal_name = signame + if isinstance(signum, int) and signum == 
signalNum: + signalName = signame break - result = result + signal_name + result = result + signalName return result def log_detected_errors(errorData, errorLog, caseName, functionBitDepth, functionSpecificName, functionSignalName): From 464660e7a1ce18338b77e5a68599680be8b5a898 Mon Sep 17 00:00:00 2001 From: dineshbabu-ravichandran Date: Fri, 4 Oct 2024 07:10:56 +0000 Subject: [PATCH 06/17] Changes in srcpath --- utilities/test_suite/HIP/runTests.py | 15 ++++++++++++--- utilities/test_suite/HOST/runTests.py | 10 ++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index 965839a43..4e0323595 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -311,9 +311,12 @@ def rpp_test_suite_parser_and_validator(): if case == "82" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath - if case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): + elif case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): srcPath1 = lensCorrectionInFilePath srcPath2 = lensCorrectionInFilePath + else: + srcPath1 = inFilePath1 + srcPath2 = inFilePath2 # if QA mode is enabled overwrite the input folders with the folders used for generating golden outputs if qaMode == 1 and (case != "82" and case != "26"): srcPath1 = inFilePath1 @@ -341,9 +344,12 @@ def rpp_test_suite_parser_and_validator(): if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath - if case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: + elif case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = lensCorrectionInFilePath srcPath2 = 
lensCorrectionInFilePath + else: + srcPath1 = inFilePath1 + srcPath2 = inFilePath2 for layout in range(3): dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) @@ -361,9 +367,12 @@ def rpp_test_suite_parser_and_validator(): if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath - if case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: + elif case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = lensCorrectionInFilePath srcPath2 = lensCorrectionInFilePath + else: + srcPath1 = inFilePath1 + srcPath2 = inFilePath2 for layout in range(3): dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 7c98009ad..98ae5170e 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -283,9 +283,12 @@ def rpp_test_suite_parser_and_validator(): if case == "82" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath - if case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): + elif case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): srcPath1 = lensCorrectionInFilePath srcPath2 = lensCorrectionInFilePath + else : + srcPath1 = inFilePath1 + srcPath2 = inFilePath2 # if QA mode is enabled overwrite the input folders with the folders used for generating golden outputs if qaMode == 1 and (case != "82" and case != "26"): srcPath1 = inFilePath1 @@ -316,9 +319,12 @@ def rpp_test_suite_parser_and_validator(): if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = 
ricapInFilePath srcPath2 = ricapInFilePath - if case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: + elif case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = lensCorrectionInFilePath srcPath2 = lensCorrectionInFilePath + else : + srcPath1 = inFilePath1 + srcPath2 = inFilePath2 for layout in range(3): dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder) run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) From 82c20796047fddaf1967493c863dfddb38bc0ae5 Mon Sep 17 00:00:00 2001 From: dineshbabu-ravichandran Date: Fri, 4 Oct 2024 09:06:37 +0000 Subject: [PATCH 07/17] Changes in Voxel test suite for nonQA case --- utilities/test_suite/HIP/Tensor_voxel_hip.cpp | 4 +++- utilities/test_suite/HOST/Tensor_voxel_host.cpp | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp index 16107c608..2db6b169c 100644 --- a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp @@ -50,6 +50,8 @@ int main(int argc, char * argv[]) inputBitDepth = atoi(argv[10]); string scriptPath = argv[11]; + bool nonQACase = (testCase == 6); + if ((layoutType < 0) || (layoutType > 2)) { fprintf(stdout, "\nUsage: %s
\n", argv[0]); @@ -465,7 +467,7 @@ int main(int argc, char * argv[]) /*Compare the output of the function with golden outputs only if 1.QA Flag is set 2.input bit depth 2 (F32)*/ - if(qaFlag && inputBitDepth == 2) + if(qaFlag && inputBitDepth == 2 && !(nonQACase)) compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); else { diff --git a/utilities/test_suite/HOST/Tensor_voxel_host.cpp b/utilities/test_suite/HOST/Tensor_voxel_host.cpp index 39f3979a9..5f50bd53a 100644 --- a/utilities/test_suite/HOST/Tensor_voxel_host.cpp +++ b/utilities/test_suite/HOST/Tensor_voxel_host.cpp @@ -50,6 +50,8 @@ int main(int argc, char * argv[]) inputBitDepth = atoi(argv[10]); string scriptPath = argv[11]; + bool nonQACase = (testCase == 6); + if ((layoutType < 0) || (layoutType > 2)) { fprintf(stdout, "\nUsage: %s
\n", argv[0]); @@ -446,7 +448,7 @@ int main(int argc, char * argv[]) /*Compare the output of the function with golden outputs only if 1.QA Flag is set 2.input bit depth 2 (F32)*/ - if(qaFlag && inputBitDepth == 2) + if(qaFlag && inputBitDepth == 2 && !(nonQACase)) compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); else { From 3e30b4d45252c8d783051bff70e1ef918e98b9b7 Mon Sep 17 00:00:00 2001 From: dineshbabu-ravichandran Date: Wed, 20 Nov 2024 13:18:15 +0000 Subject: [PATCH 08/17] Consolidated the repeated code and move it as common code --- utilities/test_suite/HIP/runAudioTests.py | 16 ++-------- utilities/test_suite/HIP/runMiscTests.py | 16 ++-------- utilities/test_suite/HIP/runTests.py | 34 ++++------------------ utilities/test_suite/HIP/runVoxelTests.py | 19 ++++-------- utilities/test_suite/HOST/runAudioTests.py | 11 ++----- utilities/test_suite/HOST/runMiscTests.py | 13 ++------- utilities/test_suite/HOST/runTests.py | 33 +++++---------------- utilities/test_suite/HOST/runVoxelTests.py | 16 ++++------ utilities/test_suite/common.py | 15 ++++++---- 9 files changed, 44 insertions(+), 129 deletions(-) diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py index 2370a529f..e5792c6fe 100644 --- a/utilities/test_suite/HIP/runAudioTests.py +++ b/utilities/test_suite/HIP/runAudioTests.py @@ -75,11 +75,7 @@ def generate_performance_reports(RESULTS_DIR): def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath): print("\n./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() 
- print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HIP") print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -87,10 +83,7 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba print("./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdoutData, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " "+ stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HIP") print("------------------------------------------------------------------------------------------") def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -107,10 +100,7 @@ def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns output_str = output.decode('utf-8') logFile.write(output_str) - stdoutData, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - 
log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " "+ stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HIP") print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath, profilingOption = "NO"): diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py index efcd33ac4..d31745bae 100644 --- a/utilities/test_suite/HIP/runMiscTests.py +++ b/utilities/test_suite/HIP/runMiscTests.py @@ -77,11 +77,7 @@ def generate_performance_reports(RESULTS_DIR): def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): print("\n./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_misc_func_name(int(case), numDims, additionalArg)) print("------------------------------------------------------------------------------------------") def 
run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): @@ -89,10 +85,7 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to logFile.write("./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdoutData, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non- exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_misc_func_name(int(case), numDims, additionalArg)) def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): if not os.path.exists(outFilePath + "/case_" + str(case)): @@ -102,10 +95,7 @@ def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns logFile.write("\nrocprof --basenames on --timestamp on --stats -o " + outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv ./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n") process = subprocess.Popen(['rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + str(case) + "/output_case" + str(case) + 
".csv", "./Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdoutData, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non- exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_misc_func_name(int(case), numDims, additionalArg)) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg, profilingOption = 'NO'): diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index 386df7529..a7d5b7c41 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -68,21 +68,13 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo for kernelSize in range(3, 10, 2): print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - 
log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) elif case == "8": # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise for noiseType in range(3): print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 @@ -91,19 +83,11 @@ def run_unit_test(srcPath1, srcPath2, 
dstPathTemp, case, numRuns, testType, layo for interpolationType in range(interpolationRange): print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) else: print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, 
imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): @@ -111,10 +95,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam)) process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - _, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): 
print("\n") @@ -158,10 +139,7 @@ def run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, s output_str = output.decode('utf-8') logFile.write(output_str) - stdoutData, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) # Parse and validate command-line arguments for the RPP test suite def rpp_test_suite_parser_and_validator(): diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py index fd5c232b0..e0956e454 100644 --- a/utilities/test_suite/HIP/runVoxelTests.py +++ b/utilities/test_suite/HIP/runVoxelTests.py @@ -60,11 +60,7 @@ def func_group_finder(case_number): def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n./Tensor_voxel_hip " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], 
get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP")) print("\n------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -78,18 +74,16 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, output = output.decode('utf-8') if output: print(output) - logFile.write(output) if "Running" in output or "max,min,avg wall times" in output: cleanedOutput = ''.join(char for char in output if 32 <= ord(char) <= 126) # Remove control characters cleanedOutput = cleanedOutput.strip() # Remove leading/trailing whitespace logFile.write(cleanedOutput + '\n') if "max,min,avg wall times" in output: logFile.write("\n") + else: + logFile.write(output) - stdoutData, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP")) print("\n------------------------------------------------------------------------------------------") def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -109,10 +103,7 @@ def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerP break print(output.strip()) logFile.write(output.decode('utf-8')) - stdoutData, 
stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP")) print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize, profilingOption = 'NO'): diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py index b6e13abb1..441615a15 100644 --- a/utilities/test_suite/HOST/runAudioTests.py +++ b/utilities/test_suite/HOST/runAudioTests.py @@ -48,11 +48,7 @@ def get_log_file_list(): def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath): print("\n./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HOST") print("------------------------------------------------------------------------------------------") 
def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): @@ -60,10 +56,7 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba logFile.write("./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize) + "\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdoutData, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HOST") print("------------------------------------------------------------------------------------------") def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath): diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py index d1d4489f2..e864f9912 100644 --- a/utilities/test_suite/HOST/runMiscTests.py +++ b/utilities/test_suite/HOST/runMiscTests.py @@ -50,11 +50,7 @@ def get_log_file_list(): def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): print("\n./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], 
stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_misc_func_name(int(case), numDims, additionalArg)) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg): @@ -62,11 +58,8 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to logFile.write("./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - stdoutData, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non- exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode)) - + log_detected(process, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_misc_func_name(int(case), numDims, additionalArg)) + def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, 
batchSize, outFilePath, additionalArg = ""): if testType == 0: run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg) diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 5a848fee5..853061ccd 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -67,18 +67,13 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo for kernelSize in range(3, 10, 2): print(f"./Tensor_host {srcPath1} {srcPath2} {dstPathTemp} {bitDepth} {outputFormatToggle} {case} {kernelSize} 0 ") result = subprocess.run([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - print(result.stdout.decode()) + log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) elif case == "8": # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise for noiseType in range(3): print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - 
log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode)) - + log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) elif case == "21" or case == "23" or case == "24" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 @@ -87,19 +82,11 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo for interpolationType in range(interpolationRange): print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) else: print("./Tensor_host " + srcPath1 + " " + 
srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode)) + log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) print("------------------------------------------------------------------------------------------") @@ -108,19 +95,13 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d with open(loggingFolder + "/BatchPD_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - _, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, 
"HOST"), get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n") process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) - _, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode)) - + log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) + def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n") bitDepths = range(7) diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py index cbdf7d265..07de3f699 100644 --- a/utilities/test_suite/HOST/runVoxelTests.py +++ b/utilities/test_suite/HOST/runVoxelTests.py @@ -61,12 +61,7 @@ def func_group_finder(case_number): def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, 
numRuns, testType, qaMode, batchSize): print("\n./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - stdoutData, stderrData = result.communicate() - print(stdoutData.decode()) - exitCode = result.returncode - if(exitCode != 0): - log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exitCode)) - + log_detected(result, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_voxel_layout_type(layout, "HOST")) print("\n------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -80,18 +75,17 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, output = output.decode('utf-8') if output: print(output) - logFile.write(output) if "Running" in output or "max,min,avg wall times" in output: cleanedOutput = ''.join(char for char in output if 32 <= ord(char) <= 126) # Remove control characters cleanedOutput = cleanedOutput.strip() # Remove leading/trailing whitespace logFile.write(cleanedOutput + '\n') if "max,min,avg wall times" in output: logFile.write("\n") + else: + logFile.write(output) + - stdoutData, stderrData = process.communicate() - exitCode = process.returncode - if(exitCode != 0): - log_detected_errors("Returned 
non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exitCode)) + log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_voxel_layout_type(layout, "HOST")) print("\n------------------------------------------------------------------------------------------") def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index 227ce220c..2ee05f51a 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -425,9 +425,9 @@ def get_misc_func_name(testCase, nDim, additionalArg): permOrder = additionalParam result = "" if (axisMaskCase): - result = result + "_" + str(nDim) + "d" + "_axisMask" + str(axisMask) + result = result + str(nDim) + "d" + "_axisMask" + str(axisMask) if (permOrderCase): - result = result + "_" + str(nDim) + "d" + "_permOrder" + str(permOrder) + result = result + str(nDim) + "d" + "_permOrder" + str(permOrder) return result def get_voxel_layout_type(layout, backend): @@ -456,6 +456,11 @@ def get_signal_name_from_return_code(returnCode): result = result + signalName return result -def log_detected_errors(errorData, errorLog, caseName, functionBitDepth, functionSpecificName, functionSignalName): - msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. 
Getting below error\n" + errorData + functionSignalName - errorLog.append(msg) +def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificName): + stdoutData, stderrData = result.communicate() + print(stdoutData.decode()) + exitCode = result.returncode + if(exitCode != 0): + errorData = "Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(); + msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + get_signal_name_from_return_code(exitCode) + errorLog.append(msg) From 8da86954cb58937e91b3b8ee831e7d6da6e0136a Mon Sep 17 00:00:00 2001 From: dineshbabu-ravichandran Date: Wed, 20 Nov 2024 13:26:27 +0000 Subject: [PATCH 09/17] Minor changes based on review commands --- utilities/test_suite/HOST/runTests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 853061ccd..c455e7941 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -275,7 +275,7 @@ def rpp_test_suite_parser_and_validator(): elif case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): srcPath1 = lensCorrectionInFilePath srcPath2 = lensCorrectionInFilePath - else : + else: srcPath1 = inFilePath1 srcPath2 = inFilePath2 # if QA mode is enabled overwrite the input folders with the folders used for generating golden outputs @@ -311,7 +311,7 @@ def rpp_test_suite_parser_and_validator(): elif case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = lensCorrectionInFilePath srcPath2 = lensCorrectionInFilePath - else : + else: srcPath1 = inFilePath1 srcPath2 = inFilePath2 for layout in range(3): From d49dcc8cdbe7778ea47aa154062db2c0952a0033 Mon Sep 17 00:00:00 2001 From: dineshbabu-ravichandran Date: Thu, 21 Nov 2024 06:12:31 +0000 Subject: [PATCH 10/17] Modification for 
bitDepth in voxel host --- utilities/test_suite/HOST/runVoxelTests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py index 07de3f699..87625bfe6 100644 --- a/utilities/test_suite/HOST/runVoxelTests.py +++ b/utilities/test_suite/HOST/runVoxelTests.py @@ -61,7 +61,7 @@ def func_group_finder(case_number): def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec - log_detected(result, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_voxel_layout_type(layout, "HOST")) + log_detected(result, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST")) print("\n------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): @@ -85,7 +85,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, logFile.write(output) - log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_voxel_layout_type(layout, "HOST")) + log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST")) 
print("\n------------------------------------------------------------------------------------------") def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): From bcc193a8378b711ad4ef81e26e445b3c9f81d75e Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Thu, 19 Dec 2024 09:52:46 +0000 Subject: [PATCH 11/17] Merge with develop branch --- .jenkins/common.groovy | 8 +- CHANGELOG.md | 1 + CMakeLists.txt | 26 +- cmake/FindStdFilesystem.cmake | 58 + cmake/FindTurboJpeg.cmake | 61 +- ...mentations_warp_perspective_img150x150.png | Bin 0 -> 7532 bytes docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 2 +- include/rppdefs.h | 8 + include/rppt_tensor_geometric_augmentations.h | 44 + src/include/cpu/rpp_cpu_common.hpp | 19 +- src/include/cpu/rpp_cpu_simd.hpp | 57 +- .../host_tensor_geometric_augmentations.hpp | 1 + src/modules/cpu/kernel/warp_perspective.hpp | 2196 +++++++++++++++++ .../hip_tensor_geometric_augmentations.hpp | 1 + src/modules/hip/kernel/warp_perspective.hpp | 461 ++++ .../rppt_tensor_geometric_augmentations.cpp | 189 ++ utilities/examples/brightness/CMakeLists.txt | 4 +- .../HIP_NEW/CMakeLists.txt | 4 +- .../rpp-unittests/HIP_NEW/CMakeLists.txt | 4 +- utilities/test_suite/CMakeLists.txt | 40 +- utilities/test_suite/HIP/CMakeLists.txt | 23 +- utilities/test_suite/HIP/Tensor_hip.cpp | 43 +- utilities/test_suite/HIP/runTests.py | 12 +- utilities/test_suite/HOST/CMakeLists.txt | 19 +- utilities/test_suite/HOST/Tensor_host.cpp | 40 +- utilities/test_suite/HOST/runTests.py | 10 +- utilities/test_suite/common.py | 3 +- utilities/test_suite/rpp_test_suite_image.h | 1 + 29 files changed, 3238 insertions(+), 99 deletions(-) create mode 100644 cmake/FindStdFilesystem.cmake create mode 100644 docs/data/doxygenOutputs/geometric_augmentations_warp_perspective_img150x150.png create mode 100644 src/modules/cpu/kernel/warp_perspective.hpp create mode 100644 
src/modules/hip/kernel/warp_perspective.hpp diff --git a/.jenkins/common.groovy b/.jenkins/common.groovy index 7132af9c2..67b572340 100644 --- a/.jenkins/common.groovy +++ b/.jenkins/common.groovy @@ -54,8 +54,10 @@ def runTestCommand (platform, project) { def command = """#!/usr/bin/env bash set -x - cd ${project.paths.project_build_prefix}/build/release - make test ARGS="-VV" + cd ${project.paths.project_build_prefix}/build + mkdir -p test && cd test + cmake /opt/rocm/share/rpp/test + ctest -VV """ platform.runCommand(this, command) @@ -116,8 +118,6 @@ def runPackageCommand(platform, project) { mv rpp-test*.${packageType} package/${osType}-rpp-test.${packageType} mv rpp-dev*.${packageType} package/${osType}-rpp-dev.${packageType} mv ${packageRunTime}.${packageType} package/${osType}-rpp.${packageType} - mv Testing/Temporary/LastTest.log ${osType}-LastTest.log - mv Testing/Temporary/LastTestsFailed.log ${osType}-LastTestsFailed.log ${packageDetail} package/${osType}-rpp-test.${packageType} ${packageDetail} package/${osType}-rpp-dev.${packageType} ${packageDetail} package/${osType}-rpp.${packageType} diff --git a/CHANGELOG.md b/CHANGELOG.md index bd618dad0..7cdc6657f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Full documentation for RPP is available at [https://rocm.docs.amd.com/projects/r * RPP Tensor Gaussian Filter support on HOST * RPP Fog augmentation on HOST and HIP +* RPP Warp Perspective on HOST and HIP ## (Unreleased) RPP 1.9.4 diff --git a/CMakeLists.txt b/CMakeLists.txt index b6f91325d..4743524e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,7 +68,6 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(DEFAULT_BUILD_TYPE "Release") ### RPP_AUDIO_SUPPORT - default = ON, NOTE: support currently only on Ubuntu - user to set to OFF otherwise option(RPP_AUDIO_SUPPORT "Build RPP with Audio Support" ON) -option(BUILD_WITH_AMD_ADVANCE "Build RPP for advanced AMD GPU Architecture" OFF) # Set message options if(NOT WIN32) @@ -154,7 
+153,6 @@ message("-- ${Cyan}RPP Developer Options${ColourReset}") message("-- ${Cyan} -D BACKEND=${BACKEND} [Select RPP Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}") message("-- ${Cyan} -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} [Select RPP build type [options:Debug/Release](default:Release)]${ColourReset}") message("-- ${Cyan} -D RPP_AUDIO_SUPPORT=${RPP_AUDIO_SUPPORT} [Select RPP audio support [options:ON/OFF](default:ON)]${ColourReset}") -message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [Turn ON/OFF Build for AMD advanced GPUs(default:OFF)]${ColourReset}") # OpenMP find_package(OpenMP REQUIRED) @@ -227,10 +225,7 @@ if("${BACKEND}" STREQUAL "HIP") list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip) # Set supported GPU Targets - set(DEFAULT_GPU_TARGETS "gfx908;gfx90a;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102") - if (BUILD_WITH_AMD_ADVANCE) - set(DEFAULT_GPU_TARGETS ${DEFAULT_GPU_TARGETS} "gfx1200;gfx1201") - endif() + set(DEFAULT_GPU_TARGETS "gfx908;gfx90a;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201") # Set AMD GPU_TARGETS if((AMDGPU_TARGETS OR DEFINED ENV{AMDGPU_TARGETS}) AND (NOT GPU_TARGETS)) @@ -425,8 +420,19 @@ endif() # Set the dependent packages set(RPP_DEBIAN_PACKAGE_LIST "rocm-hip-runtime") set(RPP_RPM_PACKAGE_LIST "rocm-hip-runtime") -set(RPP_DEBIAN_DEV_PACKAGE_LIST "rocm-hip-runtime-dev, half") -set(RPP_RPM_DEV_PACKAGE_LIST "rocm-hip-runtime-devel, half") +set(RPP_DEBIAN_DEV_PACKAGE_LIST "rocm-hip-runtime-dev, half, libomp-dev") +set(RPP_RPM_DEV_PACKAGE_LIST "rocm-hip-runtime-devel, half, libomp-devel") +set(RPP_DEBIAN_TEST_PACKAGE_LIST "python3-dev, python3-pip, libopencv-dev") +set(RPP_RPM_TEST_PACKAGE_LIST "python3-devel, python3-pip") # TBD: OpenCV Package missing on RPM + +# Add OS specific dependencies +if(EXISTS "/etc/os-release") + file(READ "/etc/os-release" OS_RELEASE) + string(REGEX MATCH "22.04" UBUNTU_22_FOUND ${OS_RELEASE}) + if(UBUNTU_22_FOUND) + 
set(RPP_DEBIAN_DEV_PACKAGE_LIST "${RPP_DEBIAN_DEV_PACKAGE_LIST}, libstdc++-12-dev") + endif() +endif() # package release set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") @@ -461,7 +467,7 @@ set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core, ${RPP_DEBIAN_PACKAGE_LIST}" set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev") set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "rocm-core, ${PROJECT_NAME}, ${RPP_DEBIAN_DEV_PACKAGE_LIST}") set(CPACK_DEBIAN_TEST_PACKAGE_NAME "${PROJECT_NAME}-test") -set(CPACK_DEBIAN_TEST_PACKAGE_DEPENDS "rocm-core, ${PROJECT_NAME}-dev, clang") +set(CPACK_DEBIAN_TEST_PACKAGE_DEPENDS "rocm-core, ${PROJECT_NAME}-dev, ${RPP_DEBIAN_TEST_PACKAGE_LIST}") set(CPACK_DEBIAN_PACKAGE_LICENSE "MIT" ) # Debian package specific variable for ASAN set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" ) @@ -473,7 +479,7 @@ set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core, ${RPP_RPM_PACKAGE_LIST}") set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel") set(CPACK_RPM_DEV_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}, ${RPP_RPM_DEV_PACKAGE_LIST}") set(CPACK_RPM_TEST_PACKAGE_NAME "${PROJECT_NAME}-test") -set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}-devel, clang") +set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}-devel, ${RPP_RPM_TEST_PACKAGE_LIST}") set(CPACK_RPM_PACKAGE_LICENSE "MIT" ) # RPM package specific variable for ASAN set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" ) diff --git a/cmake/FindStdFilesystem.cmake b/cmake/FindStdFilesystem.cmake new file mode 100644 index 000000000..1d24e72f9 --- /dev/null +++ b/cmake/FindStdFilesystem.cmake @@ -0,0 +1,58 @@ +################################################################################ +# +# MIT License +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +################################################################################ +include(CheckCXXSourceCompiles) +include(CMakePushCheckState) + +cmake_push_check_state(RESET) + +set(CMAKE_REQUIRED_FLAGS "-std=c++17") + +check_cxx_source_compiles(" +#include <filesystem> +int main() { + std::filesystem::path p; + return 0; +} +" STD_FILESYSTEM_PRESENT) + +cmake_pop_check_state() + +if(NOT STD_FILESYSTEM_PRESENT) + cmake_push_check_state(RESET) + set(CMAKE_REQUIRED_FLAGS "-std=c++17") + + check_cxx_source_compiles(" + #include <experimental/filesystem> + int main() { + std::experimental::filesystem::path p; + return 0; + } + " EXPERIMENTAL_FILESYSTEM_PRESENT) + + cmake_pop_check_state() +endif() + +set(FILESYSTEM_FOUND TRUE) \ No newline at end of file diff --git a/cmake/FindTurboJpeg.cmake b/cmake/FindTurboJpeg.cmake index dff5715d7..6264d8916 100644 --- a/cmake/FindTurboJpeg.cmake +++ b/cmake/FindTurboJpeg.cmake @@ -1,27 +1,28 @@ -#[[ -MIT License - -Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -]] - +################################################################################ +# +# MIT License +# +# Copyright (c) 2017 - 2024 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +################################################################################ if(APPLE) set(SHARED_LIB_TYPE ".dylib") else() @@ -48,6 +49,7 @@ find_library(TurboJpeg_LIBRARIES ${TURBO_JPEG_PATH}/lib ${TURBO_JPEG_PATH}/lib64 /usr/lib + #/usr/lib/x86_64-linux-gnu - package install libturbojpeg0-dev /opt/libjpeg-turbo/lib ) mark_as_advanced(TurboJpeg_LIBRARIES) @@ -61,6 +63,7 @@ find_path(TurboJpeg_LIBRARIES_DIRS ${TURBO_JPEG_PATH}/lib ${TURBO_JPEG_PATH}/lib64 /usr/lib + #/usr/lib/x86_64-linux-gnu - package install libturbojpeg0-dev /opt/libjpeg-turbo/lib ) mark_as_advanced(TurboJpeg_LIBRARIES_DIRS) @@ -70,10 +73,12 @@ if(TurboJpeg_LIBRARIES AND TurboJpeg_INCLUDE_DIRS) endif( ) include(FindPackageHandleStandardArgs) -find_package_handle_standard_args( TurboJpeg - FOUND_VAR TurboJpeg_FOUND +find_package_handle_standard_args( + TurboJpeg + FOUND_VAR + TurboJpeg_FOUND REQUIRED_VARS - TurboJpeg_LIBRARIES + TurboJpeg_LIBRARIES TurboJpeg_INCLUDE_DIRS TurboJpeg_LIBRARIES_DIRS ) @@ -90,4 +95,4 @@ else() message(FATAL_ERROR "{Red}FindTurboJpeg -- NOT FOUND${ColourReset}") endif() message( "-- ${Yellow}NOTE: FindTurboJpeg failed to find -- turbojpeg${ColourReset}" ) -endif() \ No newline at end of file +endif() diff --git a/docs/data/doxygenOutputs/geometric_augmentations_warp_perspective_img150x150.png b/docs/data/doxygenOutputs/geometric_augmentations_warp_perspective_img150x150.png new file mode 100644 index 0000000000000000000000000000000000000000..031da91cdf1289eecf2c2c2a5464bfc1ea123e39 GIT binary patch literal 7532 zcmbW4byQT}`|l4eAkru~CnC(El5a8Bi-H1 zkONZ}zu&d){pbF1fA=|SKkJ-zo^#gwwf5T2esBB?eg&XWQ&Lp|2nYax;N}AG^T11h zkl=6p&4ho0nCNdNAtojwCM6*y{m&pHzeP$$PDV<4i}DsZ#oxHOyG==P`|rcwpZxbz zLJ}e(5(+X>vj3F)j}rbTKzj>F1JZ~H9sq>21Vpq1_-+7nqm$%6@@~ZbHwXxch)GDv zZe&p26u@b26iZ~Dk-a|s_E#y(bG3DG_ta`v9+^zaP;)@ z_VM-e4+sm7h>VK<5R;tpF*WT|`sa+i{DQ)7Ma3nhHMMo{`i91)=FYC}Up>8j{R86@ zlYgeBXJ+SC*VZ>Sx3+h7_mC&2XXh7}sH^M0Tm%5of3a@z|CNjOhKum;9FYCxA|Uj= 
z8AP(s@S5cs)*1XW_A`tZ~hh-eY4(?o}*#CFT!oIG>`0%kzM7lZ!6#F#3?qvCFU@aT2MOZJC3p@ zhgqhm60f^XwcD&hK#}Gl+m#G^y0yN>f|K6p<(zDKu;hr?r?_7yey_Ia>1*7a|{#Wz-_1@aQE88rzTF=qTb+z^a-g?RbQVJam;1`HUw;=FtO?Y zx@2p^1MBB!;>~Zj=!O7m93Xm-H6fp2R9!0eWlpG z@+p^)O!KZ^J02i9gkG^!$!g&N;tB-(&$a5xNUTwOL~%Iz{XgP;M;oHnn_)eOkj6R5 zLtf6T)AtuLyX7h2#pW6oecJMQjYXZaR4TKp(*ye_4%u!ao(3|dmD_e#`Ky%-&<=qh z8@sp3D6zHSOsJPFp^CSh=&NT_vTRje{vVtVS$*DzYA6Z2X=3Q<*LU#1=gLkn3gnFk z`uwVsFOM*AyJMlPbG9S+6PrQm_E zK@bjXvh)Kj0vW}z{jnK?gF<*tjkpu!!~x*9Aax6G3!?g;r@?qJ99!29dmL@)ilBy1**;1YFz z6@=j_hoTl4`|$t`ni&s(*DuAO=j~K@Kt6W~HZaGR*Df#L`gdXQADW}MM-FU)*OwV! zbDnFY@t#bvUo~OmzdY@j!*pA_qfbMx9tzG7elHc1q-YeD&XvIfp<2)m*&ug3Kq7&y zl?9`vQ1(|W?2V>&Yox^)AAQDmaiGYxTSuEA&8lsytvjm`|9DVK+7Cv2u=c`UW53}6 zOHFdfkThv(VG#EiOIK%xUZ!i2Lxx3TvXvb%?RPECWIek?;y0|>pA$q?xihYpXnwA` zOJzA)awJXhw>AgQU$yh+KJA{FLj_S5Df_=e3!!8JKmw*a1D|E2uJvqs((Y!%=N$u# zA8`uZJNq7+dFM+EHVgAhZD*5bLJ4xHpQ6r-Qui9)T}@B?QOY#oA%AG38YP!takj*&{`i$^*BfAZ_TUIwqwtXDDk`Ai&jtZAmd{-N!i2PYU-NwB%aWjb=$_rZo0TfxLsBU8aop{WatJ0z-| zxYW@hc0wuF-Vhn*YuxjgYAt{eHM3<#AoHNCo4QtX2LpSN-rqx z7&?8;_-*&GG*VuF$T6b)9*6F0@r`=njjFeevoUdVY;P-f zjbZjGE4nVW)JrQ^K9GWDOXWEnukVJp%8FwpnUlW2tZ$jr91v}q`L8E?CM&Q@=b9nRF?8r25K-H0 zd$Xs~(--kaO+_!}4zufgww#Qw@&yEw^8M)y9`Ml?E_BKCQEmaUEo%*3R*1`Pg|qlK z659=mF<7q+o0zF1B^v{_Dxm(ry8|qFmsw&k>bJ$?+-6_%bmTq5LWeT3S*YbmJckrE z73>I$!UOP#7FD)HCwFCEhQ#cSUO=3OsoC_W-uaGV) z|4`k&-l^~E_cl;wv0&6!16C)f)Tdpmp@MK!`;eROzQ7_6cH~|c-E4eGg+h^SI+GpzZJcsL1MGy&?7eBY4!p7)e8LZ z?VgboadzrdMs-F`y&bJ*ksbKq>zs~hPKtvk;`KlztO#zq;1ESzTo`!d5Xe@SR-VPs zQJgb9*UTC(Q~fW5(MbInhx?mSkVv@mhz<5pxx$YEk){@wQq>a`*4C}qU4HVU@3Q5E)lQi2MssrX|$Z+tf>o?_4Eb#D89H3 z(&;3NdSYO?`rQ*9jatS>H(Lle341qf;@f`7)V zy47tCQICJCHiD}&ONA-XEnv!B520~0c(G2G(vRZu>=g4KANDe4$u zx272>9!uq^Haf412qGwlr^k|}%_rvR_Xr2DB}B>bZ+I*??+&K9dM^ALEi!ugN@oR@ z0Im6P93u2~3PV;#W!E)6=byYhoMTu=F+)3;(!UY3+-)pt2YYO)y{?hNcVOiMU(hpD z%PxuAzIU7m2?^#u1Tmw`%p*t5Uamc3+3w0{Y4w!rOr>>?h)ekG&v|1b0AwErS1E>M zLYDR2by3vZz7ArlERfYdcb~f|gmCz)hSC-7!;e(V#^R8|>2UzRu 
zDi46&Enxjx|0`~!9S-W%oYm>J-T!ClKq2N-=|{9GZCurYFl60}APW3iMSe)Y<8>YG z%s*8acj9yec7cdPJ9HrT%pv3Y%f#OhF5fvgswW8XfRj<*v3N%0Xs5(51FuZ5>RH99 z0VjAK~ku5^h*Z_*Mvm+8S9XyVO{dc zFnS|_HCMtfzUGzCfXaxR3<7M3yXChuu)oGph}c>-zAL=!FjzX@ZA}%zI%qqrDs`nH z6X>2hT9Y-VmCC1;eFQ(37n2zLW1!*9RXch*WZrc$KQO9Et7q1&FH^kpC8bHsY#XWE zc`>$F_kk`6dv<8%P`cB@B;Zf>wQ{^_rqY#oY5V7N-p)wh>5rHHo+xnM208XOO%M@k z`B!Y~jlZfV+|YB*puMHShUKqT6{C-C98brtDC%UYzjagIH7y)Yc#)`AtXWw3;&V}n znp*F}Cs|!duQQ1jZ_Jbf=ebFMuIR3f|Ez8%``8FPVQj!QW|4_MK}Yb_NH^_kmgTnRsHHuTf?oZo8MLqeA$kq?NcJ%l7sR)?H|uNty8skACtwC z8UKiO%FwlZG*)Pvwg)f>_Ad|8X-sdnS)?=s#NZ6e6;)5Uu1Ogm5*=s~K&HTO<%4gD zJLVi-{CI%Q&z3yA8uhJnT;<3zn&zA8>V4n)wW;d%N>xKae*iVAjj@PeQp|uh=r&d}a1bVz4 z7M>vmPXfY>C2;`CW*y8x9c^B#oUny*U&FXcJI9qcc4gV!9^D*FZn3*b zt!yNrSP=R8tRE*gGq&FEsWFI*j?JlHDo`nCX=?TXSyM!qcFe$$L%!3ZTqXtAdZ2Px zFZ22&8o9B|@?Ef%f9<6uPZE2PZg=#{k~68THZ%&3rb9KX%GKq_qeKL*BVqSWxhGNt zdE~R|Fziry}nPhK&(kD_?;3w6Y)U|#~c<;pbk6(`3 zxq7A;IhAv619ocZ{N-e7BRa-^FWqCtY8~(?6pvLCX`OSw9DdF zL$1c8jp&DRV-9OW*#xq8>y-f~$84qbm@?>S6Y>aWTvDb>(dUgV@7 zb@HcQQVk|#C5YjHy~;(`*}E5`*bJ}}9w47{gi^Y?=<9sAdpM_*x7^zBj+|Ac|Fvh| zne3g^%O-Qyf%u0MiGM)Mu+_)jd#4kr75>iG{ACvTQeRIm38l+Hgb1+ zbl6`Sgf9JJ_sbXzQDxp;YHe8OJ@~LDL}=9i-4BBFe#O`1$$~?YHJ)AK-dr@<-L+}# z=QB@ji@S36rLw&zzWL6&1@vrj$0ETBq-FRi;E`F0>0(d3fV0w)XOE)Y(yoA#A^(29 zPNvm1+(;}cMTL{Cx7V?p=TlmriM|z2n!jllOUaXHWgc)(z4i;2g^h>fvnHD&ie4uU zeqdj)5&s)*v8#~sk*KINrHkx}LU#6%S zF~6FyKrkLyb!D}0;~Oi7AF^sC91ZWuAv9@Mqqy2%+)*GSrcnUm^yXT{fU&k)QXr!% zoeNo16De$s7SZ*~*8`sRr5F7`l}C%cy!UqxIFpQy4In_H2t2uH&lV(4svk_bXLnKb zUQf&B+mhv@oYw6=i2vaB)9ICV=6Ez3XZv&lZl0YiHp!KvU*_I-^`XZpAEh2xj*&x> zBh3%oku(naVj^j$KUvN;6l9<@A8On*pDRHSea|S(l-Vxz`nYl=iG0Qb;wJmWU4*?7 zdmkrijjn!72FYh2z)(1lO+2RK{5WMP4^JNqb+d$Q0`%E;gZ zOtS^?5cX@dq z4gM2tQD^$XRY*^#m3ivb9k&8_Uw%mg|fnV50wuG z0V)$Iv*H?k*v1(oiWEFbE%}7)ugm)4o^>K~YATuwCe;BIpm?j)L3$0YkY9ZSA}%S_ftI5 z`}X9N<(#f)U1`e`Vci7ZL_6nCNj>fsbFpm%>Mua6K^P`HaFLJA#c`-33d5y)G8wAk zMNja+r_crACt(Zk6R1~By^wb#TdE_}R9m>t_b)!9{J){>dPPlk(%cKnr-$mF4wPLo 
zZr1KT)|gW%jG96)iM?SwN=X9>Yw$`Xy#3tu!TOT|!d^*fpqES#brf;gXl8o|d8hH4o zY4lrk&}g!J>H<&t#U<$@s}7yM2j%r)uEMvg?c6p3rgyY{@uakE{|W^Acb}BSgQ!m` zcNGJe%2vMF(HWPf@=h@5mCerYPa5nTn*UH3mG+~N$qway4WqF9;cbJmYh zTKHERIr*sM;GA_)ep5(bJ9*A;MpT_LaX?`YY-A{~>bV6x+anQid1nP(RAa>^%UI^@ zzupM|MYc)wy8BY1mNk=&qfxPj6S@7(up&KBV2JeP8~LWoWeVJQ6{_rC^E)V@u+AFkQ(I;?_4K(G~MktGn!A zZ2BduQDioC5!k*mf+ekXWo#WBaa}y^mA?M_!#mX+nxEK>eH>UT27?Ds?Zy4jH92r+ z)zAYIQ?mk{xVI#jV5GTmkRY~1;+WX!wWD<2-MJTo++6mbsbBApZdX=UH0YzG(BimB zdA_JM_QAOw+`~wHkygqY)-sT<{Tp{MG3!OIXhUPSrsRe;9~*=h)Y(NpJ?M2u z+_ZV|9L@>xQ-5~>1GFX`ryhZZvNLG#L91)G(NJ#0q28uobi9E;X|||z#`RS1g&ykH z>JR>IriL7mmYSG;qgQ-P>%T+&@IW&)`pbGX*)A-%RRpp95(!Ee32Qatcoi7yPb`o{ zbBOsT(C56gC_3KIqz8VNDOrj-av3BWh?wrCToa0t{yjAv8yUi%Sdz%4*N?3;8F0iZGZ?}oxU zxVYadGH$Qg@J*`TCCq_bOVd;8!4g4rIGm-jh#j9V?JGtFZmIGPUmBGmgllkmwwStm4 zu`rkk3tuHm>wOK)RD(0O8c#Fi+40Xl7kMsJFou7fRXkaTUMTzSzt#zu>0cnYX2gwu zzJ;2?b!nyY0>Ts-bYj#Q&g}5B{|jo#!wCQY literal 0 HcmV?d00001 diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index b49e04e37..ed04a6309 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]==1.11.0 +rocm-docs-core[api_reference]==1.12.0 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 0ac105557..2dfd59d36 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -132,7 +132,7 @@ requests==2.32.3 # via # pygithub # sphinx -rocm-docs-core[api-reference]==1.11.0 +rocm-docs-core[api-reference]==1.12.0 # via -r requirements.in six==1.16.0 # via python-dateutil diff --git a/include/rppdefs.h b/include/rppdefs.h index a53176630..6bbee49ea 100644 --- a/include/rppdefs.h +++ b/include/rppdefs.h @@ -242,6 +242,14 @@ typedef struct Rpp32f data[6]; } Rpp32f6; +/*! \brief RPP 9 float vector + * \ingroup group_rppdefs + */ +typedef struct +{ + Rpp32f data[9]; +} Rpp32f9; + /*! 
\brief RPP 24 signed int vector * \ingroup group_rppdefs */ diff --git a/include/rppt_tensor_geometric_augmentations.h b/include/rppt_tensor_geometric_augmentations.h index aa7eaf4d8..97497749e 100644 --- a/include/rppt_tensor_geometric_augmentations.h +++ b/include/rppt_tensor_geometric_augmentations.h @@ -725,6 +725,50 @@ RppStatus rppt_transpose_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDescP /*! @} */ +/*! \brief Warp perspective augmentation on HOST backend for a NCHW/NHWC layout tensor + * \details The warp perspective performs perspective transformations for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
+ * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). + * - dstPtr depth ranges - Will be same depth as srcPtr. + * \image html img150x150.png Sample Input + * \image html geometric_augmentations_warp_perspective_img150x150.png Sample Output + * \param [in] srcPtr source tensor in HOST memory + * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) + * \param [out] dstPtr destination tensor in HOST memory + * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr) + * \param [in] perspectiveTensor perspective matrix values for transformation calculation (2D tensor in HOST memory, of size batchSize * 9 for each image in batch) + * \param [in] interpolationType Interpolation type used (RpptInterpolationType::BILINEAR or RpptInterpolationType::NEAREST_NEIGHBOR) + * \param [in] roiTensorPtrSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) + * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) + * \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize() + * \return A \ref RppStatus enumeration. + * \retval RPP_SUCCESS Successful completion. + * \retval RPP_ERROR* Unsuccessful completion. + */ +RppStatus rppt_warp_perspective_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *perspectiveTensor, RpptInterpolationType interpolationType, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle); + +#ifdef GPU_SUPPORT +/*!
\brief Warp perspective augmentation on HIP backend for a NCHW/NHWC layout tensor + * \details The warp perspective augmentation performs perspective transformations for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
+ * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). + * - dstPtr depth ranges - Will be same depth as srcPtr. + * \image html img150x150.png Sample Input + * \image html geometric_augmentations_warp_perspective_img150x150.png Sample Output + * \param [in] srcPtr source tensor in HIP memory + * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) + * \param [out] dstPtr destination tensor in HIP memory + * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr) + * \param [in] perspectiveTensor perspective matrix values for transformation calculation (2D tensor in pinned/HIP memory, of size batchSize * 9 for each image in batch) + * \param [in] interpolationType Interpolation type used (RpptInterpolationType::BILINEAR or RpptInterpolationType::NEAREST_NEIGHBOR) + * \param [in] roiTensorPtrSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) + * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) + * \param [in] rppHandle RPP HIP handle created with \ref rppCreateWithStreamAndBatchSize() + * \return A \ref RppStatus enumeration. + * \retval RPP_SUCCESS Successful completion. + * \retval RPP_ERROR* Unsuccessful completion.
+ */ +RppStatus rppt_warp_perspective_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *perspectiveTensor, RpptInterpolationType interpolationType, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle); +#endif // GPU_SUPPORT + #ifdef __cplusplus } #endif diff --git a/src/include/cpu/rpp_cpu_common.hpp b/src/include/cpu/rpp_cpu_common.hpp index 01fb16edd..e9b824f24 100644 --- a/src/include/cpu/rpp_cpu_common.hpp +++ b/src/include/cpu/rpp_cpu_common.hpp @@ -5192,10 +5192,17 @@ inline void compute_generic_bilinear_srclocs_and_interpolate(T *srcPtrChannel, R for (int c = 0; c < srcDescPtr->c; c++) { - dst[c] = (T)std::nearbyintf(((*(srcPtrChannel + srcLoc[0]) * bilinearCoeffs[0]) + // TopRow R01 Pixel * coeff0 - (*(srcPtrChannel + srcLoc[1]) * bilinearCoeffs[1]) + // TopRow R02 Pixel * coeff1 - (*(srcPtrChannel + srcLoc[2]) * bilinearCoeffs[2]) + // BottomRow R01 Pixel * coeff2 - (*(srcPtrChannel + srcLoc[3]) * bilinearCoeffs[3]))); // BottomRow R02 Pixel * coeff3 + if constexpr (std::is_same::value || std::is_same::value) + dst[c] = (T)std::nearbyintf(((*(srcPtrChannel + srcLoc[0]) * bilinearCoeffs[0]) + // TopRow R01 Pixel * coeff0 + (*(srcPtrChannel + srcLoc[1]) * bilinearCoeffs[1]) + // TopRow R02 Pixel * coeff1 + (*(srcPtrChannel + srcLoc[2]) * bilinearCoeffs[2]) + // BottomRow R01 Pixel * coeff2 + (*(srcPtrChannel + srcLoc[3]) * bilinearCoeffs[3]))); // BottomRow R02 Pixel * coeff3 + else if constexpr (std::is_same::value || std::is_same::value) + dst[c] = (T)(((*(srcPtrChannel + srcLoc[0]) * bilinearCoeffs[0]) + // TopRow R01 Pixel * coeff0 + (*(srcPtrChannel + srcLoc[1]) * bilinearCoeffs[1]) + // TopRow R02 Pixel * coeff1 + (*(srcPtrChannel + srcLoc[2]) * bilinearCoeffs[2]) + // BottomRow R01 Pixel * coeff2 + (*(srcPtrChannel + srcLoc[3]) * bilinearCoeffs[3]))); // BottomRow R02 Pixel * coeff3 + srcPtrChannel += srcDescPtr->strides.cStride; } } @@ -5269,7 +5276,9 @@ inline void 
compute_generic_bilinear_srclocs_3c_avx(__m256 &pSrcY, __m256 &pSrcX template inline void compute_generic_bilinear_interpolation_pkd3_to_pln3(Rpp32f srcY, Rpp32f srcX, RpptROI *roiLTRB, T *dstPtrTempR, T *dstPtrTempG, T *dstPtrTempB, T *srcPtrChannel, RpptDescPtr srcDescPtr) { - if ((srcX < roiLTRB->ltrbROI.lt.x) || (srcY < roiLTRB->ltrbROI.lt.y) || (srcX > roiLTRB->ltrbROI.rb.x) || (srcY > roiLTRB->ltrbROI.rb.y)) + Rpp32s srcXFloor = std::floor(srcX); + Rpp32s srcYFloor = std::floor(srcY); + if ((srcXFloor < roiLTRB->ltrbROI.lt.x) || (srcYFloor < roiLTRB->ltrbROI.lt.y) || (srcXFloor > roiLTRB->ltrbROI.rb.x) || (srcYFloor > roiLTRB->ltrbROI.rb.y)) { *dstPtrTempR = 0; *dstPtrTempG = 0; diff --git a/src/include/cpu/rpp_cpu_simd.hpp b/src/include/cpu/rpp_cpu_simd.hpp index 813d47aa0..c58f207d3 100644 --- a/src/include/cpu/rpp_cpu_simd.hpp +++ b/src/include/cpu/rpp_cpu_simd.hpp @@ -3080,7 +3080,7 @@ inline void rpp_generic_nn_load_u8pln1_avx(Rpp8u *srcPtrChannel, Rpp32s *srcLoc, buffer[i] = *(srcPtrChannel + srcLoc[i]); } __m128i px = _mm_loadu_si128((__m128i *)buffer); - p = _mm256_setr_m128i(px, xmm_px0); + p = _mm256_castsi128_si256(px); } inline void rpp_generic_nn_load_f32pkd3_to_f32pln3(Rpp32f *srcPtrChannel, Rpp32s *srcLoc, Rpp32s *invalidLoad, __m128 *p) @@ -4108,6 +4108,40 @@ inline void rpp_resize_nn_load_f16pkd3_to_f32pln3_avx(Rpp16f *srcRowPtrsForInter (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 2), (Rpp32f)*(srcRowPtrsForInterp + loc[7] + 2)); } +inline void rpp_generic_nn_load_f16pkd3_to_f32pln3_avx(Rpp16f *srcRowPtrsForInterp, Rpp32s *loc, Rpp32s *invalidLoad, __m256 *p) +{ + p[0] = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0]) : 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1]) : 0, + (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2]) : 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3]) : 0, + (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4]) : 0, (!invalidLoad[5]) ? 
(Rpp32f)*(srcRowPtrsForInterp + loc[5]) : 0, + (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6]) : 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7]) : 0); + + p[1] = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0] + 1) : 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1] + 1) : 0, + (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2] + 1) : 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3] + 1) : 0, + (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4] + 1) : 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5] + 1) : 0, + (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 1) : 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7] + 1) : 0); + + p[2] = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0] + 2) : 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1] + 2) : 0, + (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2] + 2) : 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3] + 2) : 0, + (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4] + 2) : 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5] + 2) : 0, + (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 2) : 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7] + 2) : 0); +} + +inline void rpp_generic_nn_load_f16pkd3_to_f32pkd3_avx(Rpp16f *srcRowPtrsForInterp, Rpp32s *loc, Rpp32s *invalidLoad, __m256 *p) +{ + p[0] = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0]): 0, (!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0] + 1): 0, // Get R01|G01|B01|R02|B02|G02|R03|G03 + (!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0] + 2): 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1]): 0, // load the values from input using srcLoc buffer if invalidLoad is 0, else set the values to 0 + (!invalidLoad[1]) ? 
(Rpp32f)*(srcRowPtrsForInterp + loc[1] + 1): 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1] + 2): 0, + (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2]): 0, (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2] + 1): 0); + p[1] = _mm256_setr_ps((!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2] + 2): 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3]): 0, // Get B03|R04|G04|B04|R05|G05|B05|R06 + (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3] + 1): 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3] + 2): 0, // load the values from input using srcLoc buffer if invalidLoad is 0, else set the values to 0 + (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4]): 0, (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4] + 1): 0, + (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4] + 2): 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5]): 0); + p[2] = _mm256_setr_ps((!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5] + 1): 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5] + 2): 0, // Get G06|B06|R07|G07|B07|R08|G08|B08 + (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6]): 0, (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 1): 0, // load the values from input using srcLoc buffer if invalidLoad is 0, else set the values to 0 + (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 2): 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7]): 0, + (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7] + 1): 0, (!invalidLoad[7]) ? 
(Rpp32f)*(srcRowPtrsForInterp + loc[7] + 2): 0); +} + inline void rpp_resize_nn_load_f32pln1(Rpp32f *srcRowPtrsForInterp, Rpp32s *loc, __m128 &p) { __m128 pTemp[4]; @@ -4136,6 +4170,14 @@ inline void rpp_resize_nn_load_f16pln1_avx(Rpp16f *srcRowPtrsForInterp, Rpp32s * (Rpp32f)*(srcRowPtrsForInterp + loc[6]), (Rpp32f)*(srcRowPtrsForInterp + loc[7])); } +inline void rpp_generic_nn_load_f16pln1_avx(Rpp16f *srcRowPtrsForInterp, Rpp32s *loc, Rpp32s *invalidLoad, __m256 &p) +{ + p = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0]) : 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1]) : 0, + (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2]) : 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3]) : 0, + (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4]) : 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5]) : 0, + (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6]) : 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7]) : 0); +} + inline void rpp_resize_nn_load_i8pkd3(Rpp8s *srcRowPtrsForInterp, Rpp32s *loc, __m128i &p) { __m128i px[4]; @@ -4258,8 +4300,6 @@ inline void rpp_store12_i8pkd3_to_i8pln3(Rpp8s* dstPtrR, Rpp8s* dstPtrG, Rpp8s* rpp_storeu_si32((__m128i *)(dstPtrB), _mm_shuffle_epi8(p, xmm_char_maskB)); /* Shuffle and extract the B pixels*/ } - - inline void rpp_store12_i8_to_i8(Rpp8s* dstPtr, __m128i &p) { _mm_storeu_si128((__m128i *)(dstPtr), p); @@ -4304,6 +4344,17 @@ inline void rpp_store24_f32pkd3_to_f32pkd3_avx(Rpp32f* dstPtr, __m256 *p) _mm256_storeu_ps(dstPtr + 16, p[2]); /* Store RGB set 3 */ } +inline void rpp_store24_f32pkd3_to_f16pkd3_avx(Rpp16f* dstPtr, __m256* p) +{ + __m128i px128[3]; + px128[0] = _mm256_cvtps_ph(p[0], _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + px128[1] = _mm256_cvtps_ph(p[1], _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + px128[2] = _mm256_cvtps_ph(p[2], _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + _mm_storeu_si128((__m128i *)dstPtr, px128[0]); + 
_mm_storeu_si128((__m128i *)(dstPtr + 8), px128[1]); + _mm_storeu_si128((__m128i *)(dstPtr + 16), px128[2]); +} + inline void rpp_convert24_pkd3_to_pln3(__m128i &pxLower, __m128i &pxUpper, __m128i *pxDstChn) { // pxLower = R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 G5 B5 R6 diff --git a/src/modules/cpu/host_tensor_geometric_augmentations.hpp b/src/modules/cpu/host_tensor_geometric_augmentations.hpp index 9facb0d78..9248e6f2d 100644 --- a/src/modules/cpu/host_tensor_geometric_augmentations.hpp +++ b/src/modules/cpu/host_tensor_geometric_augmentations.hpp @@ -39,5 +39,6 @@ SOFTWARE. #include "kernel/transpose.hpp" #include "kernel/crop_and_patch.hpp" #include "kernel/flip_voxel.hpp" +#include "kernel/warp_perspective.hpp" #endif // HOST_TENSOR_GEOMETRIC_AUGMENTATIONS_HPP diff --git a/src/modules/cpu/kernel/warp_perspective.hpp b/src/modules/cpu/kernel/warp_perspective.hpp new file mode 100644 index 000000000..24d4de178 --- /dev/null +++ b/src/modules/cpu/kernel/warp_perspective.hpp @@ -0,0 +1,2196 @@ +/* +MIT License + +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "rppdefs.h" +#include "rpp_cpu_simd.hpp" +#include "rpp_cpu_common.hpp" + +/************* warp_perspective helpers *************/ + +#if __AVX2__ +inline void compute_warp_perspective_src_loc_next_term_avx(__m256 &plocW, __m256 &plocY, __m256 &plocX, __m256 &pSrcY, __m256 &pSrcX, __m256 &pPerspectiveMatrixTerm6Incr, __m256 &pPerspectiveMatrixTerm3Incr, __m256 &pPerspectiveMatrixTerm0Incr, __m256 &pRoiHalfHeight, __m256 &pRoiHalfWidth) +{ + plocW = _mm256_add_ps(plocW, pPerspectiveMatrixTerm6Incr); + plocY = _mm256_add_ps(plocY, pPerspectiveMatrixTerm3Incr); + plocX = _mm256_add_ps(plocX, pPerspectiveMatrixTerm0Incr); + pSrcY = _mm256_add_ps(_mm256_div_ps(plocY, plocW), pRoiHalfHeight); + pSrcX = _mm256_add_ps(_mm256_div_ps(plocX, plocW), pRoiHalfWidth); +} +inline void compute_warp_perspective_src_loc_first_term_avx(Rpp32f locX, Rpp32f locY, Rpp32f locW, __m256 &plocW, __m256 &plocY, __m256 &plocX, __m256 &pSrcY, __m256 &pSrcX, __m256 &pPerspectiveMatrixTerm6, __m256 &pPerspectiveMatrixTerm3, __m256 &pPerspectiveMatrixTerm0, __m256 &pRoiHalfHeight, __m256 &pRoiHalfWidth) { + plocX = _mm256_add_ps(_mm256_set1_ps(locX), pPerspectiveMatrixTerm0); + plocY = _mm256_add_ps(_mm256_set1_ps(locY), pPerspectiveMatrixTerm3); + plocW = _mm256_add_ps(_mm256_set1_ps(locW), pPerspectiveMatrixTerm6); + pSrcY = _mm256_add_ps(_mm256_div_ps(plocY, plocW), pRoiHalfHeight); + pSrcX = _mm256_add_ps(_mm256_div_ps(plocX, plocW), pRoiHalfWidth); +} +#endif + +inline void compute_warp_perspective_src_loc_params(Rpp32s dstY, Rpp32s dstX, Rpp32f &locW, Rpp32f &locY, Rpp32f &locX, Rpp32f9 *perspectiveMatrix_f9, Rpp32s roiHalfHeight, Rpp32s roiHalfWidth) +{ + dstX -= roiHalfWidth; + dstY -= roiHalfHeight; + locX = 
std::fma(dstX, perspectiveMatrix_f9->data[0], std::fma(dstY, perspectiveMatrix_f9->data[1], perspectiveMatrix_f9->data[2])); + locY = std::fma(dstX, perspectiveMatrix_f9->data[3], std::fma(dstY, perspectiveMatrix_f9->data[4], perspectiveMatrix_f9->data[5])); + locW = std::fma(dstX, perspectiveMatrix_f9->data[6], std::fma(dstY, perspectiveMatrix_f9->data[7], perspectiveMatrix_f9->data[8])); +} + +inline void compute_warp_perspective_src_loc_next_term(Rpp32s dstX, Rpp32f &locW, Rpp32f &locY, Rpp32f &locX, Rpp32f &srcY, Rpp32f &srcX, Rpp32f9 *perspectiveMatrix_f9, Rpp32s roiHalfHeight, Rpp32s roiHalfWidth) +{ + locW += perspectiveMatrix_f9->data[6]; + locY += perspectiveMatrix_f9->data[3]; // Used in computation of next srcY locations by adding the delta from previous location + locX += perspectiveMatrix_f9->data[0]; // Used in computation of next srcX locations by adding the delta from previous location + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); +} + +/************* NEAREST NEIGHBOR INTERPOLATION *************/ + +RppStatus warp_perspective_nn_u8_u8_host_tensor(Rpp8u *srcPtr, + RpptDescPtr srcDescPtr, + Rpp8u *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi, roiLTRB; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + compute_ltrb_from_xywh_host(&roi, &roiLTRB); + Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1; + Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1; + + Rpp32f9 *perspectiveMatrix_f9; + perspectiveMatrix_f9 = 
reinterpret_cast(perspectiveTensor + batchCount * 9); + + Rpp8u *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + srcPtrChannel = srcPtrImage; + dstPtrChannel = dstPtrImage; + + Rpp32s vectorIncrementPerChannel = 8; + Rpp32s vectorIncrementPkd = 24; + Rpp32u bufferLength = roi.xywhROI.roiWidth; + Rpp32u alignedLength = bufferLength & ~7; // Align dst width to process 16 dst pixels per iteration + Rpp32s srcLoc[8] = {0}; // Since 4 dst pixels are processed per iteration + Rpp32s invalidLoad[8] = {0}; // Since 4 dst pixels are processed per iteration + +#if __AVX2__ + __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride); + __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7); + __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7); + __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7); + __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8); + __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8); + __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8); + __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight); + 
__m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth); + __m256 pRoiLTRB[4]; + pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x); + pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y); + pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x); + pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y); +#endif + + // Warp perspective with fused output-layout toggle (NHWC -> NCHW) + if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8u *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8u *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256i pRow; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true); + rpp_simd_load(rpp_generic_nn_load_u8pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store24_u8pkd3_to_u8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + 
dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + + // Warp perspective with fused output-layout toggle (NCHW -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp8u *dstPtrRow; + dstPtrRow = dstPtrChannel; + Rpp8u *srcPtrChannelR, *srcPtrChannelG, *srcPtrChannelB; + srcPtrChannelR = srcPtrChannel; + srcPtrChannelG = srcPtrChannelR + srcDescPtr->strides.cStride; + srcPtrChannelB = srcPtrChannelG + srcDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8u *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + 
__m256i pRow[3]; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad); + rpp_simd_load(rpp_generic_nn_load_u8pln1_avx, srcPtrChannelR, srcLoc, invalidLoad, pRow[0]); + rpp_simd_load(rpp_generic_nn_load_u8pln1_avx, srcPtrChannelG, srcLoc, invalidLoad, pRow[1]); + rpp_simd_load(rpp_generic_nn_load_u8pln1_avx, srcPtrChannelB, srcLoc, invalidLoad, pRow[2]); + rpp_simd_store(rpp_store24_u8pln3_to_u8pkd3_avx, dstPtrTemp, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pln3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + + // Warp perspective without fused output-layout toggle (NHWC -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp8u *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8u *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + 
compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256i pRow; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true); + rpp_simd_load(rpp_generic_nn_load_u8pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store24_u8_to_u8_avx, dstPtrTemp, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + // Warp perspective without fused output-layout toggle (NCHW -> NCHW for 1 channel and 3 channel) + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8u *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8u *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, 
perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + Rpp8u *dstPtrTempChn, *srcPtrTempChn; + srcPtrTempChn = srcPtrChannel; + dstPtrTempChn = dstPtrTemp; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad); + for (int c = 0; c < srcDescPtr->c; c++) + { + __m256i pRow; + rpp_simd_load(rpp_generic_nn_load_u8pln1_avx, srcPtrTempChn, srcLoc, invalidLoad, pRow); + rpp_storeu_si64(reinterpret_cast<__m128i *>(dstPtrTempChn), _mm256_castsi256_si128(pRow)); + srcPtrTempChn += srcDescPtr->strides.cStride; + dstPtrTempChn += dstDescPtr->strides.cStride; + } + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + } + + return RPP_SUCCESS; +} + + +RppStatus warp_perspective_nn_f32_f32_host_tensor(Rpp32f *srcPtr, + RpptDescPtr 
srcDescPtr, + Rpp32f *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi, roiLTRB; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + compute_ltrb_from_xywh_host(&roi, &roiLTRB); + Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1; + Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1; + + Rpp32f9 *perspectiveMatrix_f9; + perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9); + + Rpp32f *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + srcPtrChannel = srcPtrImage; + dstPtrChannel = dstPtrImage; + + Rpp32s vectorIncrementPerChannel = 8; + Rpp32s vectorIncrementPkd = 24; + Rpp32u bufferLength = roi.xywhROI.roiWidth; + Rpp32u alignedLength = bufferLength & ~7; // Align dst width to process 8 dst pixels per iteration + Rpp32s srcLoc[8] = {0}; // Since 8 dst pixels are processed per iteration + Rpp32s invalidLoad[8] = {0}; // Since 8 dst pixels are processed per iteration + +#if __AVX2__ + __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride); + __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7); + __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3],
perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7); + __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7); + __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8); + __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8); + __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8); + __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight); + __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth); + __m256 pRoiLTRB[4]; + pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x); + pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y); + pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x); + pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y); +#endif + + // Warp perspective with fused output-layout toggle (NHWC -> NCHW) + if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp32f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp32f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + 
compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pRow[3]; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true); + rpp_simd_load(rpp_generic_nn_load_f32pkd3_to_f32pln3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store24_f32pln3_to_f32pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + + // Warp perspective with fused output-layout toggle (NCHW -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp32f *dstPtrRow; + dstPtrRow = dstPtrChannel; + Rpp32f 
*srcPtrChannelR, *srcPtrChannelG, *srcPtrChannelB; + srcPtrChannelR = srcPtrChannel; + srcPtrChannelG = srcPtrChannelR + srcDescPtr->strides.cStride; + srcPtrChannelB = srcPtrChannelG + srcDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp32f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pRow[3]; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad); + rpp_simd_load(rpp_generic_nn_load_f32pln1_avx, srcPtrChannelR, srcLoc, invalidLoad, pRow[0]); + rpp_simd_load(rpp_generic_nn_load_f32pln1_avx, srcPtrChannelG, srcLoc, invalidLoad, pRow[1]); + rpp_simd_load(rpp_generic_nn_load_f32pln1_avx, srcPtrChannelB, srcLoc, invalidLoad, pRow[2]); + rpp_simd_store(rpp_store24_f32pln3_to_f32pkd3_avx, dstPtrTemp, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pln3_to_pkd3(srcY, 
srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + + // Warp perspective without fused output-layout toggle (NHWC -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp32f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp32f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pRow[3]; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true); + rpp_simd_load(rpp_generic_nn_load_f32pkd3_to_f32pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store24_f32pkd3_to_f32pkd3_avx, dstPtrTemp, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + 
roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + // Warp perspective without fused output-layout toggle (NCHW -> NCHW for 1 channel and 3 channel) + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp32f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp32f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + Rpp32f *dstPtrTempChn, *srcPtrTempChn; + srcPtrTempChn = srcPtrChannel; + dstPtrTempChn = dstPtrTemp; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad); + for (int c = 0; c < srcDescPtr->c; c++) + { + __m256 pRow; + rpp_simd_load(rpp_generic_nn_load_f32pln1_avx, srcPtrTempChn, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store8_f32_to_f32_avx, dstPtrTempChn, &pRow); + srcPtrTempChn += srcDescPtr->strides.cStride; + dstPtrTempChn += dstDescPtr->strides.cStride; + } + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, 
pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + } + + return RPP_SUCCESS; +} + +RppStatus warp_perspective_nn_i8_i8_host_tensor(Rpp8s *srcPtr, + RpptDescPtr srcDescPtr, + Rpp8s *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi, roiLTRB; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + compute_ltrb_from_xywh_host(&roi, &roiLTRB); + Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1; + Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1; + + Rpp32f9 *perspectiveMatrix_f9; + perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9); + + Rpp8s *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + srcPtrChannel = srcPtrImage;
+ dstPtrChannel = dstPtrImage; + + Rpp32s vectorIncrementPerChannel = 8; + Rpp32s vectorIncrementPkd = 24; + Rpp32u bufferLength = roi.xywhROI.roiWidth; + Rpp32u alignedLength = bufferLength & ~7; // Align dst width to process 8 dst pixels per iteration + Rpp32s srcLoc[8] = {0}; // Since 8 dst pixels are processed per iteration + Rpp32s invalidLoad[8] = {0}; // Since 8 dst pixels are processed per iteration + +#if __AVX2__ + __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride); + __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7); + __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7); + __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7); + __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8); + __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8); + __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8); + __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight); + __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth); + __m256 pRoiLTRB[4]; + pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x); + pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y); + pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x); + pRoiLTRB[3] =
_mm256_set1_ps(roiLTRB.ltrbROI.rb.y); +#endif + + // Warp perspective with fused output-layout toggle (NHWC -> NCHW) + if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8s *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8s *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256i pRow; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true); + rpp_simd_load(rpp_generic_nn_load_i8pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store24_i8pkd3_to_i8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += 
(perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + + // Warp perspective with fused output-layout toggle (NCHW -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp8s *dstPtrRow; + dstPtrRow = dstPtrChannel; + Rpp8s *srcPtrChannelR, *srcPtrChannelG, *srcPtrChannelB; + srcPtrChannelR = srcPtrChannel; + srcPtrChannelG = srcPtrChannelR + srcDescPtr->strides.cStride; + srcPtrChannelB = srcPtrChannelG + srcDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8s *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256i pRow[3]; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad); + rpp_simd_load(rpp_generic_nn_load_i8pln1_avx, srcPtrChannelR, srcLoc, invalidLoad, pRow[0]); + 
rpp_simd_load(rpp_generic_nn_load_i8pln1_avx, srcPtrChannelG, srcLoc, invalidLoad, pRow[1]); + rpp_simd_load(rpp_generic_nn_load_i8pln1_avx, srcPtrChannelB, srcLoc, invalidLoad, pRow[2]); + rpp_simd_store(rpp_store24_i8pln3_to_i8pkd3_avx, dstPtrTemp, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pln3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + + // Warp perspective without fused output-layout toggle (NHWC -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp8s *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8s *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; 
vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256i pRow; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true); + rpp_simd_load(rpp_generic_nn_load_i8pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store24_i8_to_i8_avx, dstPtrTemp, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + // Warp perspective without fused output-layout toggle (NCHW -> NCHW for 1 channel and 3 channel) + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8s *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8s *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, 
pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + Rpp8s *dstPtrTempChn, *srcPtrTempChn; + srcPtrTempChn = srcPtrChannel; + dstPtrTempChn = dstPtrTemp; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad); + for (int c = 0; c < srcDescPtr->c; c++) + { + __m256i pRow; + rpp_simd_load(rpp_generic_nn_load_i8pln1_avx, srcPtrTempChn, srcLoc, invalidLoad, pRow); + rpp_storeu_si64(reinterpret_cast<__m128i *>(dstPtrTempChn), _mm256_castsi256_si128(pRow)); + srcPtrTempChn += srcDescPtr->strides.cStride; + dstPtrTempChn += dstDescPtr->strides.cStride; + } + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + } + + return RPP_SUCCESS; +} + +RppStatus warp_perspective_nn_f16_f16_host_tensor(Rpp16f *srcPtr, + RpptDescPtr srcDescPtr, + Rpp16f *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = 
{0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi, roiLTRB; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + compute_ltrb_from_xywh_host(&roi, &roiLTRB); + Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1; + Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1; + + Rpp32f9 *perspectiveMatrix_f9; + perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9); + + Rpp16f *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + srcPtrChannel = srcPtrImage; + dstPtrChannel = dstPtrImage; + + Rpp32s vectorIncrementPerChannel = 8; + Rpp32s vectorIncrementPkd = 24; + Rpp32u bufferLength = roi.xywhROI.roiWidth; + Rpp32u alignedLength = bufferLength & ~7; // Align dst width to process 8 dst pixels per iteration + Rpp32s srcLoc[8] = {0}; // Since 8 dst pixels are processed per iteration + Rpp32s invalidLoad[8] = {0}; // Since 8 dst pixels are processed per iteration + +#if __AVX2__ + __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride); + __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7); + __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7); + __m256
pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7); + __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8); + __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8); + __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8); + __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight); + __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth); + __m256 pRoiLTRB[4]; + pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x); + pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y); + pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x); + pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y); +#endif + + // Warp perspective with fused output-layout toggle (NHWC -> NCHW) + if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp16f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp16f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; 
vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pRow[3]; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true); + rpp_simd_load(rpp_generic_nn_load_f16pkd3_to_f32pln3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store24_f32pln3_to_f16pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + + // Warp perspective with fused output-layout toggle (NCHW -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp16f *dstPtrRow; + dstPtrRow = dstPtrChannel; + Rpp16f *srcPtrChannelR, *srcPtrChannelG, *srcPtrChannelB; + srcPtrChannelR = srcPtrChannel; + srcPtrChannelG = srcPtrChannelR + srcDescPtr->strides.cStride; + srcPtrChannelB = srcPtrChannelG + srcDescPtr->strides.cStride; + for (int i = 0; i < 
roi.xywhROI.roiHeight; i++) + { + Rpp16f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pRow[3]; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad); + rpp_simd_load(rpp_generic_nn_load_f16pln1_avx, srcPtrChannelR, srcLoc, invalidLoad, pRow[0]); + rpp_simd_load(rpp_generic_nn_load_f16pln1_avx, srcPtrChannelG, srcLoc, invalidLoad, pRow[1]); + rpp_simd_load(rpp_generic_nn_load_f16pln1_avx, srcPtrChannelB, srcLoc, invalidLoad, pRow[2]); + rpp_simd_store(rpp_store24_f32pln3_to_f16pkd3_avx, dstPtrTemp, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pln3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += 
dstDescPtr->strides.hStride; + } + } + // Warp perspective without fused output-layout toggle (NHWC -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp16f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp16f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pRow[3]; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true); + rpp_simd_load(rpp_generic_nn_load_f16pkd3_to_f32pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store24_f32pkd3_to_f16pkd3_avx, dstPtrTemp, pRow); + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + 
compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + // Warp perspective without fused output-layout toggle (NCHW -> NCHW for 1 channel and 3 channel) + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp16f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp16f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + Rpp16f *dstPtrTempChn, *srcPtrTempChn; + srcPtrTempChn = srcPtrChannel; + dstPtrTempChn = dstPtrTemp; + compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad); + for (int c = 0; c < srcDescPtr->c; c++) + { + __m256 pRow; + rpp_simd_load(rpp_generic_nn_load_f16pln1_avx, srcPtrTempChn, srcLoc, invalidLoad, pRow); + rpp_simd_store(rpp_store8_f32_to_f16_avx, dstPtrTempChn, &pRow); + srcPtrTempChn += srcDescPtr->strides.cStride; + dstPtrTempChn += dstDescPtr->strides.cStride; + } + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += 
(perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_nn_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + } + + return RPP_SUCCESS; +} + +RppStatus warp_perspective_bilinear_u8_u8_host_tensor(Rpp8u *srcPtr, + RpptDescPtr srcDescPtr, + Rpp8u *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi, roiLTRB; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + compute_ltrb_from_xywh_host(&roi, &roiLTRB); + Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1; + Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1; + + Rpp32f9 *perspectiveMatrix_f9; + perspectiveMatrix_f9 = reinterpret_cast(perspectiveTensor + batchCount * 9); + + Rpp8u *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + srcPtrChannel = srcPtrImage; + dstPtrChannel = dstPtrImage; + + Rpp32s vectorIncrementPerChannel = 8; + Rpp32s vectorIncrementPkd = 24; + Rpp32u bufferLength = roi.xywhROI.roiWidth; + Rpp32u alignedLength = 
bufferLength & ~7; // Align dst width to process 16 dst pixels per iteration + +#if __AVX2__ + __m256 pBilinearCoeffs[4]; + __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride); + __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7); + __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7); + __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7); + __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8); + __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8); + __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8); + __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight); + __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth); + __m256 pRoiLTRB[4]; + pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x); + pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y); + pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x); + pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y); + + __m256i pxSrcStridesCHW[3]; + pxSrcStridesCHW[0] = _mm256_set1_epi32(srcDescPtr->strides.cStride); + pxSrcStridesCHW[1] = _mm256_set1_epi32(srcDescPtr->strides.hStride); + pxSrcStridesCHW[2] = _mm256_set1_epi32(srcDescPtr->strides.wStride); + RpptBilinearNbhoodLocsVecLen8 srcLocs; +#endif + + // 
Warp perspective with fused output-layout toggle (NHWC -> NCHW) + if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8u *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8u *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_u8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += 
vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + // Warp Perspective with fused output-layout toggle (NCHW -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp8u *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8u *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, 
pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_u8pkd3_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + // Warp perspective without fused output-layout toggle (NHWC -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp8u *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8u *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for 
(; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_u8pkd3_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + + // Warp perspective without fused output-layout toggle (NCHW -> NCHW) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8u *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8u 
*dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_u8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, 
&roiLTRB, dstPtrTempR++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + + // Warp Perspective without fused output-layout toggle single channel (NCHW -> NCHW) + else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8u *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8u *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[4], pDst; + compute_generic_bilinear_srclocs_1c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, pRoiLTRB); + rpp_simd_load(rpp_generic_bilinear_load_1c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_1c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store8_f32pln1_to_u8pln1_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + 
dstPtrTemp += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + } + + return RPP_SUCCESS; +} + +RppStatus warp_perspective_bilinear_f32_f32_host_tensor(Rpp32f *srcPtr, + RpptDescPtr srcDescPtr, + Rpp32f *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi, roiLTRB; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + compute_ltrb_from_xywh_host(&roi, &roiLTRB); + Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1; + Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1; + + Rpp32f9 *perspectiveMatrix_f9; + perspectiveMatrix_f9 = reinterpret_cast(perspectiveTensor + batchCount * 9); + + Rpp32f *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + srcPtrChannel = srcPtrImage; + dstPtrChannel = dstPtrImage; + + Rpp32s 
vectorIncrementPerChannel = 8; + Rpp32s vectorIncrementPkd = 24; + Rpp32u bufferLength = roi.xywhROI.roiWidth; + Rpp32u alignedLength = bufferLength & ~7; // Align dst width to process 16 dst pixels per iteration + +#if __AVX2__ + __m256 pBilinearCoeffs[4]; + __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride); + __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7); + __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7); + __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7); + __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8); + __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8); + __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8); + __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight); + __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth); + __m256 pRoiLTRB[4]; + pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x); + pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y); + pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x); + pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y); + + __m256i pxSrcStridesCHW[3]; + pxSrcStridesCHW[0] = _mm256_set1_epi32(srcDescPtr->strides.cStride); + pxSrcStridesCHW[1] = 
_mm256_set1_epi32(srcDescPtr->strides.hStride); + pxSrcStridesCHW[2] = _mm256_set1_epi32(srcDescPtr->strides.wStride); + RpptBilinearNbhoodLocsVecLen8 srcLocs; +#endif + + // Warp perspective with fused output-layout toggle (NHWC -> NCHW) + if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp32f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp32f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_f32pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, 
pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + // Warp Perspective with fused output-layout toggle (NCHW -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp32f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp32f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, 
srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_f32pkd3_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + // Warp perspective without fused output-layout toggle (NHWC -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp32f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp32f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + 
compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_f32pkd3_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + + // Warp perspective without fused output-layout toggle (NCHW -> NCHW) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp32f *dstPtrRowR, *dstPtrRowG, 
*dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp32f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_f32pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * 
vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTempR++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + + // Warp Perspective without fused output-layout toggle single channel (NCHW -> NCHW) + else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp32f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp32f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[4], pDst; + compute_generic_bilinear_srclocs_1c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, pRoiLTRB); + rpp_simd_load(rpp_generic_bilinear_load_1c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_1c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store8_f32pln1_to_f32pln1_avx, dstPtrTemp, 
pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + } + + return RPP_SUCCESS; +} + +RppStatus warp_perspective_bilinear_i8_i8_host_tensor(Rpp8s *srcPtr, + RpptDescPtr srcDescPtr, + Rpp8s *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi, roiLTRB; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + compute_ltrb_from_xywh_host(&roi, &roiLTRB); + Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1; + Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1; + + Rpp32f9 *perspectiveMatrix_f9; + perspectiveMatrix_f9 = reinterpret_cast(perspectiveTensor + batchCount * 9); + + Rpp8s *srcPtrChannel, *dstPtrChannel, 
*srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + srcPtrChannel = srcPtrImage; + dstPtrChannel = dstPtrImage; + + Rpp32s vectorIncrementPerChannel = 8; + Rpp32s vectorIncrementPkd = 24; + Rpp32u bufferLength = roi.xywhROI.roiWidth; + Rpp32u alignedLength = bufferLength & ~7; // Align dst width to process 16 dst pixels per iteration + +#if __AVX2__ + __m256 pBilinearCoeffs[4]; + __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride); + __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7); + __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7); + __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7); + __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8); + __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8); + __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8); + __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight); + __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth); + __m256 pRoiLTRB[4]; + pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x); + pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y); + pRoiLTRB[2] = 
_mm256_set1_ps(roiLTRB.ltrbROI.rb.x); + pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y); + + __m256i pxSrcStridesCHW[3]; + pxSrcStridesCHW[0] = _mm256_set1_epi32(srcDescPtr->strides.cStride); + pxSrcStridesCHW[1] = _mm256_set1_epi32(srcDescPtr->strides.hStride); + pxSrcStridesCHW[2] = _mm256_set1_epi32(srcDescPtr->strides.wStride); + RpptBilinearNbhoodLocsVecLen8 srcLocs; +#endif + + // Warp perspective with fused output-layout toggle (NHWC -> NCHW) + if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8s *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8s *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + compute_offset_i8_3c_avx(pDst); 
+ rpp_simd_store(rpp_store24_f32pln3_to_i8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + // Warp Perspective with fused output-layout toggle (NCHW -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp8s *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8s *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, 
pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + compute_offset_i8_3c_avx(pDst); + rpp_simd_store(rpp_store24_f32pln3_to_i8pkd3_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + // Warp perspective without fused output-layout toggle (NHWC -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp8s *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8s *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int 
vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + compute_offset_i8_3c_avx(pDst); + rpp_simd_store(rpp_store24_f32pln3_to_i8pkd3_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + 
dstPtrRow += dstDescPtr->strides.hStride; + } + } + + // Warp perspective without fused output-layout toggle (NCHW -> NCHW) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8s *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8s *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + compute_offset_i8_3c_avx(pDst); + rpp_simd_store(rpp_store24_f32pln3_to_i8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + 
dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTempR++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + + // Warp Perspective without fused output-layout toggle single channel (NCHW -> NCHW) + else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp8s *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp8s *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[4], pDst; + compute_generic_bilinear_srclocs_1c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, pRoiLTRB); + 
rpp_simd_load(rpp_generic_bilinear_load_1c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_1c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + compute_offset_i8_1c_avx(pDst); + rpp_simd_store(rpp_store8_f32pln1_to_i8pln1_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + } + + return RPP_SUCCESS; +} + +RppStatus warp_perspective_bilinear_f16_f16_host_tensor(Rpp16f *srcPtr, + RpptDescPtr srcDescPtr, + Rpp16f *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi, roiLTRB; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + 
compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + compute_ltrb_from_xywh_host(&roi, &roiLTRB); + Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1; + Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1; + + Rpp32f9 *perspectiveMatrix_f9; + perspectiveMatrix_f9 = reinterpret_cast(perspectiveTensor + batchCount * 9); + + Rpp16f *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + srcPtrChannel = srcPtrImage; + dstPtrChannel = dstPtrImage; + + Rpp32s vectorIncrementPerChannel = 8; + Rpp32s vectorIncrementPkd = 24; + Rpp32u bufferLength = roi.xywhROI.roiWidth; + Rpp32u alignedLength = bufferLength & ~7; // Align dst width to process 16 dst pixels per iteration + +#if __AVX2__ + __m256 pBilinearCoeffs[4]; + __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride); + __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7); + __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7); + __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7); + __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8); + __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 
8); + __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8); + __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight); + __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth); + __m256 pRoiLTRB[4]; + pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x); + pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y); + pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x); + pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y); + + __m256i pxSrcStridesCHW[3]; + pxSrcStridesCHW[0] = _mm256_set1_epi32(srcDescPtr->strides.cStride); + pxSrcStridesCHW[1] = _mm256_set1_epi32(srcDescPtr->strides.hStride); + pxSrcStridesCHW[2] = _mm256_set1_epi32(srcDescPtr->strides.wStride); + RpptBilinearNbhoodLocsVecLen8 srcLocs; +#endif + + // Warp perspective with fused output-layout toggle (NHWC -> NCHW) + if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp16f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp16f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, 
pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_f16pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + // Warp Perspective with fused output-layout toggle (NCHW -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp16f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp16f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + 
compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_f16pkd3_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + // Warp perspective without fused 
output-layout toggle (NHWC -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + Rpp16f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp16f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_f16pkd3_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPkd; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + 
compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + dstPtrTemp += 3; + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + + // Warp perspective without fused output-layout toggle (NCHW -> NCHW) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp16f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp16f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[12], pDst[3]; + compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false); + rpp_simd_load(rpp_generic_bilinear_load_3c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store24_f32pln3_to_f16pln3_avx, dstPtrTempR, 
dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTempR += vectorIncrementPerChannel; + dstPtrTempG += vectorIncrementPerChannel; + dstPtrTempB += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTempR++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + + // Warp Perspective without fused output-layout toggle single channel (NCHW -> NCHW) + else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + Rpp16f *dstPtrRow; + dstPtrRow = dstPtrChannel; + for (int i = 0; i < roi.xywhROI.roiHeight; i++) + { + Rpp16f *dstPtrTemp; + dstPtrTemp = dstPtrRow; + + int vectorLoopCount = 0; + Rpp32f locX, locY, locW, srcX, srcY; + compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); +#if __AVX2__ + __m256 plocX, plocY, plocW, pSrcX, pSrcY; + compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth); + for (; 
vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel) + { + __m256 pSrc[4], pDst; + compute_generic_bilinear_srclocs_1c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, pRoiLTRB); + rpp_simd_load(rpp_generic_bilinear_load_1c_avx, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc); // Load input pixels required for bilinear interpolation + compute_bilinear_interpolation_1c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation + rpp_simd_store(rpp_store8_f32pln1_to_f16pln1_avx, dstPtrTemp, pDst); // Store dst pixels + compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth); + dstPtrTemp += vectorIncrementPerChannel; + } +#endif + locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); + locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount); + locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount); + srcX = ((locX / locW) + roiHalfWidth); + srcY = ((locY / locW) + roiHalfHeight); + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr); + compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); + } + dstPtrRow += dstDescPtr->strides.hStride; + } + } + } + + return RPP_SUCCESS; +} \ No newline at end of file diff --git a/src/modules/hip/hip_tensor_geometric_augmentations.hpp b/src/modules/hip/hip_tensor_geometric_augmentations.hpp index 102e7d686..80a7ac356 100644 --- a/src/modules/hip/hip_tensor_geometric_augmentations.hpp +++ b/src/modules/hip/hip_tensor_geometric_augmentations.hpp @@ -39,5 +39,6 @@ SOFTWARE. 
#include "kernel/transpose.hpp" #include "kernel/crop_and_patch.hpp" #include "kernel/flip_voxel.hpp" +#include "kernel/warp_perspective.hpp" #endif // HIP_TENSOR_GEOMETRIC_AUGMENTATIONS_HPP diff --git a/src/modules/hip/kernel/warp_perspective.hpp b/src/modules/hip/kernel/warp_perspective.hpp new file mode 100644 index 000000000..4e6aa4532 --- /dev/null +++ b/src/modules/hip/kernel/warp_perspective.hpp @@ -0,0 +1,461 @@ +#include +#include "rpp_hip_common.hpp" + +// -------------------- Set 0 - warp_perspective device helpers -------------------- + +__device__ void warp_perspective_srclocs_hip_compute(float perspectiveMatrixElement, float4 locHomComponent_f4, float4 roiComponent_f4, d_float8 *locHomW_f8, d_float8 *locSrcPtr_f8) +{ + d_float8 increment_f8; + increment_f8.f4[0] = make_float4(0, perspectiveMatrixElement, perspectiveMatrixElement + perspectiveMatrixElement, perspectiveMatrixElement + perspectiveMatrixElement + perspectiveMatrixElement); + increment_f8.f4[1] = static_cast(perspectiveMatrixElement + increment_f8.f4[0].w) + increment_f8.f4[0]; + locSrcPtr_f8->f4[0] = ((locHomComponent_f4 + increment_f8.f4[0])/locHomW_f8->f4[0]) + roiComponent_f4; //Compute src x/src y locations based on homogeneous coords hom x/hom y and common scale hom w for dst x and dst y locations [0-3] + locSrcPtr_f8->f4[1] = ((locHomComponent_f4 + increment_f8.f4[1])/locHomW_f8->f4[1]) + roiComponent_f4; //Compute src x/src y locations based on homogeneous coords hom x/hom y and common scale hom w for dst x and dst y locations [4-7] +} + +__device__ void warp_perspective_roi_and_srclocs_hip_compute(int4 *srcRoiPtr_i4, int id_x, int id_y, d_float9 *perspectiveMatrix_f9, d_float16 *locSrc_f16) +{ + float2 locDst_f2; + float3 locHom_f3; + float4 locHomW_f4; + d_float8 locHomW_f8, incrementW_f8; + float roiHalfWidth = (srcRoiPtr_i4->z - srcRoiPtr_i4->x + 1) >> 1; + float roiHalfHeight = (srcRoiPtr_i4->w - srcRoiPtr_i4->y + 1) >> 1; + locDst_f2.x = static_cast(id_x - roiHalfWidth); + 
locDst_f2.y = static_cast(id_y - roiHalfHeight); + locHom_f3.x = fmaf(locDst_f2.x, perspectiveMatrix_f9->f1[0], fmaf(locDst_f2.y, perspectiveMatrix_f9->f1[1], perspectiveMatrix_f9->f1[2])); + locHom_f3.y = fmaf(locDst_f2.x, perspectiveMatrix_f9->f1[3], fmaf(locDst_f2.y, perspectiveMatrix_f9->f1[4], perspectiveMatrix_f9->f1[5])); + locHom_f3.z = fmaf(locDst_f2.x, perspectiveMatrix_f9->f1[6], fmaf(locDst_f2.y, perspectiveMatrix_f9->f1[7], perspectiveMatrix_f9->f1[8])); // Compute first homogenous coords based on which final destination coords are computed + locHomW_f4 = static_cast(locHom_f3.z); + incrementW_f8.f4[0] = make_float4(0, perspectiveMatrix_f9->f1[6], perspectiveMatrix_f9->f1[6] + perspectiveMatrix_f9->f1[6], perspectiveMatrix_f9->f1[6] + perspectiveMatrix_f9->f1[6] + perspectiveMatrix_f9->f1[6]); + incrementW_f8.f4[1] = static_cast(perspectiveMatrix_f9->f1[6] + incrementW_f8.f4[0].w) + incrementW_f8.f4[0]; + locHomW_f8.f4[0] = locHomW_f4 + incrementW_f8.f4[0]; + locHomW_f8.f4[1] = locHomW_f4 + incrementW_f8.f4[1]; // Compute multiple homogenous coords terms using first term and perspective matrix based on which final destination coords are computed + warp_perspective_srclocs_hip_compute(perspectiveMatrix_f9->f1[0], static_cast(locHom_f3.x), static_cast(roiHalfWidth), &locHomW_f8, &(locSrc_f16->f8[0])); // Compute 8 locSrcX + warp_perspective_srclocs_hip_compute(perspectiveMatrix_f9->f1[3], static_cast(locHom_f3.y), static_cast(roiHalfHeight), &locHomW_f8, &(locSrc_f16->f8[1])); // Compute 8 locSrcY +} + +// -------------------- Set 1 - Bilinear Interpolation -------------------- + +template +__global__ void warp_perspective_bilinear_pkd_hip_tensor(T *srcPtr, + uint2 srcStridesNH, + T *dstPtr, + uint2 dstStridesNH, + d_float9 *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + 
hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + { + return; + } + + uint srcIdx = (id_z * srcStridesNH.x); + uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3; + + d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z]; + int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z]; + d_float16 locSrc_f16; + warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16); + + d_float24 dst_f24; + rpp_hip_interpolate24_bilinear_pkd3(srcPtr + srcIdx, srcStridesNH.y, &locSrc_f16, &srcRoi_i4, &dst_f24); + rpp_hip_pack_float24_pkd3_and_store24_pkd3(dstPtr + dstIdx, &dst_f24); +} + +template +__global__ void warp_perspective_bilinear_pln_hip_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + d_float9 *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + { + return; + } + + uint srcIdx = (id_z * srcStridesNCH.x); + uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x; + + d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z]; + int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z]; + d_float16 locSrc_f16; + warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16); + + d_float8 dst_f8; + rpp_hip_interpolate8_bilinear_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8); + if (channelsDst == 3) + { + srcIdx += srcStridesNCH.y; + dstIdx += dstStridesNCH.y; + + rpp_hip_interpolate8_bilinear_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, 
&dst_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8); + + srcIdx += srcStridesNCH.y; + dstIdx += dstStridesNCH.y; + + rpp_hip_interpolate8_bilinear_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8); + } +} + +template +__global__ void warp_perspective_bilinear_pkd3_pln3_hip_tensor(T *srcPtr, + uint2 srcStridesNH, + T *dstPtr, + uint3 dstStridesNCH, + d_float9 *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + { + return; + } + + uint srcIdx = (id_z * srcStridesNH.x); + uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x; + + d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z]; + int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z]; + d_float16 locSrc_f16; + warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16); + + d_float24 dst_f24; + rpp_hip_interpolate24_bilinear_pkd3(srcPtr + srcIdx, srcStridesNH.y, &locSrc_f16, &srcRoi_i4, &dst_f24); + rpp_hip_pack_float24_pkd3_and_store24_pln3(dstPtr + dstIdx, dstStridesNCH.y, &dst_f24); +} + +template +__global__ void warp_perspective_bilinear_pln3_pkd3_hip_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint2 dstStridesNH, + d_float9 *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + { + return; + } + + uint srcIdx = (id_z * srcStridesNCH.x); + uint 
dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3; + + d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z]; + int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z]; + d_float16 locSrc_f16; + warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16); + + d_float24 dst_f24; + rpp_hip_interpolate24_bilinear_pln3(srcPtr + srcIdx, &srcStridesNCH, &locSrc_f16, &srcRoi_i4, &dst_f24); + rpp_hip_pack_float24_pln3_and_store24_pkd3(dstPtr + dstIdx, &dst_f24); +} + +// -------------------- Set 2 - Nearest Neighbor Interpolation -------------------- + +template +__global__ void warp_perspective_nearest_neighbor_pkd_hip_tensor(T *srcPtr, + uint2 srcStridesNH, + T *dstPtr, + uint2 dstStridesNH, + d_float9 *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + { + return; + } + + uint srcIdx = (id_z * srcStridesNH.x); + uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3; + + d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z]; + int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z]; + d_float16 locSrc_f16; + warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16); + + d_float24 dst_f24; + rpp_hip_interpolate24_nearest_neighbor_pkd3(srcPtr + srcIdx, srcStridesNH.y, &locSrc_f16, &srcRoi_i4, &dst_f24); + rpp_hip_pack_float24_pkd3_and_store24_pkd3(dstPtr + dstIdx, &dst_f24); +} + +template +__global__ void warp_perspective_nearest_neighbor_pln_hip_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + d_float9 *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + 
hipThreadIdx_x) * 8; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + { + return; + } + + uint srcIdx = (id_z * srcStridesNCH.x); + uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x; + + d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z]; + int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z]; + d_float16 locSrc_f16; + warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16); + + d_float8 dst_f8; + rpp_hip_interpolate8_nearest_neighbor_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8); + if (channelsDst == 3) + { + srcIdx += srcStridesNCH.y; + dstIdx += dstStridesNCH.y; + + rpp_hip_interpolate8_nearest_neighbor_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8); + + srcIdx += srcStridesNCH.y; + dstIdx += dstStridesNCH.y; + + rpp_hip_interpolate8_nearest_neighbor_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8); + } +} + +template +__global__ void warp_perspective_nearest_neighbor_pkd3_pln3_hip_tensor(T *srcPtr, + uint2 srcStridesNH, + T *dstPtr, + uint3 dstStridesNCH, + d_float9 *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + { + return; + } + + uint srcIdx = (id_z * srcStridesNH.x); + uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x; + + 
d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z]; + int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z]; + d_float16 locSrc_f16; + warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16); + + d_float24 dst_f24; + rpp_hip_interpolate24_nearest_neighbor_pkd3(srcPtr + srcIdx, srcStridesNH.y, &locSrc_f16, &srcRoi_i4, &dst_f24); + rpp_hip_pack_float24_pkd3_and_store24_pln3(dstPtr + dstIdx, dstStridesNCH.y, &dst_f24); +} + +template +__global__ void warp_perspective_nearest_neighbor_pln3_pkd3_hip_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint2 dstStridesNH, + d_float9 *perspectiveTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + { + return; + } + + uint srcIdx = (id_z * srcStridesNCH.x); + uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3; + + d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z]; + int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z]; + d_float16 locSrc_f16; + warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16); + + d_float24 dst_f24; + rpp_hip_interpolate24_nearest_neighbor_pln3(srcPtr + srcIdx, &srcStridesNCH, &locSrc_f16, &srcRoi_i4, &dst_f24); + rpp_hip_pack_float24_pln3_and_store24_pkd3(dstPtr + dstIdx, &dst_f24); +} + +// -------------------- Set 3 - Kernel Executors -------------------- + +template +RppStatus hip_exec_warp_perspective_tensor(T *srcPtr, + RpptDescPtr srcDescPtr, + T *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptInterpolationType interpolationType, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + rpp::Handle& handle) +{ + if (roiType == RpptRoiType::XYWH) + 
hip_exec_roi_converison_xywh_to_ltrb(roiTensorPtrSrc, handle); + + int globalThreads_x = (dstDescPtr->w + 7) >> 3; + int globalThreads_y = dstDescPtr->h; + int globalThreads_z = dstDescPtr->n; + + if (interpolationType == RpptInterpolationType::BILINEAR) + { + if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + hipLaunchKernelGGL(warp_perspective_bilinear_pkd_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + reinterpret_cast(perspectiveTensor), + roiTensorPtrSrc); + } + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + hipLaunchKernelGGL(warp_perspective_bilinear_pln_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + reinterpret_cast(perspectiveTensor), + roiTensorPtrSrc); + } + else if ((srcDescPtr->c == 3) && (dstDescPtr->c == 3)) + { + if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + hipLaunchKernelGGL(warp_perspective_bilinear_pkd3_pln3_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, 
LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + reinterpret_cast(perspectiveTensor), + roiTensorPtrSrc); + } + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + globalThreads_x = (srcDescPtr->strides.hStride + 7) >> 3; + hipLaunchKernelGGL(warp_perspective_bilinear_pln3_pkd3_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + reinterpret_cast(perspectiveTensor), + roiTensorPtrSrc); + } + } + } + else if (interpolationType == RpptInterpolationType::NEAREST_NEIGHBOR) + { + if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + hipLaunchKernelGGL(warp_perspective_nearest_neighbor_pkd_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + reinterpret_cast(perspectiveTensor), + roiTensorPtrSrc); + } + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + hipLaunchKernelGGL(warp_perspective_nearest_neighbor_pln_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), 
ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + reinterpret_cast(perspectiveTensor), + roiTensorPtrSrc); + } + else if ((srcDescPtr->c == 3) && (dstDescPtr->c == 3)) + { + if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + hipLaunchKernelGGL(warp_perspective_nearest_neighbor_pkd3_pln3_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + reinterpret_cast(perspectiveTensor), + roiTensorPtrSrc); + } + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + globalThreads_x = (srcDescPtr->strides.hStride + 7) >> 3; + hipLaunchKernelGGL(warp_perspective_nearest_neighbor_pln3_pkd3_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + reinterpret_cast(perspectiveTensor), + roiTensorPtrSrc); + } + } + } + + return RPP_SUCCESS; +} diff --git 
a/src/modules/rppt_tensor_geometric_augmentations.cpp b/src/modules/rppt_tensor_geometric_augmentations.cpp index 4b54ed317..c2540473a 100644 --- a/src/modules/rppt_tensor_geometric_augmentations.cpp +++ b/src/modules/rppt_tensor_geometric_augmentations.cpp @@ -1438,6 +1438,126 @@ RppStatus rppt_transpose_host(RppPtr_t srcPtr, return RPP_SUCCESS; } +RppStatus rppt_warp_perspective_host(RppPtr_t srcPtr, + RpptDescPtr srcDescPtr, + RppPtr_t dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptInterpolationType interpolationType, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + rppHandle_t rppHandle) +{ + if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR)) + return RPP_ERROR_NOT_IMPLEMENTED; + + RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c); + + if(interpolationType == RpptInterpolationType::NEAREST_NEIGHBOR) + { + if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) + { + warp_perspective_nn_u8_u8_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + perspectiveTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) + { + warp_perspective_nn_f32_f32_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + perspectiveTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) + { + warp_perspective_nn_i8_i8_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + 
dstDescPtr, + perspectiveTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) + { + warp_perspective_nn_f16_f16_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + perspectiveTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + } + else if(interpolationType == RpptInterpolationType::BILINEAR) + { + if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) + { + warp_perspective_bilinear_u8_u8_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + perspectiveTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) + { + warp_perspective_bilinear_f32_f32_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + perspectiveTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) + { + warp_perspective_bilinear_i8_i8_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + perspectiveTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) + { + warp_perspective_bilinear_f16_f16_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + 
reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + perspectiveTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + } + return RPP_SUCCESS; +} + /********************************************************************************************************************/ /*********************************************** RPP_GPU_SUPPORT = ON ***********************************************/ /********************************************************************************************************************/ @@ -1667,6 +1787,75 @@ RppStatus rppt_warp_affine_gpu(RppPtr_t srcPtr, #endif // backend } +RppStatus rppt_warp_perspective_gpu(RppPtr_t srcPtr, + RpptDescPtr srcDescPtr, + RppPtr_t dstPtr, + RpptDescPtr dstDescPtr, + Rpp32f *perspectiveTensor, + RpptInterpolationType interpolationType, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + rppHandle_t rppHandle) +{ +#ifdef HIP_COMPILE + if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR)) + return RPP_ERROR_NOT_IMPLEMENTED; + + if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) + { + hip_exec_warp_perspective_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + perspectiveTensor, + interpolationType, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) + { + hip_exec_warp_perspective_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + perspectiveTensor, + interpolationType, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) + 
{ + hip_exec_warp_perspective_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + perspectiveTensor, + interpolationType, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) + { + hip_exec_warp_perspective_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + perspectiveTensor, + interpolationType, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + + return RPP_SUCCESS; +#elif defined(OCL_COMPILE) + return RPP_ERROR_NOT_IMPLEMENTED; +#endif // backend +} + /******************** flip ********************/ RppStatus rppt_flip_gpu(RppPtr_t srcPtr, diff --git a/utilities/examples/brightness/CMakeLists.txt b/utilities/examples/brightness/CMakeLists.txt index 4c23da688..20367aba4 100644 --- a/utilities/examples/brightness/CMakeLists.txt +++ b/utilities/examples/brightness/CMakeLists.txt @@ -42,7 +42,7 @@ if(NOT WIN32) set(White "${Esc}[37m") endif() -find_package(hip QUIET) +find_package(HIP QUIET) find_package(OpenCV QUIET) find_package(TurboJpeg QUIET) @@ -59,7 +59,7 @@ else() message("-- ${Yellow}Error: TurboJpeg must be installed to install ${PROJECT_NAME} successfully!${ColourReset}") endif() -if (hip_FOUND AND OpenCV_FOUND) +if (HIP_FOUND AND OpenCV_FOUND) message("-- ${Green}${PROJECT_NAME} set to build with rpp, hip and OpenCV${ColourReset}") set(COMPILER_FOR_HIP ${ROCM_PATH}/bin/amdclang++) set(CMAKE_CXX_COMPILER ${COMPILER_FOR_HIP}) diff --git a/utilities/rpp-performancetests/HIP_NEW/CMakeLists.txt b/utilities/rpp-performancetests/HIP_NEW/CMakeLists.txt index cdf861914..57c81ebd9 100644 --- a/utilities/rpp-performancetests/HIP_NEW/CMakeLists.txt +++ b/utilities/rpp-performancetests/HIP_NEW/CMakeLists.txt @@ -42,7 +42,7 @@ if(NOT WIN32) 
set(White "${Esc}[37m") endif() -find_package(hip QUIET) +find_package(HIP QUIET) find_package(OpenCV QUIET) # OpenMP @@ -50,7 +50,7 @@ find_package(OpenMP REQUIRED) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -if (hip_FOUND AND OpenCV_FOUND) +if(HIP_FOUND AND OpenCV_FOUND) message("-- ${Green}${PROJECT_NAME} set to build with rpp, hip and OpenCV${ColourReset}") set(COMPILER_FOR_HIP ${ROCM_PATH}/bin/amdclang++) set(CMAKE_CXX_COMPILER ${COMPILER_FOR_HIP}) diff --git a/utilities/rpp-unittests/HIP_NEW/CMakeLists.txt b/utilities/rpp-unittests/HIP_NEW/CMakeLists.txt index 18255dc2f..ecb581283 100644 --- a/utilities/rpp-unittests/HIP_NEW/CMakeLists.txt +++ b/utilities/rpp-unittests/HIP_NEW/CMakeLists.txt @@ -44,7 +44,7 @@ if(NOT WIN32) set(White "${Esc}[37m") endif() -find_package(hip QUIET) +find_package(HIP QUIET) find_package(OpenCV QUIET) # OpenMP @@ -52,7 +52,7 @@ find_package(OpenMP REQUIRED) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -if (hip_FOUND AND OpenCV_FOUND) +if(HIP_FOUND AND OpenCV_FOUND) message("-- ${Green}${PROJECT_NAME} set to build with rpp, hip and OpenCV${ColourReset}") set(COMPILER_FOR_HIP ${ROCM_PATH}/bin/amdclang++) set(CMAKE_CXX_COMPILER ${COMPILER_FOR_HIP}) diff --git a/utilities/test_suite/CMakeLists.txt b/utilities/test_suite/CMakeLists.txt index 708cd7c3a..9a9fc90d1 100644 --- a/utilities/test_suite/CMakeLists.txt +++ b/utilities/test_suite/CMakeLists.txt @@ -23,20 +23,27 @@ SOFTWARE. 
]] cmake_minimum_required(VERSION 3.10) -project(rpp-test) - -# make test with CTest -enable_testing() -include(CTest) # ROCM Path if(DEFINED ENV{ROCM_PATH}) set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path") elseif(ROCM_PATH) - message("-- ${PROJECT_NAME} INFO:ROCM_PATH Set -- ${ROCM_PATH}") + message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}") else() set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path") endif() +# Set AMD Clang as default compiler +if(NOT DEFINED CMAKE_CXX_COMPILER AND EXISTS "${ROCM_PATH}/bin/amdclang++") + set(CMAKE_C_COMPILER ${ROCM_PATH}/bin/amdclang) + set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/amdclang++) +elseif(NOT DEFINED CMAKE_CXX_COMPILER AND NOT EXISTS "${ROCM_PATH}/bin/amdclang++") + set(CMAKE_CXX_COMPILER clang++) +endif() + +project(rpp-test) +# make test with CTest +enable_testing() +include(CTest) # Set message options if(NOT WIN32) @@ -66,6 +73,8 @@ List of high level dependency checks - for RPP QA tests in utilities/test_suite/ - OpenCV (For IMAGE and VOXEL tests - to decode image inputs and write images for unit test outputs) - TurboJPEG (For IMAGE tests - to decode image inputs) ]] +# add find modules +list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake) # find RPP find_library(RPP_LIBRARY NAMES rpp HINTS ${ROCM_PATH}/lib) @@ -91,6 +100,7 @@ else() elseif(NOT DEFINED BACKEND) set(BACKEND "CPU") endif() + message("-- ${White}${PROJECT_NAME}: Using RPP Backend: ${BACKEND}${ColourReset}") # RPP installation - Audio support check set(RPP_AUDIO_AUGMENTATIONS_SUPPORT_FOUND 0) @@ -123,7 +133,7 @@ else() if(Python3_FOUND) if (PANDAS_FOUND EQUAL 0) if(OpenMP_FOUND) - + message("-- ${White}${PROJECT_NAME}: Adding RPP HOST tests${ColourReset}") # HOST test set 1 - rpp_qa_tests_tensor_host_all - HOST Tensor-Image QA PASS/FAIL tests if(TurboJpeg_FOUND) if(OpenCV_FOUND) @@ -177,9 +187,17 @@ else() ) if( "${BACKEND}" STREQUAL "HIP") - find_package(hip QUIET) - if(hip_FOUND) - + if(NOT DEFINED 
HIP_PATH) + if(NOT DEFINED ENV{HIP_PATH}) + set(HIP_PATH ${ROCM_PATH} CACHE PATH "Path to which HIP has been installed") + else() + set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") + endif() + endif() + list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip) + find_package(HIP QUIET) + if(HIP_FOUND) + message("-- ${White}${PROJECT_NAME}: Adding RPP HIP tests${ColourReset}") # HIP test set 1 - rpp_qa_tests_tensor_hip_all - HIP Tensor-Image QA PASS/FAIL tests if(TurboJpeg_FOUND) if(OpenCV_FOUND) @@ -232,7 +250,7 @@ else() WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - endif(hip_FOUND) + endif(HIP_FOUND) elseif( "${BACKEND}" STREQUAL "OCL") # TBD: Add OCL Tests message("-- ${Yellow}${PROJECT_NAME} Warning: OpenCL tests not enabled${ColourReset}") diff --git a/utilities/test_suite/HIP/CMakeLists.txt b/utilities/test_suite/HIP/CMakeLists.txt index 084f0981e..2ffe377e6 100644 --- a/utilities/test_suite/HIP/CMakeLists.txt +++ b/utilities/test_suite/HIP/CMakeLists.txt @@ -23,7 +23,6 @@ SOFTWARE. 
]] cmake_minimum_required(VERSION 3.10) -project(test_suite/HIP) # ROCM Path if(DEFINED ENV{ROCM_PATH}) @@ -33,6 +32,15 @@ elseif(ROCM_PATH) else() set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path") endif() +# Set AMD Clang as default compiler +if(NOT DEFINED CMAKE_CXX_COMPILER AND EXISTS "${ROCM_PATH}/bin/amdclang++") + set(CMAKE_C_COMPILER ${ROCM_PATH}/bin/amdclang) + set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/amdclang++) +elseif(NOT DEFINED CMAKE_CXX_COMPILER AND NOT EXISTS "${ROCM_PATH}/bin/amdclang++") + set(CMAKE_CXX_COMPILER clang++) +endif() + +project(test_suite/HIP) list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake) list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) @@ -105,8 +113,11 @@ else() set(RPP_AUDIO_AUGMENTATIONS_SUPPORT_FOUND ${CMAKE_MATCH_1}) endif() + # add find modules + list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake) + # find required libraries - find_package(hip QUIET) + find_package(HIP QUIET) set(Python3_FIND_VIRTUALENV FIRST) find_package(Python3 QUIET) find_package(NIFTI QUIET) @@ -117,6 +128,7 @@ else() find_package(OpenMP QUIET) find_package(OpenCV QUIET) find_package(TurboJpeg QUIET) + find_package(StdFilesystem QUIET) # find required python3-pip imports execute_process( @@ -132,7 +144,7 @@ else() # ERROR_QUIET # added only on HOST # ) - if(hip_FOUND) + if(HIP_FOUND) message("-- ${Green}HIP found at hip_INCLUDE_DIRS - ${hip_INCLUDE_DIRS} and hip_LIBRARIES - ${hip_LIBRARIES}${ColourReset}") if(Python3_FOUND) @@ -209,9 +221,6 @@ else() link_directories(${SndFile_LIBRARIES_DIR} /usr/local/lib/) message("-- ${Green}${PROJECT_NAME} Tensor-Audio HIP tests set to build with libsnd${ColourReset}") add_executable(Tensor_audio_hip Tensor_audio_hip.cpp) - if(NOT APPLE) - set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} stdc++fs) - endif(NOT APPLE) target_link_libraries(Tensor_audio_hip -lrpp pthread ${libsnd_LIBS} -lsndfile ${LINK_LIBRARY_LIST} ${hip_LIBRARIES}) else() message("-- ${Yellow}Warning: libsnd must be 
installed to install ${PROJECT_NAME} Tensor-Audio tests successfully!${ColourReset}") @@ -234,7 +243,7 @@ else() endif(Python3_FOUND) else() message("-- ${Red}${PROJECT_NAME} Error: ROCm HIP must be installed to run any RPP test_suite HIP tests successfully${ColourReset}") - endif(hip_FOUND) + endif(HIP_FOUND) else() message("-- ${Red}${PROJECT_NAME} is supported to build and run only with RPP HIP backend installation${ColourReset}") endif("${BACKEND}" STREQUAL "HIP") diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp index 7493fc289..415d06552 100644 --- a/utilities/test_suite/HIP/Tensor_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_hip.cpp @@ -60,12 +60,12 @@ int main(int argc, char **argv) int decoderType = atoi(argv[13]); int batchSize = atoi(argv[14]); - bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23|| testCase == 24 || testCase == 40 || testCase == 41 || testCase == 49 || testCase == 54 || testCase == 79); + bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23|| testCase == 24 || testCase == 28 || testCase == 40 || testCase == 41 || testCase == 49 || testCase == 54 || testCase == 79); bool kernelSizeCase = (testCase == 40 || testCase == 41 || testCase == 49 || testCase == 54); bool dualInputCase = (testCase == 2 || testCase == 30 || testCase == 33 || testCase == 61 || testCase == 63 || testCase == 65 || testCase == 68); bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 10 || testCase == 84 || testCase == 49 || testCase == 54); - bool nonQACase = (testCase == 24 || testCase == 54); - bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 79); + bool nonQACase = (testCase == 24 || testCase == 28 || testCase == 54); + bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24|| testCase == 28 || testCase == 79); bool reductionTypeCase = (testCase == 87 || testCase == 88 || testCase 
== 89 || testCase == 90 || testCase == 91); bool noiseTypeCase = (testCase == 8); bool pln1OutTypeCase = (testCase == 86); @@ -427,6 +427,10 @@ int main(int argc, char **argv) void *d_interDstPtr; if(testCase == 5) CHECK_RETURN_STATUS(hipHostMalloc(&d_interDstPtr, srcDescPtr->strides.nStride * srcDescPtr->n * sizeof(Rpp32f))); + + Rpp32f *perspectiveTensorPtr = NULL; + if(testCase == 28) + CHECK_RETURN_STATUS(hipHostMalloc(&perspectiveTensorPtr, batchSize * 9 * sizeof(Rpp32f))); // case-wise RPP API and measure time script for Unit and Performance test cout << "\nRunning " << func << " " << numRuns << " times (each time with a batch size of " << batchSize << " images) and computing mean statistics..."; @@ -821,6 +825,37 @@ int main(int argc, char **argv) break; } + case 28: + { + testCaseName = "warp_perspective"; + + if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR)) + { + missingFuncFlag = 1; + break; + } + + for (i = 0, j = 0; i < batchSize; i++, j += 9) + { + perspectiveTensorPtr[j + 0] = 0.93; + perspectiveTensorPtr[j + 1] = 0.5; + perspectiveTensorPtr[j + 2] = 0.0; + perspectiveTensorPtr[j + 3] = -0.5; + perspectiveTensorPtr[j + 4] = 0.93; + perspectiveTensorPtr[j + 5] = 0.0; + perspectiveTensorPtr[j + 6] = 0.005; + perspectiveTensorPtr[j + 7] = 0.005; + perspectiveTensorPtr[j + 8] = 1; + } + + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_warp_perspective_gpu(d_input, srcDescPtr, d_output, dstDescPtr, perspectiveTensorPtr, interpolationType, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + } case 29: { testCaseName = "water"; @@ -1684,6 +1719,8 @@ int main(int argc, char **argv) } if(testCase == 35) CHECK_RETURN_STATUS(hipHostFree(rgbOffsets)); + if(perspectiveTensorPtr != NULL) + CHECK_RETURN_STATUS(hipHostFree(perspectiveTensorPtr)); if (reductionTypeCase) { 
CHECK_RETURN_STATUS(hipHostFree(reductionFuncResultArr)); diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index c916c8d6b..38d5eff66 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -75,10 +75,10 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) - elif case == "21" or case == "23" or case == "24" or case == "79": + elif case == "21" or case == "23" or case == "24" or case == "28" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 - if case =='79': + if case == '28' or case =='79': interpolationRange = 2 for interpolationType in range(interpolationRange): print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType)) @@ -271,7 +271,7 @@ def rpp_test_suite_parser_and_validator(): subprocess.call(["make", "-j16"], cwd=".") # nosec # List of cases supported -supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '10', '13', '20', '21', '23', '26', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '49', '54', 
'61', '63', '65', '68', '70', '79', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92'] +supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '10', '13', '20', '21', '23', '24', '26', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '49', '54', '61', '63', '65', '68', '70', '79', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92'] # Create folders based on testType and profilingOption if testType == 1 and profilingOption == "YES": @@ -368,7 +368,7 @@ def rpp_test_suite_parser_and_validator(): # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise for noiseType in range(3): run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, noiseType, "_noiseType", numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) - elif case == "21" or case == "23" or case == "24" or case == "79": + elif case == "21" or case == "23" or case == "24" or case == "28" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular for interpolationType in range(6): run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, interpolationType, "_interpolationType", numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) @@ -426,7 +426,7 @@ def rpp_test_suite_parser_and_validator(): fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue - elif (CASE_NUM == "24" or CASE_NUM == "21" or CASE_NUM == "23" or CASE_NUM == "79") and TYPE.startswith("Tensor"): + elif (CASE_NUM == "24" or CASE_NUM == "21" or CASE_NUM == "23" or CASE_NUM == "28" or CASE_NUM == "79") and TYPE.startswith("Tensor"): 
INTERPOLATIONTYPE_LIST = [0, 1, 2, 3, 4, 5] # Loop through extra param interpolationType for INTERPOLATIONTYPE in INTERPOLATIONTYPE_LIST: @@ -486,7 +486,7 @@ def rpp_test_suite_parser_and_validator(): print_performance_tests_summary(logFile, functionalityGroupList, numRuns) # print the results of qa tests -nonQACaseList = ['6', '8', '10', '24', '54', '84'] # Add cases present in supportedCaseList, but without QA support +nonQACaseList = ['6', '8', '10', '24', '28', '54', '84'] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") diff --git a/utilities/test_suite/HOST/CMakeLists.txt b/utilities/test_suite/HOST/CMakeLists.txt index c9becedd3..5ec9f8494 100644 --- a/utilities/test_suite/HOST/CMakeLists.txt +++ b/utilities/test_suite/HOST/CMakeLists.txt @@ -23,16 +23,24 @@ SOFTWARE. ]] cmake_minimum_required(VERSION 3.10) -project(test_suite/HOST) # ROCM Path if(DEFINED ENV{ROCM_PATH}) set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path") elseif(ROCM_PATH) - message("-- ${PROJECT_NAME} INFO:ROCM_PATH Set -- ${ROCM_PATH}") + message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}") else() set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path") endif() +# Set AMD Clang as default compiler +if(NOT DEFINED CMAKE_CXX_COMPILER AND EXISTS "${ROCM_PATH}/bin/amdclang++") + set(CMAKE_C_COMPILER ${ROCM_PATH}/bin/amdclang) + set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/amdclang++) +elseif(NOT DEFINED CMAKE_CXX_COMPILER AND NOT EXISTS "${ROCM_PATH}/bin/amdclang++") + set(CMAKE_CXX_COMPILER clang++) +endif() + +project(test_suite/HOST) list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake) list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) @@ -104,6 +112,9 @@ else() set(RPP_AUDIO_AUGMENTATIONS_SUPPORT_FOUND ${CMAKE_MATCH_1}) endif() + # add find modules + list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake) + # find required libraries 
set(Python3_FIND_VIRTUALENV FIRST) find_package(Python3 QUIET) @@ -115,6 +126,7 @@ else() find_package(OpenMP QUIET) find_package(OpenCV QUIET) find_package(TurboJpeg QUIET) + find_package(StdFilesystem QUIET) # find required python3-pip imports execute_process( @@ -208,9 +220,6 @@ else() link_directories(${SndFile_LIBRARIES_DIR} /usr/local/lib/) message("-- ${Green}${PROJECT_NAME} Tensor-Audio HOST tests set to build with libsnd${ColourReset}") add_executable(Tensor_audio_host Tensor_audio_host.cpp) - if(NOT APPLE) - set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} stdc++fs) - endif(NOT APPLE) target_link_libraries(Tensor_audio_host -lrpp pthread ${libsnd_LIBS} -lsndfile ${LINK_LIBRARY_LIST}) else() message("-- ${Yellow}Warning: libsnd must be installed to install ${PROJECT_NAME} Tensor-Audio tests successfully!${ColourReset}") diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index f2626302e..82743ff53 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -60,12 +60,12 @@ int main(int argc, char **argv) int decoderType = atoi(argv[13]); int batchSize = atoi(argv[14]); - bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23 || testCase == 24 || testCase == 49 || testCase ==54 || testCase == 79); + bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23 || testCase == 24 || testCase == 28 || testCase == 49 || testCase ==54 || testCase == 79); bool kernelSizeCase = (testCase == 49 || testCase == 54); bool dualInputCase = (testCase == 2 || testCase == 30 || testCase == 33 || testCase == 61 || testCase == 63 || testCase == 65 || testCase == 68); bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 10 || testCase == 84); - bool nonQACase = (testCase == 24); - bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 79); + bool nonQACase = (testCase == 24 || testCase == 28); 
+ bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 28 || testCase == 79); bool reductionTypeCase = (testCase == 87 || testCase == 88 || testCase == 89 || testCase == 90 || testCase == 91); bool noiseTypeCase = (testCase == 8); bool pln1OutTypeCase = (testCase == 86); @@ -785,6 +785,40 @@ int main(int argc, char **argv) break; } + case 28: + { + testCaseName = "warp_perspective"; + + if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR)) + { + missingFuncFlag = 1; + break; + } + + Rpp32f9 perspectiveTensor_f9[batchSize]; + Rpp32f *perspectiveTensor = reinterpret_cast(perspectiveTensor_f9); + for (i = 0; i < batchSize; i++) + { + perspectiveTensor_f9[i].data[0] = 0.93; + perspectiveTensor_f9[i].data[1] = 0.5; + perspectiveTensor_f9[i].data[2] = 0.0; + perspectiveTensor_f9[i].data[3] = -0.5; + perspectiveTensor_f9[i].data[4] = 0.93; + perspectiveTensor_f9[i].data[5] = 0.0; + perspectiveTensor_f9[i].data[6] = 0.005; + perspectiveTensor_f9[i].data[7] = 0.005; + perspectiveTensor_f9[i].data[8] = 1; + } + + startWallTime = omp_get_wtime(); + startCpuTime = clock(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_warp_perspective_host(input, srcDescPtr, output, dstDescPtr, perspectiveTensor, interpolationType, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + } case 29: { testCaseName = "water"; diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index db550c488..375aa907e 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -74,10 +74,10 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0") 
result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) - elif case == "21" or case == "23" or case == "24" or case == "79": + elif case == "21" or case == "23" or case == "24" or case == "79" or case == "28": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular interpolationRange = 6 - if case =='79': + if case =='79' or case == "28": interpolationRange = 2 for interpolationType in range(interpolationRange): print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0") @@ -120,7 +120,7 @@ def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPa for noiseType in range(3): run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, noiseType, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) print("") - elif case == "21" or case == "23" or case == "24" or case == "79": + elif case == "21" or case == "23" or case == "24" or case == "28" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular for interpolationType in range(6): run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, interpolationType, numRuns, testType, 
layout, qaMode, decoderType, batchSize, roiList) @@ -259,7 +259,7 @@ def rpp_test_suite_parser_and_validator(): subprocess.call(["make", "-j16"], cwd=".") # nosec # List of cases supported -supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '10', '13', '20', '21', '23', '26', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '49', '54', '61', '63', '65', '68', '70', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92'] +supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '10', '13', '20', '21', '23', '24', '26', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '49', '54', '61', '63', '65', '68', '70', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92'] if testType == 0: noCaseSupported = all(case not in supportedCaseList for case in caseList) @@ -319,7 +319,7 @@ def rpp_test_suite_parser_and_validator(): run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) # print the results of qa tests -nonQACaseList = ['6', '8', '10', '24', '54', '84'] # Add cases present in supportedCaseList, but without QA support +nonQACaseList = ['6', '8', '10', '24', '28', '54', '84'] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index c0922db65..bcdd2b875 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -53,6 +53,7 @@ 23: ["rotate", "HOST", "HIP"], 24: ["warp_affine", "HOST", "HIP"], 26: ["lens_correction", "HOST", "HIP"], + 28: ["warp_perspective", "HOST", "HIP"], 29: ["water", "HOST", "HIP"], 30: ["non_linear_blend", "HOST", "HIP"], 31: ["color_cast", "HOST", "HIP"], @@ -119,7 +120,7 @@ ImageAugmentationGroupMap = { "color_augmentations" : [0, 
1, 2, 3, 4, 13, 31, 34, 36, 45, 81], "effects_augmentations" : [5, 6, 8, 10, 29, 30, 32, 35, 46, 82, 83, 84], - "geometric_augmentations" : [20, 21, 23, 24, 26, 33, 37, 38, 39, 63, 79, 80, 92], + "geometric_augmentations" : [20, 21, 23, 24, 26, 28, 33, 37, 38, 39, 63, 79, 80, 92], "filter_augmentations" : [49, 54], "arithmetic_operations" : [61], "logical_operations" : [65, 68], diff --git a/utilities/test_suite/rpp_test_suite_image.h b/utilities/test_suite/rpp_test_suite_image.h index 28f3b651e..336f23ffa 100644 --- a/utilities/test_suite/rpp_test_suite_image.h +++ b/utilities/test_suite/rpp_test_suite_image.h @@ -78,6 +78,7 @@ std::map augmentationMap = {23, "rotate"}, {24, "warp_afffine"}, {26, "lens_correction"}, + {28, "warp_perspective"}, {29, "water"}, {30, "non_linear_blend"}, {31, "color_cast"}, From fde933ce8c7a72ba7da8a4f4712849d7640202cc Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Wed, 8 Jan 2025 14:16:10 +0000 Subject: [PATCH 12/17] Fix on error code display and gaussian Filter --- utilities/test_suite/HOST/runTests.py | 2 +- utilities/test_suite/rpp_test_suite_image.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 375aa907e..8b4e99b70 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -66,7 +66,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo if case == "49" or case == "54": for kernelSize in range(3, 10, 2): print(f"./Tensor_host {srcPath1} {srcPath2} {dstPathTemp} {bitDepth} {outputFormatToggle} {case} {kernelSize} 0 ") - result = subprocess.run([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # 
nosec + result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) elif case == "8": # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise diff --git a/utilities/test_suite/rpp_test_suite_image.h b/utilities/test_suite/rpp_test_suite_image.h index 336f23ffa..277d12212 100644 --- a/utilities/test_suite/rpp_test_suite_image.h +++ b/utilities/test_suite/rpp_test_suite_image.h @@ -1028,7 +1028,7 @@ inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, R func += "_noiseType" + noiseTypeName; binFile += "_noiseType" + noiseTypeName; } - else if(testCase == 49) + else if(testCase == 49 || testCase == 54) { func += "_kernelSize" + std::to_string(additionalParam); binFile += "_kernelSize" + std::to_string(additionalParam); From ea9d991a62308a64ae17827994f287c61abf4c52 Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Fri, 10 Jan 2025 06:25:46 +0000 Subject: [PATCH 13/17] F string bug is resolved --- utilities/test_suite/HOST/runTests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 8b4e99b70..3b3b86864 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -65,7 +65,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo if case == "49" or case == "54": for kernelSize in range(3, 10, 2): - print(f"./Tensor_host {srcPath1} {srcPath2} {dstPathTemp} {bitDepth} 
{outputFormatToggle} {case} {kernelSize} 0 ") + print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) elif case == "8": From 55c8ddddad99fbac05c04d8af7ee7a4f502ad53e Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Thu, 16 Jan 2025 06:04:31 +0000 Subject: [PATCH 14/17] Fix for CI failure and some improvement in error detection --- include/rppdefs.h | 4 +- utilities/test_suite/HIP/Tensor_image_hip.cpp | 2 +- utilities/test_suite/HIP/runImageTests.py | 28 +++++++------- .../test_suite/HOST/Tensor_image_host.cpp | 2 +- utilities/test_suite/HOST/runImageTests.py | 24 ++++++------ utilities/test_suite/common.py | 38 ++++++++++++++++++- 6 files changed, 68 insertions(+), 30 deletions(-) diff --git a/include/rppdefs.h b/include/rppdefs.h index 6bbee49ea..0ce632b65 100644 --- a/include/rppdefs.h +++ b/include/rppdefs.h @@ -164,7 +164,9 @@ typedef enum /*! \brief Number of src dims is invalid. (Needs to adhere to function specification.) \ingroup group_rppdefs */ RPP_ERROR_INVALID_SRC_DIMS = -23, /*! \brief Number of dst dims is invalid. (Needs to adhere to function specification.) \ingroup group_rppdefs */ - RPP_ERROR_INVALID_DST_DIMS = -24 + RPP_ERROR_INVALID_DST_DIMS = -24, + /*! \brief Cpp API functionality is not implemented. (Needs to adhere to function specification.) \ingroup group_rppdefs */ + RPP_ERROR_CPP_API_NOT_IMPLEMENTED = -25 } RppStatus; /*! 
\brief RPP rppStatus_t type enums diff --git a/utilities/test_suite/HIP/Tensor_image_hip.cpp b/utilities/test_suite/HIP/Tensor_image_hip.cpp index 1f50908ba..a83f7660a 100644 --- a/utilities/test_suite/HIP/Tensor_image_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_image_hip.cpp @@ -1582,7 +1582,7 @@ int main(int argc, char **argv) if (missingFuncFlag == 1) { cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n"; - return -1; + return RPP_ERROR_CPP_API_NOT_IMPLEMENTED; } maxWallTime = max(maxWallTime, wallTime); diff --git a/utilities/test_suite/HIP/runImageTests.py b/utilities/test_suite/HIP/runImageTests.py index 9bdb31e61..b1e2c653f 100644 --- a/utilities/test_suite/HIP/runImageTests.py +++ b/utilities/test_suite/HIP/runImageTests.py @@ -66,14 +66,14 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo if case == "40" or case == "41" or case == "49" or case == "54": for kernelSize in range(3, 10, 2): - print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize)) + result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec 
log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) elif case == "8": # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise for noiseType in range(3): - print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType)) + result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) elif case == "21" or case == "23" or case == "24" or case == "28" or case == "79": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular @@ -81,19 +81,19 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo if case == '28' or case =='79': interpolationRange = 2 for interpolationType in range(interpolationRange): - print("./Tensor_hip 
" + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType)) + result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) else: - print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout)) - result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout)) + result = 
subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) print("------------------------------------------------------------------------------------------") def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): - with open(loggingFolder + "/Tensor_hip_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: - print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam)) - process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + with open(loggingFolder + "/Tensor_image_hip_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: + print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam)) + process = subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + 
[scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP")) @@ -492,11 +492,11 @@ def rpp_test_suite_parser_and_validator(): qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - print("---------------------------------- Results of QA Test - Tensor_hip ----------------------------------\n") - print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_hip") + print("---------------------------------- Results of QA Test - Tensor_image_hip ----------------------------------\n") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_image_hip") if errorLog: - print("\n---------------------------------- Error log - Tensor_hip ----------------------------------\n") + print("\n---------------------------------- Error log - Tensor_image_hip ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") diff --git a/utilities/test_suite/HOST/Tensor_image_host.cpp b/utilities/test_suite/HOST/Tensor_image_host.cpp index 8914e576c..1f00dff7a 100644 --- a/utilities/test_suite/HOST/Tensor_image_host.cpp +++ b/utilities/test_suite/HOST/Tensor_image_host.cpp @@ -1618,7 +1618,7 @@ int main(int argc, char **argv) if (missingFuncFlag == 1) { cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n"; - return -1; + return RPP_ERROR_CPP_API_NOT_IMPLEMENTED; } maxWallTime = std::max(maxWallTime, wallTime); diff --git a/utilities/test_suite/HOST/runImageTests.py b/utilities/test_suite/HOST/runImageTests.py index f2830398d..083c70b0a 100644 --- a/utilities/test_suite/HOST/runImageTests.py +++ b/utilities/test_suite/HOST/runImageTests.py @@ 
-65,14 +65,14 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo if case == "49" or case == "54": for kernelSize in range(3, 10, 2): - print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize) + " 0") - result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + print("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize) + " 0") + result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) elif case == "8": # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise for noiseType in range(3): - print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0") - result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + 
roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + print("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0") + result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) elif case == "21" or case == "23" or case == "24" or case == "79" or case == "28": # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular @@ -80,12 +80,12 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo if case =='79' or case == "28": interpolationRange = 2 for interpolationType in range(interpolationRange): - print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0") - result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + print("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0") + result = subprocess.Popen([buildFolderPath + 
"/build/Tensor_image_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) else: - print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0") - result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + print("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0") + result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) print("------------------------------------------------------------------------------------------") @@ -96,9 +96,9 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d process = subprocess.Popen([buildFolderPath + 
"/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) - with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: - logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n") - process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec + with open(loggingFolder + "/Tensor_image_host" + logFileLayout + "_raw_performance_log.txt", "a") as logFile: + logFile.write("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n") + process = subprocess.Popen([buildFolderPath + "/build/Tensor_image_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec read_from_subprocess_and_write_to_log(process, logFile) log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST")) @@ -488,7 +488,7 @@ def rpp_test_suite_parser_and_validator(): 
print_performance_tests_summary(logFile, functionalityGroupList, numRuns) if errorLog: - print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n") + print("\n---------------------------------- Error log - Tensor_image_host ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index e3c123a74..eda494e8a 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -130,6 +130,35 @@ "statistical_operations" : [15, 87, 88, 89, 90, 91] } +StatusMap = { + 0: "RPP_SUCCESS", + -1: "RPP_ERROR", + -2: "RPP_ERROR_INVALID_ARGUMENTS", + -3: "RPP_ERROR_LOW_OFFSET", + -4: "RPP_ERROR_ZERO_DIVISION", + -5: "RPP_ERROR_HIGH_SRC_DIMENSION", + -6: "RPP_ERROR_NOT_IMPLEMENTED", + -7: "RPP_ERROR_INVALID_SRC_CHANNELS", + -8: "RPP_ERROR_INVALID_DST_CHANNELS", + -9: "RPP_ERROR_INVALID_SRC_LAYOUT", + -10: "RPP_ERROR_INVALID_DST_LAYOUT", + -11: "RPP_ERROR_INVALID_SRC_DATATYPE", + -12: "RPP_ERROR_INVALID_DST_DATATYPE", + -13: "RPP_ERROR_INVALID_SRC_OR_DST_DATATYPE", + -14: "RPP_ERROR_INSUFFICIENT_DST_BUFFER_LENGTH", + -15: "RPP_ERROR_INVALID_PARAMETER_DATATYPE", + -16: "RPP_ERROR_NOT_ENOUGH_MEMORY", + -17: "RPP_ERROR_OUT_OF_BOUND_SRC_ROI", + -18: "RPP_ERROR_LAYOUT_MISMATCH", + -19: "RPP_ERROR_INVALID_CHANNELS", + -20: "RPP_ERROR_INVALID_OUTPUT_TILE_LENGTH", + -21: "RPP_ERROR_OUT_OF_BOUND_SHARED_MEMORY_SIZE", + -22: "RPP_ERROR_OUT_OF_BOUND_SCRATCH_MEMORY_SIZE", + -23: "RPP_ERROR_INVALID_SRC_DIMS", + -24: "RPP_ERROR_INVALID_DST_DIMS", + -25: "RPP_ERROR_CPP_API_NOT_IMPLEMENTED", +} + # Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents def validate_and_remove_files(path): if not path: # check if a string is empty @@ -458,6 +487,10 @@ def 
get_signal_name_from_return_code(returnCode): signalName = signame break result = result + signalName + elif( returnCode > 127): + signalNum = returnCode - 256 + if signalNum in StatusMap.keys(): + result = result + " Error = " +StatusMap[signalNum] return result def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificName): @@ -465,6 +498,9 @@ def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificN print(stdoutData.decode()) exitCode = result.returncode if(exitCode != 0): - errorData = "Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(); + if exitCode > 127: + errorData = "Returned non-zero exit status : "+ str(exitCode - 256) + " " + stderrData.decode() + else: + errorData = "Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode() msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + get_signal_name_from_return_code(exitCode) errorLog.append(msg) From 0566127be7dc4f0370579af56a48f2cf51a5b121 Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Thu, 16 Jan 2025 07:33:41 +0000 Subject: [PATCH 15/17] Changes based on review comments --- include/rppdefs.h | 4 +--- utilities/test_suite/HIP/Tensor_image_hip.cpp | 2 +- utilities/test_suite/HIP/runAudioTests.py | 2 +- utilities/test_suite/HIP/runImageTests.py | 2 +- utilities/test_suite/HIP/runMiscTests.py | 2 +- utilities/test_suite/HIP/runVoxelTests.py | 2 +- utilities/test_suite/HOST/Tensor_image_host.cpp | 2 +- utilities/test_suite/HOST/runAudioTests.py | 2 +- utilities/test_suite/HOST/runImageTests.py | 2 +- utilities/test_suite/HOST/runMiscTests.py | 2 +- utilities/test_suite/HOST/runVoxelTests.py | 2 +- utilities/test_suite/common.py | 15 +++++++-------- 12 files changed, 18 insertions(+), 21 deletions(-) diff --git a/include/rppdefs.h b/include/rppdefs.h index 0ce632b65..6bbee49ea 100644 --- a/include/rppdefs.h +++ b/include/rppdefs.h @@ -164,9 +164,7 
@@ typedef enum /*! \brief Number of src dims is invalid. (Needs to adhere to function specification.) \ingroup group_rppdefs */ RPP_ERROR_INVALID_SRC_DIMS = -23, /*! \brief Number of dst dims is invalid. (Needs to adhere to function specification.) \ingroup group_rppdefs */ - RPP_ERROR_INVALID_DST_DIMS = -24, - /*! \brief Cpp API functionality is not implemented. (Needs to adhere to function specification.) \ingroup group_rppdefs */ - RPP_ERROR_CPP_API_NOT_IMPLEMENTED = -25 + RPP_ERROR_INVALID_DST_DIMS = -24 } RppStatus; /*! \brief RPP rppStatus_t type enums diff --git a/utilities/test_suite/HIP/Tensor_image_hip.cpp b/utilities/test_suite/HIP/Tensor_image_hip.cpp index a83f7660a..dbcc31005 100644 --- a/utilities/test_suite/HIP/Tensor_image_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_image_hip.cpp @@ -1582,7 +1582,7 @@ int main(int argc, char **argv) if (missingFuncFlag == 1) { cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n"; - return RPP_ERROR_CPP_API_NOT_IMPLEMENTED; + return RPP_ERROR_NOT_IMPLEMENTED; } maxWallTime = max(maxWallTime, wallTime); diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py index e5792c6fe..f79b16091 100644 --- a/utilities/test_suite/HIP/runAudioTests.py +++ b/utilities/test_suite/HIP/runAudioTests.py @@ -301,7 +301,7 @@ def rpp_test_suite_parser_and_validator(): print("Unable to open results in " + CONSOLIDATED_FILE) if errorLog: - print("\n---------------------------------- Error log - Tensor_audio_hip ----------------------------------\n") + print("\n---------------------------------- Log of function variants requested but not run - Tensor_audio_hip ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") diff --git a/utilities/test_suite/HIP/runImageTests.py b/utilities/test_suite/HIP/runImageTests.py index b1e2c653f..71a99967a 100644 --- 
a/utilities/test_suite/HIP/runImageTests.py +++ b/utilities/test_suite/HIP/runImageTests.py @@ -496,7 +496,7 @@ def rpp_test_suite_parser_and_validator(): print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_image_hip") if errorLog: - print("\n---------------------------------- Error log - Tensor_image_hip ----------------------------------\n") + print("\n---------------------------------- Log of function variants requested but not run - Tensor_image_hip ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py index d31745bae..7d1886aa1 100644 --- a/utilities/test_suite/HIP/runMiscTests.py +++ b/utilities/test_suite/HIP/runMiscTests.py @@ -290,7 +290,7 @@ def rpp_test_suite_parser_and_validator(): print_performance_tests_summary(logFile, functionalityGroupList, numRuns) if errorLog: - print("\n---------------------------------- Error log - Tensor_misc_hip ----------------------------------\n") + print("\n---------------------------------- Log of function variants requested but not run - Tensor_misc_hip ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py index e0956e454..2466b76da 100644 --- a/utilities/test_suite/HIP/runVoxelTests.py +++ b/utilities/test_suite/HIP/runVoxelTests.py @@ -362,7 +362,7 @@ def rpp_test_suite_parser_and_validator(): print_performance_tests_summary(logFile, functionalityGroupList, numRuns) if errorLog: - print("\n---------------------------------- Error log - Tensor_voxel_hip ----------------------------------\n") + 
print("\n---------------------------------- Log of function variants requested but not run - Tensor_voxel_hip ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HOST/Tensor_image_host.cpp b/utilities/test_suite/HOST/Tensor_image_host.cpp index 1f00dff7a..64e89b7a2 100644 --- a/utilities/test_suite/HOST/Tensor_image_host.cpp +++ b/utilities/test_suite/HOST/Tensor_image_host.cpp @@ -1618,7 +1618,7 @@ int main(int argc, char **argv) if (missingFuncFlag == 1) { cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n"; - return RPP_ERROR_CPP_API_NOT_IMPLEMENTED; + return RPP_ERROR_NOT_IMPLEMENTED; } maxWallTime = std::max(maxWallTime, wallTime); diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py index 441615a15..df01c3822 100644 --- a/utilities/test_suite/HOST/runAudioTests.py +++ b/utilities/test_suite/HOST/runAudioTests.py @@ -212,7 +212,7 @@ def rpp_test_suite_parser_and_validator(): print_performance_tests_summary(log_file, "", numRuns) if errorLog: - print("\n---------------------------------- Error log - Tensor_audio_host ----------------------------------\n") + print("\n---------------------------------- Log of function variants requested but not run - Tensor_audio_host ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") diff --git a/utilities/test_suite/HOST/runImageTests.py b/utilities/test_suite/HOST/runImageTests.py index 083c70b0a..b78f8e761 100644 --- a/utilities/test_suite/HOST/runImageTests.py +++ b/utilities/test_suite/HOST/runImageTests.py @@ -488,7 +488,7 @@ def rpp_test_suite_parser_and_validator(): print_performance_tests_summary(logFile, functionalityGroupList, numRuns) if 
errorLog: - print("\n---------------------------------- Error log - Tensor_image_host ----------------------------------\n") + print("\n---------------------------------- Log of function variants requested but not run - Tensor_image_host ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py index e864f9912..d41400389 100644 --- a/utilities/test_suite/HOST/runMiscTests.py +++ b/utilities/test_suite/HOST/runMiscTests.py @@ -204,7 +204,7 @@ def rpp_test_suite_parser_and_validator(): print_performance_tests_summary(logFile, functionalityGroupList, numRuns) if errorLog: - print("\n---------------------------------- Error log - Tensor_misc_host ----------------------------------\n") + print("\n---------------------------------- Log of function variants requested but not run - Tensor_misc_host ----------------------------------\n") for error in errorLog: print(error) print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py index 87625bfe6..a8cebfec0 100644 --- a/utilities/test_suite/HOST/runVoxelTests.py +++ b/utilities/test_suite/HOST/runVoxelTests.py @@ -257,7 +257,7 @@ def rpp_test_suite_parser_and_validator(): print_performance_tests_summary(logFile, functionalityGroupList, numRuns) if errorLog: - print("\n---------------------------------- Error log - Tensor_voxel_host ----------------------------------\n") + print("\n---------------------------------- Log of function variants requested but not run - Tensor_voxel_host ----------------------------------\n") for error in errorLog: print(error) 
print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index eda494e8a..7ca0f12fc 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -156,7 +156,6 @@ -22: "RPP_ERROR_OUT_OF_BOUND_SCRATCH_MEMORY_SIZE", -23: "RPP_ERROR_INVALID_SRC_DIMS", -24: "RPP_ERROR_INVALID_DST_DIMS", - -25: "RPP_ERROR_CPP_API_NOT_IMPLEMENTED", } # Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents @@ -481,16 +480,16 @@ def get_signal_name_from_return_code(returnCode): result = "" if returnCode < 0: signalNum = -returnCode - result = result + " Signal = " + result = result + " ( " for signame, signum in signal.__dict__.items(): if isinstance(signum, int) and signum == signalNum: signalName = signame break - result = result + signalName - elif( returnCode > 127): + result = result + signalName + " ) " + elif(returnCode > 127): signalNum = returnCode - 256 if signalNum in StatusMap.keys(): - result = result + " Error = " +StatusMap[signalNum] + result = result + " ( " + StatusMap[signalNum] + " ) " return result def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificName): @@ -499,8 +498,8 @@ def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificN exitCode = result.returncode if(exitCode != 0): if exitCode > 127: - errorData = "Returned non-zero exit status : "+ str(exitCode - 256) + " " + stderrData.decode() + errorData = "Returned non-zero exit status : " + str(exitCode - 256) + " " + stderrData.decode() else: - errorData = "Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode() - msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. 
Getting below error\n" + errorData + get_signal_name_from_return_code(exitCode) + errorData = "Returned non-zero exit status : " + str(exitCode) + " " + stderrData.decode() + msg = caseName + functionBitDepth + functionSpecificName + " - " + errorData + get_signal_name_from_return_code(exitCode) errorLog.append(msg) From ff76911a1eb303786647d75adbbf419acbee0fcd Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Thu, 16 Jan 2025 09:46:32 +0000 Subject: [PATCH 16/17] Fix for rain and warp_perspective issue of unable to open file --- utilities/test_suite/HOST/Tensor_image_host.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utilities/test_suite/HOST/Tensor_image_host.cpp b/utilities/test_suite/HOST/Tensor_image_host.cpp index 64e89b7a2..b6f4d2e50 100644 --- a/utilities/test_suite/HOST/Tensor_image_host.cpp +++ b/utilities/test_suite/HOST/Tensor_image_host.cpp @@ -63,9 +63,9 @@ int main(int argc, char **argv) bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23 || testCase == 24 || testCase == 28 || testCase == 49 || testCase ==54 || testCase == 79); bool kernelSizeCase = (testCase == 49 || testCase == 54); bool dualInputCase = (testCase == 2 || testCase == 30 || testCase == 33 || testCase == 61 || testCase == 63 || testCase == 65 || testCase == 68); - bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 10 || testCase == 84); - bool nonQACase = (testCase == 24); - bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 79); + bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 10 || testCase == 11 || testCase == 84); + bool nonQACase = (testCase == 24 || testCase == 28); + bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 28 || testCase == 79); bool reductionTypeCase = (testCase == 87 || testCase == 88 || testCase == 89 || testCase == 90 || testCase == 91); bool noiseTypeCase = (testCase == 8); 
bool pln1OutTypeCase = (testCase == 86); From b00c63ae9efa9fefb5134c59a88c5c39bd3b9ccb Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Thu, 16 Jan 2025 10:20:26 +0000 Subject: [PATCH 17/17] Enhanced display for non implimented functionality --- utilities/test_suite/HIP/runAudioTests.py | 10 ++++++---- utilities/test_suite/HIP/runImageTests.py | 10 ++++++---- utilities/test_suite/HIP/runMiscTests.py | 10 ++++++---- utilities/test_suite/HIP/runVoxelTests.py | 10 ++++++---- utilities/test_suite/HOST/runAudioTests.py | 10 ++++++---- utilities/test_suite/HOST/runImageTests.py | 10 ++++++---- utilities/test_suite/HOST/runMiscTests.py | 10 ++++++---- utilities/test_suite/HOST/runVoxelTests.py | 10 ++++++---- utilities/test_suite/common.py | 13 ++++++++----- 9 files changed, 56 insertions(+), 37 deletions(-) diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py index f79b16091..8f8fba351 100644 --- a/utilities/test_suite/HIP/runAudioTests.py +++ b/utilities/test_suite/HIP/runAudioTests.py @@ -37,7 +37,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 7 -errorLog = [] +errorLog = [{"notExecutedFunctionality" : 0}] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -300,8 +300,10 @@ def rpp_test_suite_parser_and_validator(): except IOError: print("Unable to open results in " + CONSOLIDATED_FILE) -if errorLog: +if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0: print("\n---------------------------------- Log of function variants requested but not run - Tensor_audio_hip ----------------------------------\n") - for error in errorLog: - print(error) + for i in range(1,len(errorLog)): + print(errorLog[i]) + if(errorLog[0]["notExecutedFunctionality"] != 0): + print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_audio_hip were not executed since these sub-variants are not currently supported in RPP.\n") 
print("-----------------------------------------------------------------------------------------------") diff --git a/utilities/test_suite/HIP/runImageTests.py b/utilities/test_suite/HIP/runImageTests.py index 71a99967a..a986a7e7b 100644 --- a/utilities/test_suite/HIP/runImageTests.py +++ b/utilities/test_suite/HIP/runImageTests.py @@ -41,7 +41,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 92 -errorLog = [] +errorLog = [{"notExecutedFunctionality" : 0}] # Get a list of log files based on a flag for preserving output def get_log_file_list(preserveOutput): @@ -495,8 +495,10 @@ def rpp_test_suite_parser_and_validator(): print("---------------------------------- Results of QA Test - Tensor_image_hip ----------------------------------\n") print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_image_hip") -if errorLog: +if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0: print("\n---------------------------------- Log of function variants requested but not run - Tensor_image_hip ----------------------------------\n") - for error in errorLog: - print(error) + for i in range(1,len(errorLog)): + print(errorLog[i]) + if(errorLog[0]["notExecutedFunctionality"] != 0): + print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_image_hip were not executed since these sub-variants are not currently supported in RPP.\n") print("-----------------------------------------------------------------------------------------------") diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py index 7d1886aa1..9f90a2de6 100644 --- a/utilities/test_suite/HIP/runMiscTests.py +++ b/utilities/test_suite/HIP/runMiscTests.py @@ -39,7 +39,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 2 -errorLog = [] +errorLog = [{"notExecutedFunctionality" : 0}] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -289,8 +289,10 @@ 
def rpp_test_suite_parser_and_validator(): for logFile in logFileList: print_performance_tests_summary(logFile, functionalityGroupList, numRuns) -if errorLog: +if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0: print("\n---------------------------------- Log of function variants requested but not run - Tensor_misc_hip ----------------------------------\n") - for error in errorLog: - print(error) + for i in range(1,len(errorLog)): + print(errorLog[i]) + if(errorLog[0]["notExecutedFunctionality"] != 0): + print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_misc_hip were not executed since these sub-variants are not currently supported in RPP.\n") print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py index 2466b76da..e02990801 100644 --- a/utilities/test_suite/HIP/runVoxelTests.py +++ b/utilities/test_suite/HIP/runVoxelTests.py @@ -39,7 +39,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 6 -errorLog = [] +errorLog = [{"notExecutedFunctionality" : 0}] def get_log_file_list(preserveOutput): return [ @@ -361,8 +361,10 @@ def rpp_test_suite_parser_and_validator(): for logFile in logFileList: print_performance_tests_summary(logFile, functionalityGroupList, numRuns) -if errorLog: +if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0: print("\n---------------------------------- Log of function variants requested but not run - Tensor_voxel_hip ----------------------------------\n") - for error in errorLog: - print(error) + for i in range(1,len(errorLog)): + print(errorLog[i]) + if(errorLog[0]["notExecutedFunctionality"] != 0): + print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_voxel_hip were not executed since these sub-variants are not currently supported 
in RPP.\n") print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py index df01c3822..1466237c0 100644 --- a/utilities/test_suite/HOST/runAudioTests.py +++ b/utilities/test_suite/HOST/runAudioTests.py @@ -37,7 +37,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 7 -errorLog = [] +errorLog = [{"notExecutedFunctionality" : 0}] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -211,9 +211,11 @@ def rpp_test_suite_parser_and_validator(): for log_file in log_file_list: print_performance_tests_summary(log_file, "", numRuns) -if errorLog: +if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0: print("\n---------------------------------- Log of function variants requested but not run - Tensor_audio_host ----------------------------------\n") - for error in errorLog: - print(error) + for i in range(1,len(errorLog)): + print(errorLog[i]) + if(errorLog[0]["notExecutedFunctionality"] != 0): + print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_audio_host were not executed since these sub-variants are not currently supported in RPP.\n") print("-----------------------------------------------------------------------------------------------") diff --git a/utilities/test_suite/HOST/runImageTests.py b/utilities/test_suite/HOST/runImageTests.py index b78f8e761..4cf5bc6de 100644 --- a/utilities/test_suite/HOST/runImageTests.py +++ b/utilities/test_suite/HOST/runImageTests.py @@ -41,7 +41,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 92 -errorLog = [] +errorLog = [{"notExecutedFunctionality" : 0}] # Get a list of log files based on a flag for preserving output def get_log_file_list(preserveOutput): @@ -487,8 +487,10 @@ def rpp_test_suite_parser_and_validator(): for logFile in logFileList: 
print_performance_tests_summary(logFile, functionalityGroupList, numRuns) -if errorLog: +if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0: print("\n---------------------------------- Log of function variants requested but not run - Tensor_image_host ----------------------------------\n") - for error in errorLog: - print(error) + for i in range(1,len(errorLog)): + print(errorLog[i]) + if(errorLog[0]["notExecutedFunctionality"] != 0): + print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_image_host were not executed since these sub-variants are not currently supported in RPP.\n") print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py index d41400389..3d5a383e1 100644 --- a/utilities/test_suite/HOST/runMiscTests.py +++ b/utilities/test_suite/HOST/runMiscTests.py @@ -39,7 +39,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 2 -errorLog = [] +errorLog = [{"notExecutedFunctionality" : 0}] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -203,8 +203,10 @@ def rpp_test_suite_parser_and_validator(): for logFile in logFileList: print_performance_tests_summary(logFile, functionalityGroupList, numRuns) -if errorLog: +if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0: print("\n---------------------------------- Log of function variants requested but not run - Tensor_misc_host ----------------------------------\n") - for error in errorLog: - print(error) + for i in range(1,len(errorLog)): + print(errorLog[i]) + if(errorLog[0]["notExecutedFunctionality"] != 0): + print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_misc_host were not executed since these sub-variants are not currently supported in RPP.\n") 
print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py index a8cebfec0..54734abfa 100644 --- a/utilities/test_suite/HOST/runVoxelTests.py +++ b/utilities/test_suite/HOST/runVoxelTests.py @@ -39,7 +39,7 @@ buildFolderPath = os.getcwd() caseMin = 0 caseMax = 6 -errorLog = [] +errorLog = [{"notExecutedFunctionality" : 0}] # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -256,8 +256,10 @@ def rpp_test_suite_parser_and_validator(): for logFile in logFileList: print_performance_tests_summary(logFile, functionalityGroupList, numRuns) -if errorLog: +if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0: print("\n---------------------------------- Log of function variants requested but not run - Tensor_voxel_host ----------------------------------\n") - for error in errorLog: - print(error) + for i in range(1,len(errorLog)): + print(errorLog[i]) + if(errorLog[0]["notExecutedFunctionality"] != 0): + print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_voxel_host were not executed since these sub-variants are not currently supported in RPP.\n") print("-----------------------------------------------------------------------------------------------") \ No newline at end of file diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index 7ca0f12fc..28590ae0b 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -497,9 +497,12 @@ def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificN print(stdoutData.decode()) exitCode = result.returncode if(exitCode != 0): - if exitCode > 127: - errorData = "Returned non-zero exit status : " + str(exitCode - 256) + " " + stderrData.decode() + if exitCode == 250: + 
errorLog[0]["notExecutedFunctionality"] += 1 else: - errorData = "Returned non-zero exit status : " + str(exitCode) + " " + stderrData.decode() - msg = caseName + functionBitDepth + functionSpecificName + " - " + errorData + get_signal_name_from_return_code(exitCode) - errorLog.append(msg) + if exitCode > 127: + errorData = "Returned non-zero exit status : " + str(exitCode - 256) + " " + stderrData.decode() + else: + errorData = "Returned non-zero exit status : " + str(exitCode) + " " + stderrData.decode() + msg = caseName + functionBitDepth + functionSpecificName + " - " + errorData + get_signal_name_from_return_code(exitCode) + errorLog.append(msg)