From 23682d86ca8542b86786a952b60405f7982abcaa Mon Sep 17 00:00:00 2001
From: sampath1117 <sampath.rachumallu@multicorewareinc.com>
Date: Wed, 4 Sep 2024 13:50:21 +0000
Subject: [PATCH 01/17] experimental changes to detect errors and print them at
 the end in the image test suite

---
 utilities/test_suite/HIP/runTests.py  | 24 ++++++++++++++++++------
 utilities/test_suite/HOST/runTests.py | 21 ++++++++++++++++++---
 utilities/test_suite/common.py        | 26 ++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index 8857e6ac5..14a03b64c 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -41,6 +41,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 92
+errorLog = []
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list(preserveOutput):
@@ -66,16 +67,18 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
             if case == "40" or case == "41" or case == "49" or case == "54":
                 for kernelSize in range(3, 10, 2):
                     print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize))
-                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE)    # nosec
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
+                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
             elif case == "8":
                 # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
                 for noiseType in range(3):
                     print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType))
-                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE)    # nosec
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
+                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
@@ -83,22 +86,25 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     interpolationRange = 2
                 for interpolationType in range(interpolationRange):
                     print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType))
-                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE)    # nosec
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
+                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
             else:
                 print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout))
-                result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE)    # nosec
+                result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                 stdout_data, stderr_data = result.communicate()
                 print(stdout_data.decode())
-
+                log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
             print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     with open(loggingFolder + "/Tensor_hip_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
         print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam))
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)  # nosec
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec        
         read_from_subprocess_and_write_to_log(process, logFile)
+        _, stderr_data = process.communicate()
+        log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
 
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
@@ -486,3 +492,9 @@ def rpp_test_suite_parser_and_validator():
     if checkFile:
         print("---------------------------------- Results of QA Test - Tensor_hip ----------------------------------\n")
         print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_hip")
+
+if errorLog:
+    print("\n---------------------------------- Error log - Tensor_hip ----------------------------------\n")
+    for error in errorLog:
+        print(error)
+    print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 2f2d49d94..73c303b32 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -41,6 +41,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 92
+errorLog = []
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list(preserveOutput):
@@ -69,6 +70,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
+                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
@@ -79,24 +81,31 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
+                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
             else:
                 print("\n./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0")
-                result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE)    # nosec
+                result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                 stdout_data, stderr_data = result.communicate()
                 print(stdout_data.decode())
+                log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
 
             print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     if qaMode == 1:
         with open(loggingFolder + "/BatchPD_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
-            process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)    # nosec
+            process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
             read_from_subprocess_and_write_to_log(process, logFile)
+            # stderr_data only exists once communicate() has returned, so collect it before logging
+            _, stderr_data = process.communicate()
+            log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
 
     with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n")
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)    # nosec
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
+        _, stderr_data = process.communicate()
+        log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
 
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
@@ -473,3 +482,9 @@ def rpp_test_suite_parser_and_validator():
 
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
+
+if errorLog:
+    print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n")
+    for error in errorLog:
+        print(error)
+    print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py
index e24ee73f6..30b877159 100644
--- a/utilities/test_suite/common.py
+++ b/utilities/test_suite/common.py
@@ -35,6 +35,8 @@
     # Python 2 compatibility
     FileExistsError = OSError
 
+bitDepthDict = {0 : "_u8_", 1 : "_f16_", 2 : "_f32_", 3: "_u8_f16_", 4: "_u8_f32_", 5: "_i8_", 6: "_u8_i8_"}
+
 imageAugmentationMap = {
     0: ["brightness", "HOST", "HIP"],
     1: ["gamma_correction", "HOST", "HIP"],
@@ -388,3 +390,27 @@ def dataframe_to_markdown(df):
         md += '| ' + ' | '.join([str(value).ljust(column_widths[df.columns[j]]) for j, value in enumerate(row.values)]) + ' |\n'
 
     return md
+
+def get_image_layout_type(layout, outputFormatToggle, backend):
+    result = "Tensor_" + backend
+    if layout == 0:
+        result += "_PKD3"
+        if outputFormatToggle:
+            result += "_toPLN3"
+        else:
+            result += "_toPKD3"
+    elif layout == 1:
+        result += "_PLN3"
+        if outputFormatToggle:
+            result += "_toPKD3"
+        else:
+            result += "_toPLN3"
+    else:
+        result += "_PLN1"
+        result += "_toPLN1"
+    return result
+
+def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName):
+    if errorData.decode():
+        msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData.decode()
+        errorLog.append(msg)
\ No newline at end of file

From 2aabd274cb741f2f4855adbf4f4f924be9cf16c4 Mon Sep 17 00:00:00 2001
From: dineshbabu-ravichandran <dineshbabu.ravichandran@multicorewareinc.com>
Date: Mon, 30 Sep 2024 14:24:16 +0000
Subject: [PATCH 02/17] Error handling added for host side

---
 utilities/test_suite/HIP/runTests.py       | 44 +++++++++++++++--
 utilities/test_suite/HOST/runAudioTests.py | 25 +++++++++-
 utilities/test_suite/HOST/runMiscTests.py  | 25 +++++++++-
 utilities/test_suite/HOST/runTests.py      | 55 ++++++++++++++++++++--
 utilities/test_suite/HOST/runVoxelTests.py | 26 +++++++++-
 utilities/test_suite/common.py             | 40 ++++++++++++++--
 6 files changed, 196 insertions(+), 19 deletions(-)

diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index 14a03b64c..e4c73a9c6 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -24,6 +24,7 @@
 
 import os
 import sys
+import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -70,7 +71,12 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
-                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                    exit_code = result.returncode
+                    if(exit_code != 0):
+                        if(exit_code < 0):
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                        else:
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
             elif case == "8":
                 # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
                 for noiseType in range(3):
@@ -78,7 +84,12 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
-                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                    exit_code = result.returncode
+                    if(exit_code != 0):
+                        if(exit_code < 0):
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                        else:
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
@@ -89,13 +100,23 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
-                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                    exit_code = result.returncode
+                    if(exit_code != 0):
+                        if(exit_code < 0):
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                        else:
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
             else:
                 print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout))
                 result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                 stdout_data, stderr_data = result.communicate()
                 print(stdout_data.decode())
-                log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                exit_code = result.returncode
+                if(exit_code != 0):
+                    if(exit_code < 0):
+                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                    else:
+                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
             print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
@@ -104,7 +125,12 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec        
         read_from_subprocess_and_write_to_log(process, logFile)
         _, stderr_data = process.communicate()
-        log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+        exit_code = process.returncode
+        if(exit_code != 0):
+            if(exit_code < 0):
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+            else:
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
 
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
@@ -148,6 +174,14 @@ def run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, s
             output_str = output.decode('utf-8')
             logFile.write(output_str)
 
+        stdout_data, stderr_data = result.communicate()
+        exit_code = result.returncode
+        if(exit_code != 0):
+            if(exit_code < 0):
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+            else:
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+
 # Parse and validate command-line arguments for the RPP test suite
 def rpp_test_suite_parser_and_validator():
     parser = argparse.ArgumentParser()
diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py
index 7e9305176..e62a3c306 100644
--- a/utilities/test_suite/HOST/runAudioTests.py
+++ b/utilities/test_suite/HOST/runAudioTests.py
@@ -24,6 +24,7 @@
 
 import os
 import sys
+import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -37,6 +38,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 7
+errorLog = []
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -46,16 +48,29 @@ def get_log_file_list():
 
 def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath):
     print("\n./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
-    result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE)    # nosec
+    result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
     stdout_data, stderr_data = result.communicate()
+    exit_code = result.returncode
+    if(exit_code != 0):
+        if(exit_code < 0):
+            log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST")
+        else:
+            log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST")
     print(stdout_data.decode())
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
     with open(loggingFolder + "/Tensor_audio_host_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize) + "\n")
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)    # nosec
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
+        if(exit_code != 0):
+            if(exit_code < 0):
+                log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST")
+            else:
+                log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST")
         print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -210,3 +225,9 @@ def rpp_test_suite_parser_and_validator():
     for log_file in log_file_list:
         print_performance_tests_summary(log_file, "", numRuns)
 
+if errorLog:
+    print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n")
+    for error in errorLog:
+        print(error)
+    print("-----------------------------------------------------------------------------------------------")
+
diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py
index d7425f287..ae16443e9 100644
--- a/utilities/test_suite/HOST/runMiscTests.py
+++ b/utilities/test_suite/HOST/runMiscTests.py
@@ -28,6 +28,7 @@
 import datetime
 import shutil
 import sys
+import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -39,6 +40,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 2
+errorLog = []
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -48,16 +50,29 @@ def get_log_file_list():
 
 def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     print("\n./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg))
-    result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE)    # nosec
+    result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
     stdout_data, stderr_data = result.communicate()
     print(stdout_data.decode())
+    exit_code = result.returncode
+    if(exit_code != 0):
+        if(exit_code < 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg))
+        else:
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     with open(loggingFolder + "/Tensor_misc_host_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n")
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)    # nosec
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
+        if(exit_code != 0):
+            if(exit_code < 0):
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg))
+            else:
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg))
 
 def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg = ""):
     if testType == 0:
@@ -201,3 +216,9 @@ def rpp_test_suite_parser_and_validator():
 
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
+
+if errorLog:
+    print("\n---------------------------------- Error log - Tensor_misc_host ----------------------------------\n")
+    for error in errorLog:
+        print(error)
+    print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 73c303b32..2f5bad89c 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -23,6 +23,7 @@
 """
 import os
 import sys
+import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -70,7 +71,12 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
-                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                    exit_code = result.returncode
+                    if(exit_code != 0):
+                        if(exit_code < 0):
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                        else:
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
@@ -81,13 +87,42 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     stdout_data, stderr_data = result.communicate()
                     print(stdout_data.decode())
-                    log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                    exit_code = result.returncode
+                    if(exit_code != 0):
+                        if(exit_code < 0):
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                        else:
+                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
             else:
                 print("\n./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0")
                 result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                 stdout_data, stderr_data = result.communicate()
                 print(stdout_data.decode())
-                log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                exit_code = result.returncode
+                if(exit_code != 0):
+                    if(exit_code < 0):
+                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                    else:
+                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+
+                    # result = subprocess.check_output([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath])    # nosec
+                    # print("Result :",result.decode('utf-8'))
+                # except subprocess.CalledProcessError as e:
+                #     if(e.returncode < 0):
+                #         # print(f"Error: Command '{e.cmd}' returned non-zero exit status {e.returncode}.Signal : Process died with signal: {signal.Signals(-e.returncode).name} ({-e.returncode})")
+                #         log_detected_errors("Returned non-zero exit status : {e.returncode}.: "+ str({signal.Signals(-e.returncode).name})+str({-e.returncode}), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                #     else :
+                #         # print(f"Error: Command '{e.cmd}' returned non-zero exit status {e.returncode}.")
+
+                # except FileNotFoundError as e:
+                #     print(f"Error: {e}")
+                #     log_detected_errors(e, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+
+                # stdout_data, stderr_data = result.communicate()
+                # print(stdout_data.decode())
+                # print("Error",stderr.decode())
+
+                # log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
 
             print("------------------------------------------------------------------------------------------")
 
@@ -98,14 +133,24 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d
             read_from_subprocess_and_write_to_log(process, logFile)
             log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
             _, stderr_data = process.communicate()
-            log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+            exit_code = process.returncode
+            if(exit_code != 0):
+                if(exit_code < 0):
+                    log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                else:
+                    log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
 
     with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
         _, stderr_data = process.communicate()
-        log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+        exit_code = process.returncode
+        if(exit_code != 0):
+            if(exit_code < 0):
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+            else:
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
 
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py
index 768f6bdb0..3e7829021 100644
--- a/utilities/test_suite/HOST/runVoxelTests.py
+++ b/utilities/test_suite/HOST/runVoxelTests.py
@@ -24,6 +24,7 @@
 
 import os
 import sys
+import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -39,6 +40,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 6
+errorLog = []
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -59,15 +61,21 @@ def func_group_finder(case_number):
 
 def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
     print("\n./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth))
-    result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE) # nosec
+    result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec
     stdout_data, stderr_data = result.communicate()
     print(stdout_data.decode())
+    exit_code = result.returncode
+    if(exit_code != 0):
+        if(exit_code < 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST"))
+        else:
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST"))
     print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
     with open(loggingFolder + "/Tensor_voxel_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth) + "\n")
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec
         while True:
             output = process.stdout.readline()
             if not output and process.poll() is not None:
@@ -82,6 +90,14 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
                 logFile.write(cleanedOutput + '\n')
                 if "max,min,avg wall times" in output:
                     logFile.write("\n")
+
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
+        if(exit_code != 0):
+            if(exit_code < 0):
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST"))
+            else:
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST"))
         print("\n------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -251,3 +267,9 @@ def rpp_test_suite_parser_and_validator():
 
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
+
+if errorLog:
+    print("\n---------------------------------- Error log - Tensor_voxel_host ----------------------------------\n")
+    for error in errorLog:
+        print(error)
+    print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py
index 30b877159..e322c59dd 100644
--- a/utilities/test_suite/common.py
+++ b/utilities/test_suite/common.py
@@ -410,7 +410,41 @@ def get_image_layout_type(layout, outputFormatToggle, backend):
        result += "_toPLN1"
     return result
 
+def get_misc_func_name(testCase, nDim, additionalArg):
+    axisMaskCase = 0
+    permOrderCase = 0
+    if testCase == 1:
+        axisMaskCase = 1
+    elif testCase == 0:
+        permOrderCase = 1
+    additionalParam = 1
+    if axisMaskCase or permOrderCase:
+        additionalParam = additionalArg
+    axisMask = additionalParam
+    permOrder = additionalParam
+    result = ""
+    if (axisMaskCase):
+        result = result + "_" + str(nDim) + "d" + "_axisMask" + str(axisMask)
+    if (permOrderCase):
+        result =result + "_" + str(nDim) + "d" + "_permOrder" + str(permOrder)
+    return result
+
+def get_voxel_layout_type(layout, backend):
+    result = "Tensor_" + backend
+    if layout == 0:
+        result += "_PKD3_toPKD3"
+    elif layout == 1:
+        result += "_PLN3_toPLN3"
+    else:
+       result += "_PLN1_toPLN1"
+    return result
+# def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName):
+#     if errorData.decode():
+#         msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData.decode()
+#         errorLog.append(msg)
 def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName):
-    if errorData.decode():
-        msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData.decode()
-        errorLog.append(msg)
\ No newline at end of file
+    msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData
+    errorLog.append(msg)
+def log_detected_errors(errorData, errorLog, caseName, functionSpecificName):
+    msg = caseName  + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData
+    errorLog.append(msg)
\ No newline at end of file

From 94abc0f8fee14ccf6c2322c9395430077248323b Mon Sep 17 00:00:00 2001
From: dineshbabu-ravichandran <dineshbabu.ravichandran@multicorewareinc.com>
Date: Tue, 1 Oct 2024 04:13:25 -0700
Subject: [PATCH 03/17] Error detection implemented on HIP SIDE

---
 utilities/test_suite/HIP/runAudioTests.py  | 26 +++++++++--
 utilities/test_suite/HIP/runMiscTests.py   | 24 +++++++++--
 utilities/test_suite/HIP/runTests.py       | 39 +++++------------
 utilities/test_suite/HIP/runVoxelTests.py  | 23 +++++++++-
 utilities/test_suite/HOST/runAudioTests.py | 15 ++-----
 utilities/test_suite/HOST/runMiscTests.py  | 11 +----
 utilities/test_suite/HOST/runTests.py      | 50 +++-------------------
 utilities/test_suite/HOST/runVoxelTests.py | 14 ++----
 utilities/test_suite/common.py             | 29 +++++++++----
 9 files changed, 111 insertions(+), 120 deletions(-)

diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py
index 408f7b683..2f17a1a4a 100644
--- a/utilities/test_suite/HIP/runAudioTests.py
+++ b/utilities/test_suite/HIP/runAudioTests.py
@@ -37,7 +37,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 6
-
+errorLog = []
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -74,16 +74,23 @@ def generate_performance_reports(RESULTS_DIR):
 
 def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath):
     print("\n./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
-    result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE)    # nosec
+    result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
     stdout_data, stderr_data = result.communicate()
     print(stdout_data.decode())
+    exit_code = result.returncode
+    if(exit_code != 0):
+        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
     with open(loggingFolder + "/Tensor_audio_hip_raw_performance_log.txt", "a") as logFile:
         print("./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)    # nosec
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
+        if(exit_code != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " "+ stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code))
         print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -91,7 +98,7 @@ def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns
         os.mkdir(outFilePath + "/case_" + case)
     with open(loggingFolder + "/Tensor_audio_hip_raw_performance_log.txt", "a") as logFile:
         print("\nrocprof --basenames on --timestamp on --stats -o " + outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv ./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
-        process = subprocess.Popen([ 'rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + case + "/output_case" + case + ".csv", "./Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)  # nosec
+        process = subprocess.Popen([ 'rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + case + "/output_case" + case + ".csv", "./Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec
         while True:
             output = process.stdout.readline()
             if not output and process.poll() is not None:
@@ -99,6 +106,11 @@ def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns
             print(output.strip())
             output_str = output.decode('utf-8')
             logFile.write(output_str)
+        
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
+        if(exit_code != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " "+ stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code))
         print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath, profilingOption = "NO"):
@@ -296,3 +308,9 @@ def rpp_test_suite_parser_and_validator():
                 CONSOLIDATED_FILE + "\n")
     except IOError:
         print("Unable to open results in " + CONSOLIDATED_FILE)
+
+if errorLog:
+    print("\n---------------------------------- Error log - Tensor_audio_hip ----------------------------------\n")
+    for error in errorLog:
+        print(error)
+    print("-----------------------------------------------------------------------------------------------")
diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py
index ad1e3bf54..3d397c8f7 100644
--- a/utilities/test_suite/HIP/runMiscTests.py
+++ b/utilities/test_suite/HIP/runMiscTests.py
@@ -39,6 +39,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 2
+errorLog = []
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -75,16 +76,23 @@ def generate_performance_reports(RESULTS_DIR):
 
 def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     print("\n./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg))
-    result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE)    # nosec
+    result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
     stdout_data, stderr_data = result.communicate()
     print(stdout_data.decode())
+    exit_code = result.returncode
+    if(exit_code != 0):
+        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     with open(loggingFolder + "/Tensor_misc_hip_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n")
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)    # nosec
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
+        if(exit_code != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
 
 def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     if not os.path.exists(outFilePath + "/case_" + str(case)):
@@ -92,8 +100,12 @@ def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns
 
     with open(loggingFolder + "/Tensor_misc_hip_raw_performance_log.txt", "a") as logFile:
         logFile.write("\nrocprof --basenames on --timestamp on --stats -o " + outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv ./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n")
-        process = subprocess.Popen(['rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv", "./Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)  # nosec
+        process = subprocess.Popen(['rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv", "./Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
+        if(exit_code != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
     print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg, profilingOption = 'NO'):
@@ -286,3 +298,9 @@ def rpp_test_suite_parser_and_validator():
 
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
+
+if errorLog:
+    print("\n---------------------------------- Error log - Tensor_misc_hip ----------------------------------\n")
+    for error in errorLog:
+        print(error)
+    print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index e4c73a9c6..43bcc1f1e 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -24,7 +24,6 @@
 
 import os
 import sys
-import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -73,10 +72,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     print(stdout_data.decode())
                     exit_code = result.returncode
                     if(exit_code != 0):
-                        if(exit_code < 0):
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
-                        else:
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
             elif case == "8":
                 # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
                 for noiseType in range(3):
@@ -86,10 +82,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     print(stdout_data.decode())
                     exit_code = result.returncode
                     if(exit_code != 0):
-                        if(exit_code < 0):
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
-                        else:
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
@@ -102,21 +95,15 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     print(stdout_data.decode())
                     exit_code = result.returncode
                     if(exit_code != 0):
-                        if(exit_code < 0):
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
-                        else:
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
             else:
                 print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout))
                 result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                 stdout_data, stderr_data = result.communicate()
                 print(stdout_data.decode())
                 exit_code = result.returncode
-                    if(exit_code != 0):
-                        if(exit_code < 0):
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
-                        else:
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+                if(exit_code != 0):
+                    log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
             print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
@@ -125,12 +112,9 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec        
         read_from_subprocess_and_write_to_log(process, logFile)
         _, stderr_data = process.communicate()
-        exit_code = result.returncode
+        exit_code = process.returncode
         if(exit_code != 0):
-            if(exit_code < 0):
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
-            else:
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
 
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
@@ -174,13 +158,10 @@ def run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, s
             output_str = output.decode('utf-8')
             logFile.write(output_str)
 
-        stdout_data, stderr_data = result.communicate()
-        exit_code = result.returncode
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
         if(exit_code != 0):
-            if(exit_code < 0):
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
-            else:
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HIP"))
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
 
 # Parse and validate command-line arguments for the RPP test suite
 def rpp_test_suite_parser_and_validator():
diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py
index 43cb2f6b3..f71f09476 100644
--- a/utilities/test_suite/HIP/runVoxelTests.py
+++ b/utilities/test_suite/HIP/runVoxelTests.py
@@ -39,6 +39,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 6
+errorLog = []
 
 def get_log_file_list(preserveOutput):
     return [
@@ -58,15 +59,18 @@ def func_group_finder(case_number):
 
 def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
     print("\n./Tensor_voxel_hip " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth))
-    result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE) # nosec
+    result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec
     stdout_data, stderr_data = result.communicate()
     print(stdout_data.decode())
+    exit_code = result.returncode
+    if(exit_code != 0):
+        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code))
     print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
    with open(loggingFolder + "/Tensor_voxel_hip_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_voxel_hip " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth) + "\n")
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # nosec
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec
         while True:
             output = process.stdout.readline()
             if not output and process.poll() is not None:
@@ -81,6 +85,11 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
                 logFile.write(cleanedOutput + '\n')
                 if "max,min,avg wall times" in output:
                     logFile.write("\n")
+        
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
+        if(exit_code != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code))
         print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -100,6 +109,10 @@ def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerP
                     break
                 print(output.strip())
                 logFile.write(output.decode('utf-8'))
+        stdout_data, stderr_data = process.communicate()
+        exit_code = process.returncode
+        if(exit_code != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code))
     print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize, profilingOption = 'NO'):
@@ -356,3 +369,9 @@ def rpp_test_suite_parser_and_validator():
 
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
+
+if errorLog:
+    print("\n---------------------------------- Error log - Tensor_voxel_hip ----------------------------------\n")
+    for error in errorLog:
+        print(error)
+    print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py
index e62a3c306..d00b9244a 100644
--- a/utilities/test_suite/HOST/runAudioTests.py
+++ b/utilities/test_suite/HOST/runAudioTests.py
@@ -24,7 +24,6 @@
 
 import os
 import sys
-import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -50,13 +49,10 @@ def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath):
     print("\n./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
     stdout_data, stderr_data = result.communicate()
+    print(stdout_data.decode())
     exit_code = result.returncode
     if(exit_code != 0):
-        if(exit_code < 0):
-            log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST")
-        else:
-            log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, audioAugmentationMapAugmentationMap[int(case)][0], "_HOST")
-    print(stdout_data.decode())
+        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exit_code))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -67,10 +63,7 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba
         stdout_data, stderr_data = process.communicate()
         exit_code = process.returncode
         if(exit_code != 0):
-            if(exit_code < 0):
-                log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST")
-            else:
-                log_detected_audio_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, audioAugmentationMap[int(case)][0], "_HOST")
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exit_code))
         print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -226,7 +219,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(log_file, "", numRuns)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n")
+    print("\n---------------------------------- Error log - Tensor_audio_host ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py
index ae16443e9..533200a81 100644
--- a/utilities/test_suite/HOST/runMiscTests.py
+++ b/utilities/test_suite/HOST/runMiscTests.py
@@ -28,7 +28,6 @@
 import datetime
 import shutil
 import sys
-import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -55,10 +54,7 @@ def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFi
     print(stdout_data.decode())
     exit_code = result.returncode
     if(exit_code != 0):
-        if(exit_code < 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg))
-        else:
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, micsAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg))
+        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
@@ -69,10 +65,7 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to
         stdout_data, stderr_data = process.communicate()
         exit_code = process.returncode
         if(exit_code != 0):
-            if(exit_code < 0):
-                log_detected_errors("Returned non- exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg))
-            else:
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg))
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
 
 def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg = ""):
     if testType == 0:
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 2f5bad89c..4881f16f4 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -23,7 +23,6 @@
 """
 import os
 import sys
-import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -73,10 +72,8 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     print(stdout_data.decode())
                     exit_code = result.returncode
                     if(exit_code != 0):
-                        if(exit_code < 0):
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
-                        else:
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
+
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
@@ -89,10 +86,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     print(stdout_data.decode())
                     exit_code = result.returncode
                     if(exit_code != 0):
-                        if(exit_code < 0):
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
-                        else:
-                            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
             else:
                 print("\n./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0")
                 result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
@@ -100,29 +94,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 print(stdout_data.decode())
                 exit_code = result.returncode
                 if(exit_code != 0):
-                    if(exit_code < 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
-                    else:
-                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
-
-                    # result = subprocess.check_output([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath])    # nosec
-                    # print("Result :",result.decode('utf-8'))
-                # except subprocess.CalledProcessError as e:
-                #     if(e.returncode < 0):
-                #         # print(f"Error: Command '{e.cmd}' returned non-zero exit status {e.returncode}.Signal : Process died with signal: {signal.Signals(-e.returncode).name} ({-e.returncode})")
-                #         log_detected_errors("Returned non-zero exit status : {e.returncode}.: "+ str({signal.Signals(-e.returncode).name})+str({-e.returncode}), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
-                #     else :
-                #         # print(f"Error: Command '{e.cmd}' returned non-zero exit status {e.returncode}.")
-
-                # except FileNotFoundError as e:
-                #     print(f"Error: {e}")
-                #     log_detected_errors(e, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
-
-                # stdout_data, stderr_data = result.communicate()
-                # print(stdout_data.decode())
-                # print("Error",stderr.decode())
-
-                # log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+                    log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
 
             print("------------------------------------------------------------------------------------------")
 
@@ -131,26 +103,18 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d
         with open(loggingFolder + "/BatchPD_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
             process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
             read_from_subprocess_and_write_to_log(process, logFile)
-            log_detected_errors(stderr_data, errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
             _, stderr_data = process.communicate()
-            exit_code = result.returncode
+            exit_code = process.returncode
             if(exit_code != 0):
-                if(exit_code < 0):
-                    log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
-                else:
-                    log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
-
+                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
     with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
         _, stderr_data = process.communicate()
-        exit_code = result.returncode
+        exit_code = process.returncode
         if(exit_code != 0):
-            if(exit_code < 0):
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
-            else:
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], bitDepth, get_image_layout_type(layout, outputFormatToggle, "HOST"))
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
 
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py
index 3e7829021..e91c0c694 100644
--- a/utilities/test_suite/HOST/runVoxelTests.py
+++ b/utilities/test_suite/HOST/runVoxelTests.py
@@ -24,7 +24,6 @@
 
 import os
 import sys
-import signal
 sys.dont_write_bytecode = True
 sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' ))
 from common import *
@@ -66,10 +65,8 @@ def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns,
     print(stdout_data.decode())
     exit_code = result.returncode
     if(exit_code != 0):
-        if(exit_code < 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST"))
-        else:
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST"))
+        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exit_code))
+    
     print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -94,10 +91,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
         stdout_data, stderr_data = process.communicate()
         exit_code = process.returncode
         if(exit_code != 0):
-            if(exit_code < 0):
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " Signal : "+ str(signal.Signals(-exit_code).name) + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST"))
-            else:
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code)  + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], bitDepth, get_voxel_layout_type(layout, "HOST"))
+            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exit_code))
         print("\n------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -269,7 +263,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n")
+    print("\n---------------------------------- Error log - Tensor_voxel_host ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py
index e322c59dd..c91084386 100644
--- a/utilities/test_suite/common.py
+++ b/utilities/test_suite/common.py
@@ -28,6 +28,7 @@
 import datetime
 import shutil
 import pandas as pd
+import signal
 
 try:
     from errno import FileExistsError
@@ -438,13 +439,23 @@ def get_voxel_layout_type(layout, backend):
     else:
        result += "_PLN1_toPLN1"
     return result
-# def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName):
-#     if errorData.decode():
-#         msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData.decode()
-#         errorLog.append(msg)
-def log_detected_errors(errorData, errorLog, caseName, bitDepth, functionSpecificName):
-    msg = caseName + bitDepthDict[bitDepth] + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData
+
+def get_bit_depth(bitDepth):
+    result = str(bitDepthDict[bitDepth])
+    return result
+
+def get_signal_name_from_return_code(return_code):
+    # Returns " Signal = <NAME>" when return_code is negative (process killed by a signal), else ""
+    if return_code >= 0:
+        return ""
+    signal_num = -return_code
+    signal_name = "UNKNOWN(" + str(signal_num) + ")"    # fallback keeps the name bound when nothing matches
+    for signame, signum in signal.__dict__.items():
+        if signame.startswith("SIG") and not signame.startswith("SIG_") and isinstance(signum, int) and signum == signal_num:    # skip SIG_IGN, SIG_UNBLOCK, ITIMER_* etc.
+            signal_name = signame
+            break
+    return " Signal = " + signal_name
+
+def log_detected_errors(errorData, errorLog, caseName, functionBitDepth, functionSpecificName, functionSignalName):
+    msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + functionSignalName
     errorLog.append(msg)
-def log_detected_errors(errorData, errorLog, caseName, functionSpecificName):
-    msg = caseName  + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData
-    errorLog.append(msg)
\ No newline at end of file

From a2792df758d1297235944124ed0c414788d5bf91 Mon Sep 17 00:00:00 2001
From: dineshbabu-ravichandran <dineshbabu.ravichandran@multicorewareinc.com>
Date: Wed, 2 Oct 2024 22:45:32 -0700
Subject: [PATCH 04/17] Voxel test suite changes

---
 utilities/test_suite/HIP/runVoxelTests.py  | 2 +-
 utilities/test_suite/HOST/runVoxelTests.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py
index f71f09476..8db1cb73a 100644
--- a/utilities/test_suite/HIP/runVoxelTests.py
+++ b/utilities/test_suite/HIP/runVoxelTests.py
@@ -77,7 +77,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
                 break
             output = output.decode('utf-8')
             if output:
-                print(output, end='')
+                print(output)
                 logFile.write(output)
             if "Running" in output or "max,min,avg wall times" in output:
                 cleanedOutput = ''.join(char for char in output if 32 <= ord(char) <= 126)  # Remove control characters
diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py
index e91c0c694..1f02d2bc5 100644
--- a/utilities/test_suite/HOST/runVoxelTests.py
+++ b/utilities/test_suite/HOST/runVoxelTests.py
@@ -79,7 +79,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
                 break
             output = output.decode('utf-8')
             if output:
-                print(output, end='')
+                print(output)
                 logFile.write(output)
             if "Running" in output or "max,min,avg wall times" in output:
                 cleanedOutput = ''.join(char for char in output if 32 <= ord(char) <= 126)  # Remove control characters

From 36ac096010e342b8bf5e6a48d977f88b26aa7eac Mon Sep 17 00:00:00 2001
From: dineshbabu-ravichandran <dineshbabu.ravichandran@multicorewareinc.com>
Date: Thu, 3 Oct 2024 08:20:40 -0400
Subject: [PATCH 05/17] Changed variable name to camelCase

---
 utilities/test_suite/HIP/runAudioTests.py  | 26 +++++-----
 utilities/test_suite/HIP/runMiscTests.py   | 26 +++++-----
 utilities/test_suite/HIP/runTests.py       | 56 +++++++++++-----------
 utilities/test_suite/HIP/runVoxelTests.py  | 26 +++++-----
 utilities/test_suite/HOST/runAudioTests.py | 18 +++----
 utilities/test_suite/HOST/runMiscTests.py  | 18 +++----
 utilities/test_suite/HOST/runTests.py      | 46 +++++++++---------
 utilities/test_suite/HOST/runVoxelTests.py | 18 +++----
 utilities/test_suite/common.py             | 14 +++---
 9 files changed, 124 insertions(+), 124 deletions(-)

diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py
index 2f17a1a4a..044ad07b9 100644
--- a/utilities/test_suite/HIP/runAudioTests.py
+++ b/utilities/test_suite/HIP/runAudioTests.py
@@ -75,11 +75,11 @@ def generate_performance_reports(RESULTS_DIR):
 def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath):
     print("\n./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-    stdout_data, stderr_data = result.communicate()
-    print(stdout_data.decode())
-    exit_code = result.returncode
-    if(exit_code != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code))
+    stdoutData, stderrData = result.communicate()
+    print(stdoutData.decode())
+    exitCode = result.returncode
+    if(exitCode != 0):
+        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -87,10 +87,10 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba
         print("./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " "+ stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " "+ stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode))
         print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -107,10 +107,10 @@ def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns
             output_str = output.decode('utf-8')
             logFile.write(output_str)
         
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " "+ stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " "+ stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode))
         print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath, profilingOption = "NO"):
diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py
index 3d397c8f7..a71b1ce50 100644
--- a/utilities/test_suite/HIP/runMiscTests.py
+++ b/utilities/test_suite/HIP/runMiscTests.py
@@ -77,11 +77,11 @@ def generate_performance_reports(RESULTS_DIR):
 def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     print("\n./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-    stdout_data, stderr_data = result.communicate()
-    print(stdout_data.decode())
-    exit_code = result.returncode
-    if(exit_code != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
+    stdoutData, stderrData = result.communicate()
+    print(stdoutData.decode())
+    exitCode = result.returncode
+    if(exitCode != 0):
+        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
@@ -89,10 +89,10 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to
         logFile.write("./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non- exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
 
 def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     if not os.path.exists(outFilePath + "/case_" + str(case)):
@@ -102,10 +102,10 @@ def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns
         logFile.write("\nrocprof --basenames on --timestamp on --stats -o " + outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv ./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n")
         process = subprocess.Popen(['rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv", "./Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non- exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
     print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg, profilingOption = 'NO'):
diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index 43bcc1f1e..965839a43 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -68,21 +68,21 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 for kernelSize in range(3, 10, 2):
                     print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize))
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdout_data, stderr_data = result.communicate()
-                    print(stdout_data.decode())
-                    exit_code = result.returncode
-                    if(exit_code != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
+                    stdoutData, stderrData = result.communicate()
+                    print(stdoutData.decode())
+                    exitCode = result.returncode
+                    if(exitCode != 0):
+                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
             elif case == "8":
                 # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
                 for noiseType in range(3):
                     print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType))
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdout_data, stderr_data = result.communicate()
-                    print(stdout_data.decode())
-                    exit_code = result.returncode
-                    if(exit_code != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
+                    stdoutData, stderrData = result.communicate()
+                    print(stdoutData.decode())
+                    exitCode = result.returncode
+                    if(exitCode != 0):
+                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
@@ -91,19 +91,19 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 for interpolationType in range(interpolationRange):
                     print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType))
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdout_data, stderr_data = result.communicate()
-                    print(stdout_data.decode())
-                    exit_code = result.returncode
-                    if(exit_code != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
+                    stdoutData, stderrData = result.communicate()
+                    print(stdoutData.decode())
+                    exitCode = result.returncode
+                    if(exitCode != 0):
+                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
             else:
                 print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout))
                 result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                stdout_data, stderr_data = result.communicate()
-                print(stdout_data.decode())
-                exit_code = result.returncode
-                if(exit_code != 0):
-                    log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
+                stdoutData, stderrData = result.communicate()
+                print(stdoutData.decode())
+                exitCode = result.returncode
+                if(exitCode != 0):
+                    log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
             print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
@@ -111,10 +111,10 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d
         print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam))
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec        
         read_from_subprocess_and_write_to_log(process, logFile)
-        _, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
+        _, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
 
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
@@ -158,10 +158,10 @@ def run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, s
             output_str = output.decode('utf-8')
             logFile.write(output_str)
 
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
 
 # Parse and validate command-line arguments for the RPP test suite
 def rpp_test_suite_parser_and_validator():
diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py
index 8db1cb73a..4129cb7c9 100644
--- a/utilities/test_suite/HIP/runVoxelTests.py
+++ b/utilities/test_suite/HIP/runVoxelTests.py
@@ -60,11 +60,11 @@ def func_group_finder(case_number):
 def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
     print("\n./Tensor_voxel_hip " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec
-    stdout_data, stderr_data = result.communicate()
-    print(stdout_data.decode())
-    exit_code = result.returncode
-    if(exit_code != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code))
+    stdoutData, stderrData = result.communicate()
+    print(stdoutData.decode())
+    exitCode = result.returncode
+    if(exitCode != 0):
+        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode))
     print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -86,10 +86,10 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
                 if "max,min,avg wall times" in output:
                     logFile.write("\n")
         
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode))
         print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -109,10 +109,10 @@ def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerP
                     break
                 print(output.strip())
                 logFile.write(output.decode('utf-8'))
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode))
     print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize, profilingOption = 'NO'):
diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py
index d00b9244a..2d3f11686 100644
--- a/utilities/test_suite/HOST/runAudioTests.py
+++ b/utilities/test_suite/HOST/runAudioTests.py
@@ -48,11 +48,11 @@ def get_log_file_list():
 def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath):
     print("\n./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-    stdout_data, stderr_data = result.communicate()
-    print(stdout_data.decode())
-    exit_code = result.returncode
-    if(exit_code != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exit_code))
+    stdoutData, stderrData = result.communicate()
+    print(stdoutData.decode())
+    exitCode = result.returncode
+    if(exitCode != 0):
+        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exitCode))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -60,10 +60,10 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba
         logFile.write("./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize) + "\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exitCode))
         print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py
index 533200a81..732d5644d 100644
--- a/utilities/test_suite/HOST/runMiscTests.py
+++ b/utilities/test_suite/HOST/runMiscTests.py
@@ -50,11 +50,11 @@ def get_log_file_list():
 def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     print("\n./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-    stdout_data, stderr_data = result.communicate()
-    print(stdout_data.decode())
-    exit_code = result.returncode
-    if(exit_code != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
+    stdoutData, stderrData = result.communicate()
+    print(stdoutData.decode())
+    exitCode = result.returncode
+    if(exitCode != 0):
+        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
@@ -62,10 +62,10 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to
         logFile.write("./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non- exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non- exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
 
 def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg = ""):
     if testType == 0:
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 4881f16f4..7c98009ad 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -68,11 +68,11 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 for noiseType in range(3):
                     print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0")
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdout_data, stderr_data = result.communicate()
-                    print(stdout_data.decode())
-                    exit_code = result.returncode
-                    if(exit_code != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
+                    stdoutData, stderrData = result.communicate()
+                    print(stdoutData.decode())
+                    exitCode = result.returncode
+                    if(exitCode != 0):
+                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
 
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
@@ -82,19 +82,19 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 for interpolationType in range(interpolationRange):
                     print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0")
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdout_data, stderr_data = result.communicate()
-                    print(stdout_data.decode())
-                    exit_code = result.returncode
-                    if(exit_code != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
+                    stdoutData, stderrData = result.communicate()
+                    print(stdoutData.decode())
+                    exitCode = result.returncode
+                    if(exitCode != 0):
+                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
             else:
                 print("\n./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0")
                 result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                stdout_data, stderr_data = result.communicate()
-                print(stdout_data.decode())
-                exit_code = result.returncode
-                if(exit_code != 0):
-                    log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
+                stdoutData, stderrData = result.communicate()
+                print(stdoutData.decode())
+                exitCode = result.returncode
+                if(exitCode != 0):
+                    log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
 
             print("------------------------------------------------------------------------------------------")
 
@@ -103,18 +103,18 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d
         with open(loggingFolder + "/BatchPD_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
             process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
             read_from_subprocess_and_write_to_log(process, logFile)
-            _, stderr_data = process.communicate()
-            exit_code = result.returncode
-            if(exit_code != 0):
-                log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
+            _, stderrData = process.communicate()
+            exitCode = process.returncode
+            if(exitCode != 0):
+                log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
     with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        _, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exit_code))
+        _, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
 
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py
index 1f02d2bc5..cf0d7db46 100644
--- a/utilities/test_suite/HOST/runVoxelTests.py
+++ b/utilities/test_suite/HOST/runVoxelTests.py
@@ -61,11 +61,11 @@ def func_group_finder(case_number):
 def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
     print("\n./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec
-    stdout_data, stderr_data = result.communicate()
-    print(stdout_data.decode())
-    exit_code = result.returncode
-    if(exit_code != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exit_code))
+    stdoutData, stderrData = result.communicate()
+    print(stdoutData.decode())
+    exitCode = result.returncode
+    if(exitCode != 0):
+        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exitCode))
     
     print("\n------------------------------------------------------------------------------------------")
 
@@ -88,10 +88,10 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
                 if "max,min,avg wall times" in output:
                     logFile.write("\n")
 
-        stdout_data, stderr_data = process.communicate()
-        exit_code = process.returncode
-        if(exit_code != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exit_code) + " " + stderr_data.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exit_code))
+        stdoutData, stderrData = process.communicate()
+        exitCode = process.returncode
+        if(exitCode != 0):
+            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exitCode))
         print("\n------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py
index c91084386..77b9e560d 100644
--- a/utilities/test_suite/common.py
+++ b/utilities/test_suite/common.py
@@ -427,7 +427,7 @@ def get_misc_func_name(testCase, nDim, additionalArg):
     if (axisMaskCase):
         result = result + "_" + str(nDim) + "d" + "_axisMask" + str(axisMask)
     if (permOrderCase):
-        result =result + "_" + str(nDim) + "d" + "_permOrder" + str(permOrder)
+        result = result + "_" + str(nDim) + "d" + "_permOrder" + str(permOrder)
     return result
 
 def get_voxel_layout_type(layout, backend):
@@ -444,16 +444,16 @@ def get_bit_depth(bitDepth):
     result = str(bitDepthDict[bitDepth])
     return result
 
-def get_signal_name_from_return_code(return_code):
+def get_signal_name_from_return_code(returnCode):
     result = ""
-    if return_code < 0:
-        signal_num = -return_code
+    if returnCode < 0:
+        signalNum = -returnCode
         result = result + " Signal = "
         for signame, signum in signal.__dict__.items():
-            if isinstance(signum, int) and signum == signal_num:
-                signal_name = signame
+            if isinstance(signum, int) and signum == signalNum:
+                signalName = signame
                 break
-        result = result + signal_name
+        result = result + signalName
     return result
 
 def log_detected_errors(errorData, errorLog, caseName, functionBitDepth, functionSpecificName, functionSignalName):

From 464660e7a1ce18338b77e5a68599680be8b5a898 Mon Sep 17 00:00:00 2001
From: dineshbabu-ravichandran <dineshbabu.ravichandran@multicorewareinc.com>
Date: Fri, 4 Oct 2024 07:10:56 +0000
Subject: [PATCH 06/17] Reset srcPath1/2 to inFilePath1/2 in the fallback branch

---
 utilities/test_suite/HIP/runTests.py  | 15 ++++++++++++---
 utilities/test_suite/HOST/runTests.py | 10 ++++++++--
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index 965839a43..4e0323595 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -311,9 +311,12 @@ def rpp_test_suite_parser_and_validator():
         if case == "82" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1):
             srcPath1 = ricapInFilePath
             srcPath2 = ricapInFilePath
-        if case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1):
+        elif case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1):
             srcPath1 = lensCorrectionInFilePath
             srcPath2 = lensCorrectionInFilePath
+        else:
+            srcPath1 = inFilePath1
+            srcPath2 = inFilePath2
         # if QA mode is enabled overwrite the input folders with the folders used for generating golden outputs
         if qaMode == 1 and (case != "82" and case != "26"):
             srcPath1 = inFilePath1
@@ -341,9 +344,12 @@ def rpp_test_suite_parser_and_validator():
             if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
                 srcPath1 = ricapInFilePath
                 srcPath2 = ricapInFilePath
-            if case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
+            elif case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
                 srcPath1 = lensCorrectionInFilePath
                 srcPath2 = lensCorrectionInFilePath
+            else:
+                srcPath1 = inFilePath1
+                srcPath2 = inFilePath2
             for layout in range(3):
                 dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder)
 
@@ -361,9 +367,12 @@ def rpp_test_suite_parser_and_validator():
             if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
                 srcPath1 = ricapInFilePath
                 srcPath2 = ricapInFilePath
-            if case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
+            elif case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
                 srcPath1 = lensCorrectionInFilePath
                 srcPath2 = lensCorrectionInFilePath
+            else:
+                srcPath1 = inFilePath1
+                srcPath2 = inFilePath2
             for layout in range(3):
                 dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder)
 
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 7c98009ad..98ae5170e 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -283,9 +283,12 @@ def rpp_test_suite_parser_and_validator():
         if case == "82" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1):
             srcPath1 = ricapInFilePath
             srcPath2 = ricapInFilePath
-        if case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1):
+        elif case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1):
             srcPath1 = lensCorrectionInFilePath
             srcPath2 = lensCorrectionInFilePath
+        else :
+            srcPath1 = inFilePath1
+            srcPath2 = inFilePath2
         # if QA mode is enabled overwrite the input folders with the folders used for generating golden outputs
         if qaMode == 1 and (case != "82" and case != "26"):
             srcPath1 = inFilePath1
@@ -316,9 +319,12 @@ def rpp_test_suite_parser_and_validator():
         if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
             srcPath1 = ricapInFilePath
             srcPath2 = ricapInFilePath
-        if case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
+        elif case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
             srcPath1 = lensCorrectionInFilePath
             srcPath2 = lensCorrectionInFilePath
+        else :
+            srcPath1 = inFilePath1
+            srcPath2 = inFilePath2
         for layout in range(3):
             dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder)
             run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList)

From 82c20796047fddaf1967493c863dfddb38bc0ae5 Mon Sep 17 00:00:00 2001
From: dineshbabu-ravichandran <dineshbabu.ravichandran@multicorewareinc.com>
Date: Fri, 4 Oct 2024 09:06:37 +0000
Subject: [PATCH 07/17] Changes in Voxel test suite for nonQA case

---
 utilities/test_suite/HIP/Tensor_voxel_hip.cpp   | 4 +++-
 utilities/test_suite/HOST/Tensor_voxel_host.cpp | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp
index 16107c608..2db6b169c 100644
--- a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp
+++ b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp
@@ -50,6 +50,8 @@ int main(int argc, char * argv[])
     inputBitDepth = atoi(argv[10]);
     string scriptPath = argv[11];
 
+    bool nonQACase = (testCase == 6);
+
     if ((layoutType < 0) || (layoutType > 2))
     {
         fprintf(stdout, "\nUsage: %s <header file> <data file> <layoutType = 0 - PKD3/ 1 - PLN3/ 2 - PLN1>\n", argv[0]);
@@ -465,7 +467,7 @@ int main(int argc, char * argv[])
             /*Compare the output of the function with golden outputs only if
             1.QA Flag is set
             2.input bit depth 2 (F32)*/
-            if(qaFlag && inputBitDepth == 2)
+            if(qaFlag && inputBitDepth == 2  && !(nonQACase))
                 compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath);
             else
             {
diff --git a/utilities/test_suite/HOST/Tensor_voxel_host.cpp b/utilities/test_suite/HOST/Tensor_voxel_host.cpp
index 39f3979a9..5f50bd53a 100644
--- a/utilities/test_suite/HOST/Tensor_voxel_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_voxel_host.cpp
@@ -50,6 +50,8 @@ int main(int argc, char * argv[])
     inputBitDepth = atoi(argv[10]);
     string scriptPath = argv[11];
 
+    bool nonQACase = (testCase == 6);
+
     if ((layoutType < 0) || (layoutType > 2))
     {
         fprintf(stdout, "\nUsage: %s <header file> <data file> <layoutType = 0 - PKD3/ 1 - PLN3/ 2 - PLN1>\n", argv[0]);
@@ -446,7 +448,7 @@ int main(int argc, char * argv[])
             /*Compare the output of the function with golden outputs only if
             1.QA Flag is set
             2.input bit depth 2 (F32)*/
-            if(qaFlag && inputBitDepth == 2)
+            if(qaFlag && inputBitDepth == 2  && !(nonQACase))
                 compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath);
             else
             {

From 3e30b4d45252c8d783051bff70e1ef918e98b9b7 Mon Sep 17 00:00:00 2001
From: dineshbabu-ravichandran <dineshbabu.ravichandran@multicoreinc.com>
Date: Wed, 20 Nov 2024 13:18:15 +0000
Subject: [PATCH 08/17] Consolidate the repeated code and move it into common
 code

---
 utilities/test_suite/HIP/runAudioTests.py  | 16 ++--------
 utilities/test_suite/HIP/runMiscTests.py   | 16 ++--------
 utilities/test_suite/HIP/runTests.py       | 34 ++++------------------
 utilities/test_suite/HIP/runVoxelTests.py  | 19 ++++--------
 utilities/test_suite/HOST/runAudioTests.py | 11 ++-----
 utilities/test_suite/HOST/runMiscTests.py  | 13 ++-------
 utilities/test_suite/HOST/runTests.py      | 33 +++++----------------
 utilities/test_suite/HOST/runVoxelTests.py | 16 ++++------
 utilities/test_suite/common.py             | 15 ++++++----
 9 files changed, 44 insertions(+), 129 deletions(-)

diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py
index 2370a529f..e5792c6fe 100644
--- a/utilities/test_suite/HIP/runAudioTests.py
+++ b/utilities/test_suite/HIP/runAudioTests.py
@@ -75,11 +75,7 @@ def generate_performance_reports(RESULTS_DIR):
 def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath):
     print("\n./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-    stdoutData, stderrData = result.communicate()
-    print(stdoutData.decode())
-    exitCode = result.returncode
-    if(exitCode != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode))
+    log_detected(result, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HIP")
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -87,10 +83,7 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba
         print("./Tensor_audio_hip " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_hip", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " "+ stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HIP")
         print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -107,10 +100,7 @@ def run_performance_test_with_profiler_cmd(loggingFolder, srcPath, case, numRuns
             output_str = output.decode('utf-8')
             logFile.write(output_str)
         
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " "+ stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HIP", get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HIP")
         print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath, profilingOption = "NO"):
diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py
index efcd33ac4..d31745bae 100644
--- a/utilities/test_suite/HIP/runMiscTests.py
+++ b/utilities/test_suite/HIP/runMiscTests.py
@@ -77,11 +77,7 @@ def generate_performance_reports(RESULTS_DIR):
 def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     print("\n./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-    stdoutData, stderrData = result.communicate()
-    print(stdoutData.decode())
-    exitCode = result.returncode
-    if(exitCode != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
+    log_detected(result, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_misc_func_name(int(case), numDims, additionalArg))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
@@ -89,10 +85,7 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to
         logFile.write("./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non- exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_misc_func_name(int(case), numDims, additionalArg))
 
 def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     if not os.path.exists(outFilePath + "/case_" + str(case)):
@@ -102,10 +95,7 @@ def run_performance_test_with_profiler_cmd(loggingFolder, numDims, case, numRuns
         logFile.write("\nrocprof --basenames on --timestamp on --stats -o " + outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv ./Tensor_misc_hip " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n")
         process = subprocess.Popen(['rocprof', '--basenames', 'on', '--timestamp', 'on', '--stats', '-o', outFilePath + "/case_" + str(case) + "/output_case" + str(case) + ".csv", "./Tensor_misc_hip", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non- exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_misc_func_name(int(case), numDims, additionalArg))
     print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg, profilingOption = 'NO'):
diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index 386df7529..a7d5b7c41 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -68,21 +68,13 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 for kernelSize in range(3, 10, 2):
                     print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize))
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdoutData, stderrData = result.communicate()
-                    print(stdoutData.decode())
-                    exitCode = result.returncode
-                    if(exitCode != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
+                    log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
             elif case == "8":
                 # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
                 for noiseType in range(3):
                     print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType))
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdoutData, stderrData = result.communicate()
-                    print(stdoutData.decode())
-                    exitCode = result.returncode
-                    if(exitCode != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
+                    log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
@@ -91,19 +83,11 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 for interpolationType in range(interpolationRange):
                     print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType))
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdoutData, stderrData = result.communicate()
-                    print(stdoutData.decode())
-                    exitCode = result.returncode
-                    if(exitCode != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
+                    log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
             else:
                 print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout))
                 result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                stdoutData, stderrData = result.communicate()
-                print(stdoutData.decode())
-                exitCode = result.returncode
-                if(exitCode != 0):
-                    log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
+                log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
             print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
@@ -111,10 +95,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d
         print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam))
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec        
         read_from_subprocess_and_write_to_log(process, logFile)
-        _, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
 
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
@@ -158,10 +139,7 @@ def run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, s
             output_str = output.decode('utf-8')
             logFile.write(output_str)
 
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"), get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
 
 # Parse and validate command-line arguments for the RPP test suite
 def rpp_test_suite_parser_and_validator():
diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py
index fd5c232b0..e0956e454 100644
--- a/utilities/test_suite/HIP/runVoxelTests.py
+++ b/utilities/test_suite/HIP/runVoxelTests.py
@@ -60,11 +60,7 @@ def func_group_finder(case_number):
 def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
     print("\n./Tensor_voxel_hip " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_hip", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec
-    stdoutData, stderrData = result.communicate()
-    print(stdoutData.decode())
-    exitCode = result.returncode
-    if(exitCode != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode))
+    log_detected(result, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"))    
     print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -78,18 +74,16 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
             output = output.decode('utf-8')
             if output:
                 print(output)
-                logFile.write(output)
             if "Running" in output or "max,min,avg wall times" in output:
                 cleanedOutput = ''.join(char for char in output if 32 <= ord(char) <= 126)  # Remove control characters
                 cleanedOutput = cleanedOutput.strip()  # Remove leading/trailing whitespace
                 logFile.write(cleanedOutput + '\n')
                 if "max,min,avg wall times" in output:
                     logFile.write("\n")
+            else:
+                logFile.write(output)
         
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"))    
         print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -109,10 +103,7 @@ def run_performance_test_with_profiler_cmd(loggingFolder, logFileLayout, headerP
                     break
                 print(output.strip())
                 logFile.write(output.decode('utf-8'))
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"), get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HIP"))    
     print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize, profilingOption = 'NO'):
diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py
index b6e13abb1..441615a15 100644
--- a/utilities/test_suite/HOST/runAudioTests.py
+++ b/utilities/test_suite/HOST/runAudioTests.py
@@ -48,11 +48,7 @@ def get_log_file_list():
 def run_unit_test_cmd(srcPath, case, numRuns, testType, batchSize, outFilePath):
     print("\n./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-    stdoutData, stderrData = result.communicate()
-    print(stdoutData.decode())
-    exitCode = result.returncode
-    if(exitCode != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exitCode))
+    log_detected(result, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HOST")
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
@@ -60,10 +56,7 @@ def run_performance_test_cmd(loggingFolder, srcPath, case, numRuns, testType, ba
         logFile.write("./Tensor_audio_host " + srcPath + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(numRuns) + " " + str(batchSize) + "\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_audio_host", srcPath, str(case), str(testType), str(numRuns), str(batchSize), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", audioAugmentationMap[int(case)][0], "_HOST", get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, audioAugmentationMap[int(case)][0], get_bit_depth(int(2)), "HOST")
         print("------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath):
diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py
index d1d4489f2..e864f9912 100644
--- a/utilities/test_suite/HOST/runMiscTests.py
+++ b/utilities/test_suite/HOST/runMiscTests.py
@@ -50,11 +50,7 @@ def get_log_file_list():
 def run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
     print("\n./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-    stdoutData, stderrData = result.communicate()
-    print(stdoutData.decode())
-    exitCode = result.returncode
-    if(exitCode != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, miscAugmentationMap[int(case)][0], "", get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
+    log_detected(result, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_misc_func_name(int(case), numDims, additionalArg))
     print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg):
@@ -62,11 +58,8 @@ def run_performance_test_cmd(loggingFolder, numDims, case, numRuns, testType, to
         logFile.write("./Tensor_misc_host " + str(case) + " " + str(testType) + " " + str(toggle) + " " + str(numDims) + " " + str(batchSize) + " " + str(numRuns) + " " + str(additionalArg) + "\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_misc_host", str(case), str(testType), str(toggle), str(numDims), str(batchSize), str(numRuns), str(additionalArg), outFilePath, scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non- exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, "", miscAugmentationMap[int(case)][0], get_misc_func_name(int(case), numDims, additionalArg), get_signal_name_from_return_code(exitCode))
-
+        log_detected(process, errorLog, miscAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_misc_func_name(int(case), numDims, additionalArg))
+ 
 def run_test(loggingFolder, numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg = ""):
     if testType == 0:
         run_unit_test_cmd(numDims, case, numRuns, testType, toggle, batchSize, outFilePath, additionalArg)
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 5a848fee5..853061ccd 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -67,18 +67,13 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 for kernelSize in range(3, 10, 2):
                     print(f"./Tensor_host {srcPath1} {srcPath2} {dstPathTemp} {bitDepth} {outputFormatToggle} {case} {kernelSize} 0 ")
                     result = subprocess.run([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    print(result.stdout.decode())
+                    log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
             elif case == "8":
                 # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
                 for noiseType in range(3):
                     print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0")
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdoutData, stderrData = result.communicate()
-                    print(stdoutData.decode())
-                    exitCode = result.returncode
-                    if(exitCode != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
-
+                    log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
             elif case == "21" or case == "23" or case == "24" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
@@ -87,19 +82,11 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 for interpolationType in range(interpolationRange):
                     print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0")
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                    stdoutData, stderrData = result.communicate()
-                    print(stdoutData.decode())
-                    exitCode = result.returncode
-                    if(exitCode != 0):
-                        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
+                    log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
             else:
                 print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0")
                 result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
-                stdoutData, stderrData = result.communicate()
-                print(stdoutData.decode())
-                exitCode = result.returncode
-                if(exitCode != 0):
-                    log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
+                log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
 
             print("------------------------------------------------------------------------------------------")
 
@@ -108,19 +95,13 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d
         with open(loggingFolder + "/BatchPD_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
             process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
             read_from_subprocess_and_write_to_log(process, logFile)
-            _, stderrData = process.communicate()
-            exitCode = process.returncode
-            if(exitCode != 0):
-                log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
+            log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
     with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
         logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n")
         process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
-        _, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"), get_signal_name_from_return_code(exitCode))
-
+        log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
+        
 def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
     print("\n")
     bitDepths = range(7)
diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py
index cbdf7d265..07de3f699 100644
--- a/utilities/test_suite/HOST/runVoxelTests.py
+++ b/utilities/test_suite/HOST/runVoxelTests.py
@@ -61,12 +61,7 @@ def func_group_finder(case_number):
 def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
     print("\n./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec
-    stdoutData, stderrData = result.communicate()
-    print(stdoutData.decode())
-    exitCode = result.returncode
-    if(exitCode != 0):
-        log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exitCode))
-    
+    log_detected(result, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_voxel_layout_type(layout, "HOST"))    
     print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -80,18 +75,17 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
             output = output.decode('utf-8')
             if output:
                 print(output)
-                logFile.write(output)
             if "Running" in output or "max,min,avg wall times" in output:
                 cleanedOutput = ''.join(char for char in output if 32 <= ord(char) <= 126)  # Remove control characters
                 cleanedOutput = cleanedOutput.strip()  # Remove leading/trailing whitespace
                 logFile.write(cleanedOutput + '\n')
                 if "max,min,avg wall times" in output:
                     logFile.write("\n")
+            else:
+                logFile.write(output)
+                
 
-        stdoutData, stderrData = process.communicate()
-        exitCode = process.returncode
-        if(exitCode != 0):
-            log_detected_errors("Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode(), errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"), get_signal_name_from_return_code(exitCode))
+        log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_voxel_layout_type(layout, "HOST"))    
         print("\n------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py
index 227ce220c..2ee05f51a 100644
--- a/utilities/test_suite/common.py
+++ b/utilities/test_suite/common.py
@@ -425,9 +425,9 @@ def get_misc_func_name(testCase, nDim, additionalArg):
     permOrder = additionalParam
     result = ""
     if (axisMaskCase):
-        result = result + "_" + str(nDim) + "d" + "_axisMask" + str(axisMask)
+        result = result + str(nDim) + "d" + "_axisMask" + str(axisMask)
     if (permOrderCase):
-        result = result + "_" + str(nDim) + "d" + "_permOrder" + str(permOrder)
+        result = result + str(nDim) + "d" + "_permOrder" + str(permOrder)
     return result
 
 def get_voxel_layout_type(layout, backend):
@@ -456,6 +456,11 @@ def get_signal_name_from_return_code(returnCode):
         result = result + signalName
     return result
 
-def log_detected_errors(errorData, errorLog, caseName, functionBitDepth, functionSpecificName, functionSignalName):
-    msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + functionSignalName
-    errorLog.append(msg)
+def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificName):
+    stdoutData, stderrData = result.communicate()
+    print(stdoutData.decode())
+    exitCode = result.returncode
+    if(exitCode != 0):
+        errorData = "Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode();
+        msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + get_signal_name_from_return_code(exitCode)
+        errorLog.append(msg)

From 8da86954cb58937e91b3b8ee831e7d6da6e0136a Mon Sep 17 00:00:00 2001
From: dineshbabu-ravichandran <dineshbabu.ravichandran@multicoreinc.com>
Date: Wed, 20 Nov 2024 13:26:27 +0000
Subject: [PATCH 09/17] Minor changes based on review comments

---
 utilities/test_suite/HOST/runTests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 853061ccd..c455e7941 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -275,7 +275,7 @@ def rpp_test_suite_parser_and_validator():
         elif case == "26" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1):
             srcPath1 = lensCorrectionInFilePath
             srcPath2 = lensCorrectionInFilePath
-        else :
+        else:
             srcPath1 = inFilePath1
             srcPath2 = inFilePath2
         # if QA mode is enabled overwrite the input folders with the folders used for generating golden outputs
@@ -311,7 +311,7 @@ def rpp_test_suite_parser_and_validator():
         elif case == "26" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv:
             srcPath1 = lensCorrectionInFilePath
             srcPath2 = lensCorrectionInFilePath
-        else :
+        else:
             srcPath1 = inFilePath1
             srcPath2 = inFilePath2
         for layout in range(3):

From d49dcc8cdbe7778ea47aa154062db2c0952a0033 Mon Sep 17 00:00:00 2001
From: dineshbabu-ravichandran <dineshbabu.ravichandran@multicoreinc.com>
Date: Thu, 21 Nov 2024 06:12:31 +0000
Subject: [PATCH 10/17] Modification for bitDepth in voxel host

---
 utilities/test_suite/HOST/runVoxelTests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py
index 07de3f699..87625bfe6 100644
--- a/utilities/test_suite/HOST/runVoxelTests.py
+++ b/utilities/test_suite/HOST/runVoxelTests.py
@@ -61,7 +61,7 @@ def func_group_finder(case_number):
 def run_unit_test_cmd(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
     print("\n./Tensor_voxel_host " + headerPath + " " + dataPath + " " + dstPathTemp + " " + str(layout) + " " + str(case) + " " + str(numRuns) + " " + str(testType) + " " + str(qaMode) + " " + str(batchSize) + " " + str(bitDepth))
     result = subprocess.Popen([buildFolderPath + "/build/Tensor_voxel_host", headerPath, dataPath, dstPathTemp, str(layout), str(case), str(numRuns), str(testType), str(qaMode), str(batchSize), str(bitDepth), scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # nosec
-    log_detected(result, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_voxel_layout_type(layout, "HOST"))    
+    log_detected(result, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"))    
     print("\n------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):
@@ -85,7 +85,7 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, headerPath, dataPath,
                 logFile.write(output)
                 
 
-        log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(2)), get_voxel_layout_type(layout, "HOST"))    
+        log_detected(process, errorLog, voxelAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_voxel_layout_type(layout, "HOST"))    
         print("\n------------------------------------------------------------------------------------------")
 
 def run_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize):

From bcc193a8378b711ad4ef81e26e445b3c9f81d75e Mon Sep 17 00:00:00 2001
From: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Date: Thu, 19 Dec 2024 09:52:46 +0000
Subject: [PATCH 11/17] Merge with develop branch

---
 .jenkins/common.groovy                        |    8 +-
 CHANGELOG.md                                  |    1 +
 CMakeLists.txt                                |   26 +-
 cmake/FindStdFilesystem.cmake                 |   58 +
 cmake/FindTurboJpeg.cmake                     |   61 +-
 ...mentations_warp_perspective_img150x150.png |  Bin 0 -> 7532 bytes
 docs/sphinx/requirements.in                   |    2 +-
 docs/sphinx/requirements.txt                  |    2 +-
 include/rppdefs.h                             |    8 +
 include/rppt_tensor_geometric_augmentations.h |   44 +
 src/include/cpu/rpp_cpu_common.hpp            |   19 +-
 src/include/cpu/rpp_cpu_simd.hpp              |   57 +-
 .../host_tensor_geometric_augmentations.hpp   |    1 +
 src/modules/cpu/kernel/warp_perspective.hpp   | 2196 +++++++++++++++++
 .../hip_tensor_geometric_augmentations.hpp    |    1 +
 src/modules/hip/kernel/warp_perspective.hpp   |  461 ++++
 .../rppt_tensor_geometric_augmentations.cpp   |  189 ++
 utilities/examples/brightness/CMakeLists.txt  |    4 +-
 .../HIP_NEW/CMakeLists.txt                    |    4 +-
 .../rpp-unittests/HIP_NEW/CMakeLists.txt      |    4 +-
 utilities/test_suite/CMakeLists.txt           |   40 +-
 utilities/test_suite/HIP/CMakeLists.txt       |   23 +-
 utilities/test_suite/HIP/Tensor_hip.cpp       |   43 +-
 utilities/test_suite/HIP/runTests.py          |   12 +-
 utilities/test_suite/HOST/CMakeLists.txt      |   19 +-
 utilities/test_suite/HOST/Tensor_host.cpp     |   40 +-
 utilities/test_suite/HOST/runTests.py         |   10 +-
 utilities/test_suite/common.py                |    3 +-
 utilities/test_suite/rpp_test_suite_image.h   |    1 +
 29 files changed, 3238 insertions(+), 99 deletions(-)
 create mode 100644 cmake/FindStdFilesystem.cmake
 create mode 100644 docs/data/doxygenOutputs/geometric_augmentations_warp_perspective_img150x150.png
 create mode 100644 src/modules/cpu/kernel/warp_perspective.hpp
 create mode 100644 src/modules/hip/kernel/warp_perspective.hpp

diff --git a/.jenkins/common.groovy b/.jenkins/common.groovy
index 7132af9c2..67b572340 100644
--- a/.jenkins/common.groovy
+++ b/.jenkins/common.groovy
@@ -54,8 +54,10 @@ def runTestCommand (platform, project) {
 
     def command = """#!/usr/bin/env bash
                 set -x
-                cd ${project.paths.project_build_prefix}/build/release
-                make test ARGS="-VV"
+                cd ${project.paths.project_build_prefix}/build
+                mkdir -p test && cd test
+                cmake /opt/rocm/share/rpp/test
+                ctest -VV
                 """
 
     platform.runCommand(this, command)
@@ -116,8 +118,6 @@ def runPackageCommand(platform, project) {
                 mv rpp-test*.${packageType} package/${osType}-rpp-test.${packageType}
                 mv rpp-dev*.${packageType} package/${osType}-rpp-dev.${packageType}
                 mv ${packageRunTime}.${packageType} package/${osType}-rpp.${packageType}
-                mv Testing/Temporary/LastTest.log ${osType}-LastTest.log
-                mv Testing/Temporary/LastTestsFailed.log ${osType}-LastTestsFailed.log
                 ${packageDetail} package/${osType}-rpp-test.${packageType}
                 ${packageDetail} package/${osType}-rpp-dev.${packageType}
                 ${packageDetail} package/${osType}-rpp.${packageType}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bd618dad0..7cdc6657f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ Full documentation for RPP is available at [https://rocm.docs.amd.com/projects/r
 
 * RPP Tensor Gaussian Filter support on HOST
 * RPP Fog augmentation on HOST and HIP
+* RPP Warp Perspective on HOST and HIP
 
 ## (Unreleased) RPP 1.9.4
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b6f91325d..4743524e2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,7 +68,6 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
 set(DEFAULT_BUILD_TYPE "Release")
 ### RPP_AUDIO_SUPPORT - default = ON, NOTE: support currently only on Ubuntu - user to set to OFF otherwise
 option(RPP_AUDIO_SUPPORT      "Build RPP with Audio Support"         ON)
-option(BUILD_WITH_AMD_ADVANCE "Build RPP for advanced AMD GPU Architecture"    OFF)
 
 # Set message options
 if(NOT WIN32)
@@ -154,7 +153,6 @@ message("-- ${Cyan}RPP Developer Options${ColourReset}")
 message("-- ${Cyan}     -D BACKEND=${BACKEND} [Select RPP Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}")
 message("-- ${Cyan}     -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} [Select RPP build type [options:Debug/Release](default:Release)]${ColourReset}")
 message("-- ${Cyan}     -D RPP_AUDIO_SUPPORT=${RPP_AUDIO_SUPPORT} [Select RPP audio support [options:ON/OFF](default:ON)]${ColourReset}")
-message("-- ${Cyan}     -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [Turn ON/OFF Build for AMD advanced GPUs(default:OFF)]${ColourReset}")
 
 # OpenMP
 find_package(OpenMP REQUIRED)
@@ -227,10 +225,7 @@ if("${BACKEND}" STREQUAL "HIP")
     list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip)
 
     # Set supported GPU Targets
-    set(DEFAULT_GPU_TARGETS "gfx908;gfx90a;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102")
-    if (BUILD_WITH_AMD_ADVANCE)
-        set(DEFAULT_GPU_TARGETS ${DEFAULT_GPU_TARGETS} "gfx1200;gfx1201")
-    endif()
+    set(DEFAULT_GPU_TARGETS "gfx908;gfx90a;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
 
     # Set AMD GPU_TARGETS
     if((AMDGPU_TARGETS OR DEFINED ENV{AMDGPU_TARGETS}) AND (NOT GPU_TARGETS))
@@ -425,8 +420,19 @@ endif()
 # Set the dependent packages
 set(RPP_DEBIAN_PACKAGE_LIST  "rocm-hip-runtime")
 set(RPP_RPM_PACKAGE_LIST     "rocm-hip-runtime")
-set(RPP_DEBIAN_DEV_PACKAGE_LIST  "rocm-hip-runtime-dev, half")
-set(RPP_RPM_DEV_PACKAGE_LIST     "rocm-hip-runtime-devel, half")
+set(RPP_DEBIAN_DEV_PACKAGE_LIST  "rocm-hip-runtime-dev, half, libomp-dev")
+set(RPP_RPM_DEV_PACKAGE_LIST     "rocm-hip-runtime-devel, half, libomp-devel")
+set(RPP_DEBIAN_TEST_PACKAGE_LIST  "python3-dev, python3-pip, libopencv-dev")
+set(RPP_RPM_TEST_PACKAGE_LIST     "python3-devel, python3-pip") # TBD: OpenCV Package missing on RPM
+
+# Add OS specific dependencies
+if(EXISTS "/etc/os-release")
+    file(READ "/etc/os-release" OS_RELEASE)
+    string(REGEX MATCH "22.04" UBUNTU_22_FOUND ${OS_RELEASE})
+    if(UBUNTU_22_FOUND)
+        set(RPP_DEBIAN_DEV_PACKAGE_LIST "${RPP_DEBIAN_DEV_PACKAGE_LIST}, libstdc++-12-dev")
+    endif()
+endif()
 
 # package release
 set(CPACK_DEBIAN_FILE_NAME       "DEB-DEFAULT")
@@ -461,7 +467,7 @@ set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core, ${RPP_DEBIAN_PACKAGE_LIST}"
 set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev")
 set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "rocm-core, ${PROJECT_NAME}, ${RPP_DEBIAN_DEV_PACKAGE_LIST}")
 set(CPACK_DEBIAN_TEST_PACKAGE_NAME "${PROJECT_NAME}-test")
-set(CPACK_DEBIAN_TEST_PACKAGE_DEPENDS "rocm-core, ${PROJECT_NAME}-dev, clang")
+set(CPACK_DEBIAN_TEST_PACKAGE_DEPENDS "rocm-core, ${PROJECT_NAME}-dev, ${RPP_DEBIAN_TEST_PACKAGE_LIST}")
 set(CPACK_DEBIAN_PACKAGE_LICENSE "MIT" )
 # Debian package specific variable for ASAN
 set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
@@ -473,7 +479,7 @@ set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core, ${RPP_RPM_PACKAGE_LIST}")
 set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel")
 set(CPACK_RPM_DEV_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}, ${RPP_RPM_DEV_PACKAGE_LIST}")
 set(CPACK_RPM_TEST_PACKAGE_NAME "${PROJECT_NAME}-test")
-set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}-devel, clang")
+set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}-devel, ${RPP_RPM_TEST_PACKAGE_LIST}")
 set(CPACK_RPM_PACKAGE_LICENSE "MIT" )
 # RPM package specific variable for ASAN
 set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
diff --git a/cmake/FindStdFilesystem.cmake b/cmake/FindStdFilesystem.cmake
new file mode 100644
index 000000000..1d24e72f9
--- /dev/null
+++ b/cmake/FindStdFilesystem.cmake
@@ -0,0 +1,58 @@
+################################################################################
+# 
+# MIT License
+# 
+# Copyright (c) 2023 Advanced Micro Devices, Inc.
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# 
+################################################################################
+include(CheckCXXSourceCompiles)
+include(CMakePushCheckState)
+
+cmake_push_check_state(RESET)
+
+set(CMAKE_REQUIRED_FLAGS "-std=c++17")
+
+check_cxx_source_compiles("
+#include <filesystem>
+int main() {
+  std::filesystem::path p;
+  return 0;
+}
+" STD_FILESYSTEM_PRESENT)
+
+cmake_pop_check_state()
+
+if(NOT STD_FILESYSTEM_PRESENT)
+  cmake_push_check_state(RESET)
+  set(CMAKE_REQUIRED_FLAGS "-std=c++17")
+
+  check_cxx_source_compiles("
+  #include <experimental/filesystem>
+  int main() {
+    std::experimental::filesystem::path p;
+    return 0;
+  }
+  " EXPERIMENTAL_FILESYSTEM_PRESENT)
+
+  cmake_pop_check_state()
+endif()
+
+set(FILESYSTEM_FOUND TRUE)
\ No newline at end of file
diff --git a/cmake/FindTurboJpeg.cmake b/cmake/FindTurboJpeg.cmake
index dff5715d7..6264d8916 100644
--- a/cmake/FindTurboJpeg.cmake
+++ b/cmake/FindTurboJpeg.cmake
@@ -1,27 +1,28 @@
-#[[
-MIT License
-
-Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-]]
-
+################################################################################
+# 
+# MIT License
+# 
+# Copyright (c) 2017 - 2024 Advanced Micro Devices, Inc.
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# 
+################################################################################
 if(APPLE)
     set(SHARED_LIB_TYPE ".dylib")
 else()
@@ -48,6 +49,7 @@ find_library(TurboJpeg_LIBRARIES
     ${TURBO_JPEG_PATH}/lib
     ${TURBO_JPEG_PATH}/lib64
     /usr/lib
+    #/usr/lib/x86_64-linux-gnu - package install libturbojpeg0-dev
     /opt/libjpeg-turbo/lib
 )
 mark_as_advanced(TurboJpeg_LIBRARIES)
@@ -61,6 +63,7 @@ find_path(TurboJpeg_LIBRARIES_DIRS
     ${TURBO_JPEG_PATH}/lib
     ${TURBO_JPEG_PATH}/lib64
     /usr/lib
+    #/usr/lib/x86_64-linux-gnu - package install libturbojpeg0-dev
     /opt/libjpeg-turbo/lib
 )
 mark_as_advanced(TurboJpeg_LIBRARIES_DIRS)
@@ -70,10 +73,12 @@ if(TurboJpeg_LIBRARIES AND TurboJpeg_INCLUDE_DIRS)
 endif( )
 
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args( TurboJpeg
-    FOUND_VAR  TurboJpeg_FOUND
+find_package_handle_standard_args(
+    TurboJpeg 
+    FOUND_VAR  
+        TurboJpeg_FOUND 
     REQUIRED_VARS
-        TurboJpeg_LIBRARIES
+        TurboJpeg_LIBRARIES 
         TurboJpeg_INCLUDE_DIRS
         TurboJpeg_LIBRARIES_DIRS
 )
@@ -90,4 +95,4 @@ else()
         message(FATAL_ERROR "{Red}FindTurboJpeg -- NOT FOUND${ColourReset}")
     endif()
     message( "-- ${Yellow}NOTE: FindTurboJpeg failed to find -- turbojpeg${ColourReset}" )
-endif()
\ No newline at end of file
+endif()
diff --git a/docs/data/doxygenOutputs/geometric_augmentations_warp_perspective_img150x150.png b/docs/data/doxygenOutputs/geometric_augmentations_warp_perspective_img150x150.png
new file mode 100644
index 0000000000000000000000000000000000000000..031da91cdf1289eecf2c2c2a5464bfc1ea123e39
GIT binary patch
literal 7532
zcmbW4byQT}`|l4eAkru~C<rJZjglk!A^iXn(jnc_4Kqkf4Izl65>nC(El5a8Bi-H1
zkONZ}zu&d){pbF1fA=|SKkJ-zo^#gwwf5T2esBB?eg&XWQ&Lp|2nYax;N}AG^T11h
zkl=6p&4ho0nCNdNAtojwCM6*y{m&pHzeP$$PDV<4i}DsZ#oxHOyG==P`|rcwpZxbz
zLJ}e(5(+X>vj3F)j}rbTKzj>F1JZ~H9sq>21Vpq1_-+7nqm$%6@@~ZbHwXxch)GDv
zZe&p26u@b2<P#Cyh$gvFd{Z5K^By3kC84`7qCiToWkL49ok8?{($`yDFRD8jwZ{<L
zVwN7E<P=PI?=iDH<l*Ie^jKU%^64`v>6iZ~Dk-a|s_E#y(bG3DG_ta`v9+^zaP;)@
z_VM-e4+sm7h>VK<5R;tpF*WT|`sa+i{DQ)7Ma3nhHMMo{`i91)=FYC}Up>8j{R86@
zlYgeBXJ+SC*VZ>Sx3+h7_mC&2XXh7}sH^M0Tm%5of3a@z|CNjOhKum;9FYCxA|Uj=
z8AP<iB=<!~=@hicEZpfIh`ztY@FMAJbq6_@m^OmZ(qoK*iCcX2A@VQUf5`sdz(W7O
z$o@Cj|KXYgZW9sQOdb&}00wZ-oxB);?|(QBZ~%rL2|6S~zUW%5jN(B{H7yEWe3?3a
zY-s#taG(Q9@>(s@S5cs)*1XW_A`tZ~hh-eY4(?o}*#CFT!oIG>`0%kzM7<tolMbI%
zQ{V018r!nfyct8-D6n-u)OI9%1}Opiv^E5_#&J>lZ!6#F#3?qvCFU@aT2MOZJC3p@
zhgqhm60f^XwcD&hK#}Gl+m#G^y0yN>f|K6p<(zDKu;hr?r?_7y<EdY%%Kw~0^G?p~
z;!$B2>ey_Ia>1*7a|{#Wz-_1@aQE88rzTF=qTb+z^a-g?RbQVJam;1`HUw;=FtO?Y
zx@2p^1MBB!;>~Zj<KC!xE#xpj-W+Q_8hku(&Zp4+>=!O7m93Xm-H6fp2R9!0eWlpG
z@+p^)O!KZ^J02i9gkG^!$!g&N;tB-(&$a5xNUTwOL~%Iz{XgP;M;oHnn_)eOkj6R5
zLtf6T)AtuLyX7h2#pW6oecJMQjYXZaR4TKp(*ye_4%u!ao(3|dmD_e#`Ky%-&<=qh
z8@sp3D6zHSOsJPFp^CSh=&NT_vTRje{vVtVS$*DzYA6Z2X=3Q<*LU#1=gLkn3gnFk
z`u<!4UKkI3WHGK&?B_3c&CnX=fjuh!N8qW0T^PM>wVsFOM*AyJMlPbG9S+6PrQm_E
zK@bjXvh)Kj0vW}z{jnK?gF<m!$@V_SfmDxQ6({Hl&kNiiYvhtKSlM8sQG^t+-fu~y
zlKB3kY^3BTvlXmK&*AE%9Sdi7TU&61oIRLMo!%SiccsTXL496{CH^C8hd4GJ7qm`S
zWFIamP<LfjQMgZ}@`dTX!FmP5wH_m+GdMZ`0yozl^}?EU2J`AotTw}&Bfd7(H%7~P
z^!$ue(p&u)sz4S><*tjkpu!!~x*9Aax6G3!?g;r@?qJ99!29dmL@)ilBy1**;1YFz
z6@=j_hoTl4`|$t`ni&s(*DuAO=j~K@Kt6W~HZaGR*Df#L`gdXQADW}MM-FU)*OwV!
zbDnFY@t#bvUo~OmzdY@j!*pA_qfbMx9tzG7elHc1q-YeD&XvIfp<2)m*&ug3Kq7&y
zl?9`vQ1(|W?2V>&Yox^)AAQDmaiGYxTSuEA&8lsytvjm`|9DVK+7Cv2u=c`UW53}6
zOHFdfkThv(VG#EiOIK%xUZ!i2Lxx3TvXvb%?RPECWIek?;y0|>pA$q?xihYpXnwA`
zOJzA)awJXhw>AgQU$yh+KJA{FLj_S5Df_=e3!!8JKmw*a1D|E2uJvqs((Y!%=N$u#
zA8`uZJNq7+dFM+EHVgAhZD*5bLJ4xHpQ6r-Qui9)T}@B?QOY#oA%AG38YP!takj<Y
z&NeAJ!}hk^Yuq9GGgPheNxF=nK0md+r)yChw2VFY@zW~io(WUp3^!1xV?n($=*WFv
z?Khl@wr+J&hbnS?ynb%5e$LA9{d~EM$f+3Z_d9;SM82o9S=J9FTC^}s*d?f)sz}9I
zSRH^__)QGslJ#Qr6n;ba#uOPbqmTtgr<bGWmGt|15EjG~%)8i8I%3jiiit_`P~cLm
zuY-<njKI#vl7Wvn`8i?fbQ7)zOd)<~*Il%x94p@U#QE$XTYmiL5rw*F4eoT4i__yv
zSciau-gC#@j>*&{`i$^*BfAZ_TUIwqwtXDDk`Ai&jtZAmd{-N!i2PYU-NwB%<l?Yp
zYzA*88Mn%qY0q}CLwD<msg(;fh}~CgJel5Ij$Ni|l|q6$FF9DIpNkB~(O#UgT7~kT
z3gM_$uubwjewAxIwdmVHuaFatEDwvqHvR-rI=Iu3R{2#7YQ9T$S=!roGa$37Q;7IE
z41MzuJ2^!4bG<OIQM#sU=-JNLDXJ7Q3zvHbF~nG+z~6%;klEXj-YW0yt!220qD&CV
z_5348XS}DX);|yU&)P4_QS{$}3=!^46JP)NQ+MVd$PL(Pg{9A2b1a=BAhoOd4UK+<
zA!YfDHa0uTj#yuw->aWjb=$_rZo0<RuuWJF;g7b#nN!s>TfxLsBU8aop{WatJ0z-|
zxYW<y1ulIe({=T-I3M>@hc0wuF-Vhn*YuxjgYAt{eHM3<#AoHNCo4QtX2LpSN-rqx
z7&?8;_-*&GG<g=SRdp06<*b`S_r*mP>*VuF$T6b)9*6F0@r`=njjFeevoUdVY;P-f
zjbZjGE4nVW)JrQ^K9<kXE--B|l$J5Iwb%@fLAwSd4JO~o`!?aVsBAdtau&?d8Q9X&
zG|p;lY#*1b&P7uiT}p1t9Rh5m#Ne!Fu%qLMrx7+tX!O?!a{hPWs(}w0N$%hN8r=sL
zSpZf}l>GWDOXWEnukVJp%8FwpnUlW2tZ$jr91v}q`L8E?CM&Q@=b9nRF?8r25K-H0
zd$Xs~(--kaO+_!}4zufgww#Qw@&yEw^8M)y9`Ml?E_BKCQEmaUEo%*3R*1`Pg|qlK
z659=mF<7q+o0zF1B^v{_Dxm(ry8|qFmsw&k>bJ$?+-6_%bmTq5LWeT3S*YbmJckrE
z73>I$!UOP#7FD)HCwFCEhQ#<!EY61;3ze_Ie`LS7cXX^1wil}FbXS|0J{`hXGm(8g
zyPiG?X%la&kD;RPe#&%7Ggll+Zpmv&L#7c$P}{e^YhH-Eb<?Zng7~Tsxe;~2&<+mK
ztQiC2Z?(lU`@+JlEi{jR^uJI#;U2u57<lsMSi7e)Ph;*vHfF}b`7@d+<D;*ZFn1P*
zYGpbeaLf%Io!J*fT|SS14EkNQ#z^3SH$s)8ctCdQQwSbd`NBbsibuG1=F-R#FjkbT
zE^6yGv`kl&XQ?}v7KpY@{C*!fHT;}c4j`Kf3j8uyVNYi;L>cSUO=3OsoC_W-uaGV)
z|4`k&-l^~E_cl;wv0&6<EVeqN*@af<h89{Z?Y!h>!16C)f)Tdpmp@MK!`;e<cS)63
zoQBVHZ}&UNbch>ROzQ7_6cH~|c-E4eGg+h^SI+GpzZJcsL1MGy&?7eBY4!p7)e8LZ
z?VgboadzrdMs-F`y&bJ*ksbKq>zs~hPKtvk;`KlztO#zq;1ESzTo`!d5Xe@SR-VPs
zQJgb9*UTC(Q~fW5(MbInhx?mSkVv@mhz<5pxx<J?&Y!v|;7%%YHCb|9ua_0803Ntw
zU;fpCncZEHzzYhNxTe2pIgZ#cYoyJ`<$CZhYJ$1FW?j#bLKL<7pR?UKbx8R=7lY(C
z9QRD>$YEk){@wQq>a`*4C}qU4HVU@3Q5E)lQi2MssrX|$Z+t<p(?Fp5`}gp&kmYBu
z)r3jjbA~uwl3kEutL3RsWnCGXU9LoXw94Oh=BVm3omS`P(@qao>f>o?_4Eb#D89H3
z(&;3NdSYO?`rQ*9jatS>H(Lle341qf;@f`7<gCk{kC_n4;aiZ%sB$MdTXO64dD>)V
zy47tCQICJC<O3>HiD}&ONA-XEnv<U4fmw7HJw~XuwWS%c9J7_7;*HXh-Ca@|=3(`@
zsxE$Gk`TkJNlZN!?D@PF85RD-MOLJ^V2a%O>!B520~0c(G2G(vRZu>=g4KANDe4$u
zx272>9!uq^Haf412qGwlr^k|}%_rvR_Xr2DB}B>bZ+I*??+&K9dM^ALEi!ugN@oR@
z0Im6P93u2~3PV;#W!E)6=byYhoMTu=F+)3;(!UY3+-)pt2YYO)y{?hNcVOiMU(hpD
z%PxuAzIU7m2?^#u1Tmw`%p*t5Uamc3+3w0{Y4w!rOr>>?h)ekG&v|1b0AwErS1E>M
zLYDR2by3vZz7ArlERfYdcb~f|gmCz)hSC-<Nwgek;Q9gIjeT9-3R(;j3(siiJJ*~c
z!%={~Lxysx&)eTO!&`mL*rw0JC-YP_uRq5*CKGBr*DlU(j+;Kf0|%}*rUF2@t_wx#
zAFpe&)He{{b;8EqM)V4lIgXXk)aGFZ;*7VrWR$tBh_0GAsB>7!;e(V#^R8|>2UzRu
zDi46&Enxjx|0`~!9S-W%oYm>J-T!ClKq2N-=|{9GZCurYFl60}APW3iMSe)Y<8>YG
z%s*8acj9yec7cdPJ9HrT%pv3Y%f#OhF5fvgswW8XfRj<*v3N%0Xs5(51FuZ5>RH99
z0V<d}rj;p&cl_<upMgGRie^bEWwOMsqE+)G^E>jAK~ku5^h*Z_*Mvm+8S9XyVO{dc
zFnS|_HCMtfzUGzCfXaxR3<7M3yXChuu)oGph}c>-zAL=!FjzX@ZA}%zI%qqrDs`nH
z6X>2hT9Y-VmCC1;eFQ(37n2zLW1!*9RXch*WZrc$KQO9Et7q1&FH^kpC8bHsY#XWE
zc`>$F_kk`6dv<8%P`cB@B;Zf>wQ{^_rqY#oY5V7N-p)wh>5rHHo+xnM208XOO%M@k
z`B!Y~jlZfV+|YB*puMHShUKqT6{C-C98brtDC%UYzjagIH7y)Yc#)`AtXWw3;&V}n
znp*F}Cs|!duQQ1jZ_Jbf=ebFMuIR3f|Ez8%``8FPVQj!QW|4_MK}Y<Jph6w2dl|)B
zdFWn%`z<dvb*4#!VpW)Q4l*Z!x9qc&B=L?vjF=r;O`<;PI<}u}e604&Th|*}bG{r2
zt%dfwC`f>b_NH^_kmgTnRsHHuTf?oZo8MLqeA$kq?NcJ%l7sR)?H|uNty8skACtwC
z8UKiO%FwlZG*)Pvwg)f>_Ad|8X-sdnS)?=s#NZ6e6;)5Uu1Ogm5*=s~K&HTO<%4gD
zJLVi-{CI%Q&z3yA8uhJnT;<3zn&zA8>V4n)w<VJSnS^^=ZNu2ib|+XwE^`pC+*EA2
zLdkq9Kh<I<qM}xq)mn3vv%w_bymBP>W;d%N>xKae*iVAjj@PeQp|uh=r&d}a1bVz4
z7M>vmPXfY>C2;`CW*y8x9c^B#oUny*U&FXcJI9qcc<rh7jJ{A2zN6XqD+{0sx{%??
zH#<=qX_ou<iREtDE2}fgb+QASm67Nis!y5-M1rY|cH&U5&&gHW+qVizd%AIitZ`Nn
z2yh&P9`hWV)T$%-xx7p6;)6h3TEn}ieA*g}(JKk7EtgwA!Y<sfZMgfmVH`jB4%n$&
z==Io}8AZ}^K-A&VM$<FYN=iW3eKa9V;{0$DT)TYuW`!K1ys}v{>4gV!9^D*FZn3*b
zt!yNrSP=R8tRE*gGq&FEsWFI*j?JlHDo`nCX=?TXSyM!qcFe$$L%!3ZTqXtAdZ2Px
zFZ22&8o9B|@?Ef%f9<6uPZE2PZg=#{k~68THZ%&3rb9KX%GKq_qeKL*BVqSWxhGNt
z<?`a#du3ap8e#L#knL<34n$>dE~R|Fziry}nPhK&(kD_?;3w6Y)U|#~c<;pbk6(`3
zxq7<aZj!G3`HVc9a-L>A;IhAv619ocZ{N-e7BRa-^FWqCtY8~(?6pvLCX`OSw9DdF
zL$1c8jp&DRV-9OW*#xq8>y-f~$8<qbuoshXb~?0bE8NLGO^l2F6dc@^*K~Za6MtoF
z(*NLBh`~Bh(>4qbm<HB~c;-uJXUb-raXo8P2aSO`T<5q}?GBR=q04KqF3RH2VG(az
z19HIvkiTItS>@?>S6Y<i_2Pg;ZHR{QfSVK2?m`J$4!IBMhtfH>>aWTvDb>(dUgV@7
zb@HcQQVk|#C5YjHy~;(`*}E5`*bJ}}9w47{gi^Y?=<9sAdpM_*x7^zBj+|Ac|Fvh|
zne3g^%O-Qyf%u0MiGM)Mu+_)jd#4kr75>iG{ACvTQe<QgiWhd)p8>RIm38l+Hgb1+
zbl6`Sgf9JJ_sbXzQDxp;YHe8OJ@~LDL}=9i-4BBFe#O`1$$~?YHJ)AK-dr@<-L+}#
z=QB@ji@S36rLw&zzWL6&1@vrj$0ETBq-FRi;E`F0>0(d3fV0w)XOE)Y(yoA#A^(29
zPNvm1+(;}cMTL{Cx7V?p=TlmriM|z2n!jllOUaXHWgc)(z4i;2g^h>fvnHD&ie4uU
zeqdj)5&s)*v8#~s<H&Xz?~zD(kvgPLlsl(AH{l*}ihWZ^q8U>k*KINrHkx}LU#6%S
zF~6FyKrkLyb!D}0;~Oi7AF^sC91ZWuAv9@Mqqy2%+)*GSrcnUm^yXT{fU&k)QXr!%
zoeNo16De$s7SZ*~*8`sRr5F7`l}C%cy!UqxIFpQy4In_H2t2uH&lV(4svk_bXLnKb
zUQf&B+mhv@oYw6=i2vaB)9ICV=6Ez3XZv&lZl0YiHp!KvU*_I-^`XZpAEh2xj*&x>
zBh3%oku(naVj^j$KUvN;6l9<@A8On*pDRHSea|S(l-Vxz`nYl=iG0Qb;wJmWU4*?7
zdmkrijjn!72<OPp(ib$5dpx$8JeDzDsh!elcx{6gfp+{8M2+Nda*fPosiQ_IVswb9
zSQSeMXq@fxgJ)mI<(hxOX2348uqm>FYh2z)(1lO+2RK{5WMP4^JNqb+d$Q0`%E;gZ
zOtS^?5cX@<TTW0`2KiFSoW`K{JxRaJlSAZaFW)v-eQ?WdkPV6)t&GvT;UR56Z1>dq
z4gM2tQD^$XRY<EY&WpkTgCgT-?D340I5o)1_{Nl976~c7DBhy!?oDo3^1ulr<WrGC
zF9RpuP6gCwy4N2`S@QG48m(;P(4OY(L3&|&2%l=69P!U@la0%B%*Gk%ef;E5xYX<Y
zuYN@rf!I=U)mGO+ZVnLArhQN{m-EE-<0-CQY(Jr0YaW()W8p_aGEA3ZOLhXbBMa5F
zpN|xo-g2WP`pJw@G^1u&)k*%AqvRJySE8B0+a>*^#m3ivb9k&8_Uw%mg|fnV50wuG
z0V)$Iv*H?k*v1(oiWEFbE%}7)ugm)4o^>K~YAT<CW*<bXzc5wS%cNWU-9S&S`%X_z
zH<%0*;R&L*o2Z*V4W<jJcTbIA4Kbb!@Rc|~YuF7D^<M<l)WN25QM@3`-OAUfPvtw%
z+L1$3JH~=bK2A7NRtV1R4dP2xrp=)qy3_>uwCe;BIpm?j)L3$0YkY9ZSA}%S_ftI5
z`}X9N<(#f)U1`e`Vci7ZL_6nCNj>fsbFpm%>Mua6K^P`HaFLJA#c`-33d5y)G8wAk
zMNja+r_crACt(Zk6R1~By^wb#TdE_}R9m>t_b)!9{J){>dPPlk(%cKnr-$mF4wPLo
zZr1KT)|g<T@@lUUI!r-JKo-piSwd0GRQ%m~dmL!xNuG`;emtTO1UFXMed&i#FBAxu
zcFwqr+>W%jG96)iM?SwN=X9>Yw$`Xy#3tu!TOT|!d^*fpqES#brf;gXl8o|d8hH4o
zY4lrk&}g!J>H<&t#U<$@s}7yM2j%r)uEMvg?c6p3rgyY{@uakE{|W^Acb}BSgQ!m`
zcNGJe%2vMF(HWPf@=h@5mC<t)N0FQ0l<f0ja9&^tcU^1C#L-%NRdX`=N|JS?y-8Wm
zo(Hqa2F-KMgw%`NgH5D1bu5lB;lhWq)B!CBx1XKxN+u!JV04F@C(FMk;xFzq$&KNM
zqo|RwS}2M2X!y{j=&moQ<<9}9n=A_9Q$^L`#p`j#o4W9I?wi^Ub)%Z;Po;hp;*GE&
zY03D-g2t#UH(krLL9OaZ=e0?JclnK>erYPa5nTn*UH3mG+~N$qway4WqF9;cbJmYh
zTKHERIr*sM;GA_)ep5(bJ9*A;MpT_LaX?`YY-A{~>bV6x+anQid1nP(RAa>^%UI^@
zzupM|MYc)wy8BY1mNk=&qfxPj6S@7(up&KBV2JeP8~LWoWeVJQ6{_rC^E)V<F7io=
zK=Fe=v_Na!I<xgL%9l8R<qG&74e@#`hT)BADsueP>@u+AFkQ(I;?_4K(G~MktGn!A
zZ2BduQDioC5!k*mf+ekXWo#WBaa}y^mA?M_!#mX+nxEK>eH>UT27?Ds?Zy4jH92r+
z)zAYIQ?mk{xVI#jV5GTmkRY~1;+WX!wWD<2-MJTo++6mbsbBApZdX=UH0YzG(BimB
zdA_JM_QAOw+`~wHkygqY)-<loq>sT<{Tp{MG3!OIXhUPSrsRe;9~*=h)Y(NpJ?M2u
z+_ZV|9L@>xQ-5~>1GFX`ryhZZvNLG#L91)G(NJ#0q28uobi9E;X|||z#`RS1g&ykH
z>JR>IriL7mmYSG;qgQ-P>%T+&@IW&)`pbGX*)A-%RRpp95(!Ee32Qatcoi7yPb`o{
zbBOsT(C56gC_3KIqz8VNDOrj-av3BWh?wrCToa0t{yjAv8yUi%Sd<mg)mS+WosGp&
zUaeIU`rVhMMRs*+4bevYBOPO1<xCrSaf!LMZF^dU4LkPYtI&dXrwp<l*tj=@J~NsA
zgld1DtM((9+PLmbpzq`PW?}Z;uYDDbmj1j)VmTEYdec@HI$cnNE_n8G%{yHB;OUrg
zpRr4~Q=;xQ4Rn})TpT*LzoNeN`A%Mtt9{9X<l-YDBJ+s0dr0w&hd&cKz5D-t-_DAh
z-`8<8zgt--YRLX#RqM`kQ5t<YC7GD(1JFIoo!1v0Q@w-h&z#7kONLWFqr1G(+8Cl*
zm7-?kpS*(IX3-i_9YqMaDl$+%S%m7-ha%Q{n;b9HaJDhWFb=&}R`H1M#!ZNL1}$^(
z;1DfE27<Gq10AH<)~RYEN;B|w+;xzLnSqAnfdOR&DjHz>z%4*N?3;8F0iZGZ?}oxU
zxVYadGH$Qg@J*`TCC<hJ6%pZ$u5BKw5|&TBO$Yo069UfiV$$n3D)a@MvE)R24}+?I
zvY*Z+D!TGOwB<i)Y0#yPC8MG!waeiTx!(pOr*ex-9a-M2qzxRdxph&oCMhz0x6E?x
znSb3Z=lT;|R~frf>q_bOVd;8!4g4rIGm-jh#j9V?JGtFZmIGPUmBGmgllkmwwStm4
zu`rkk3tuHm>wOK)RD(0O8c#Fi+40Xl7kMsJFou7fRXkaTUMTzSzt#zu>0cnYX2gwu
zzJ;2?b!ny<a)sL7;&IMylC_QDAL-Q4sNX`hho8X6ja7qfGh#J#W#brwMaI+cfFe5R
t#DAQye;BtBprshd%ApJ4XVzb0G&rU#=&~>Y0>Ts-bYj#Q&g}5B{|jo#!wCQY

literal 0
HcmV?d00001

diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in
index b49e04e37..ed04a6309 100644
--- a/docs/sphinx/requirements.in
+++ b/docs/sphinx/requirements.in
@@ -1 +1 @@
-rocm-docs-core[api_reference]==1.11.0
+rocm-docs-core[api_reference]==1.12.0
diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt
index 0ac105557..2dfd59d36 100644
--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -132,7 +132,7 @@ requests==2.32.3
     # via
     #   pygithub
     #   sphinx
-rocm-docs-core[api-reference]==1.11.0
+rocm-docs-core[api-reference]==1.12.0
     # via -r requirements.in
 six==1.16.0
     # via python-dateutil
diff --git a/include/rppdefs.h b/include/rppdefs.h
index a53176630..6bbee49ea 100644
--- a/include/rppdefs.h
+++ b/include/rppdefs.h
@@ -242,6 +242,14 @@ typedef struct
     Rpp32f data[6];
 } Rpp32f6;
 
+/*! \brief RPP 9 float vector
+ * \ingroup group_rppdefs
+ */
+typedef struct
+{
+    Rpp32f data[9];    // NOTE(review): presumably one flattened 3x3 matrix (e.g. a warp perspective matrix) - confirm against callers
+} Rpp32f9;
+
 /*! \brief RPP 24 signed int vector
  * \ingroup group_rppdefs
  */
diff --git a/include/rppt_tensor_geometric_augmentations.h b/include/rppt_tensor_geometric_augmentations.h
index aa7eaf4d8..97497749e 100644
--- a/include/rppt_tensor_geometric_augmentations.h
+++ b/include/rppt_tensor_geometric_augmentations.h
@@ -725,6 +725,50 @@ RppStatus rppt_transpose_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDescP
 /*! @}
  */
 
+/*! \brief Warp perspective augmentation on HOST backend for a NCHW/NHWC layout tensor
+ * \details The warp perspective augmentation performs perspective transformations for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
+ * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
+ * - dstPtr depth ranges - Will be same depth as srcPtr.
+ * \image html img150x150.png Sample Input
+ * \image html geometric_augmentations_warp_perspective_img150x150.png Sample Output
+ * \param [in] srcPtr source tensor in HOST memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HOST memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
+ * \param [in] perspectiveTensor perspective matrix values for transformation calculation (2D tensor in HOST memory, of size batchSize * 9 for each image in batch)
+ * \param [in] interpolationType Interpolation type used (RpptInterpolationType::BILINEAR or RpptInterpolationType::NEAREST_NEIGHBOR)
+ * \param [in] roiTensorPtrSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
+ * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+ * \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_warp_perspective_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *perspectiveTensor, RpptInterpolationType interpolationType, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
+
+#ifdef GPU_SUPPORT
+/*! \brief Warp perspective augmentation on HIP backend for a NCHW/NHWC layout tensor
+ * \details The warp perspective augmentation performs perspective transformations for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
+ * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
+ * - dstPtr depth ranges - Will be same depth as srcPtr.
+ * \image html img150x150.png Sample Input
+ * \image html geometric_augmentations_warp_perspective_img150x150.png Sample Output
+ * \param [in] srcPtr source tensor in HIP memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HIP memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
+ * \param [in] perspectiveTensor perspective matrix values for transformation calculation (2D tensor in pinned/HIP memory, of size batchSize * 9 for each image in batch)
+ * \param [in] interpolationType Interpolation type used (RpptInterpolationType::BILINEAR or RpptInterpolationType::NEAREST_NEIGHBOR)
+ * \param [in] roiTensorPtrSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
+ * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+ * \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_warp_perspective_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *perspectiveTensor, RpptInterpolationType interpolationType, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
+#endif // GPU_SUPPORT
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/include/cpu/rpp_cpu_common.hpp b/src/include/cpu/rpp_cpu_common.hpp
index 01fb16edd..e9b824f24 100644
--- a/src/include/cpu/rpp_cpu_common.hpp
+++ b/src/include/cpu/rpp_cpu_common.hpp
@@ -5192,10 +5192,17 @@ inline void compute_generic_bilinear_srclocs_and_interpolate(T *srcPtrChannel, R
 
     for (int c = 0; c < srcDescPtr->c; c++)
     {
-        dst[c] = (T)std::nearbyintf(((*(srcPtrChannel + srcLoc[0]) * bilinearCoeffs[0]) +        // TopRow R01 Pixel * coeff0
-                    (*(srcPtrChannel + srcLoc[1]) * bilinearCoeffs[1]) +        // TopRow R02 Pixel * coeff1
-                    (*(srcPtrChannel + srcLoc[2]) * bilinearCoeffs[2]) +        // BottomRow R01 Pixel * coeff2
-                    (*(srcPtrChannel + srcLoc[3]) * bilinearCoeffs[3])));        // BottomRow R02 Pixel * coeff3
+        if constexpr (std::is_same<T, Rpp8s>::value || std::is_same<T, Rpp8u>::value)
+          dst[c] = (T)std::nearbyintf(((*(srcPtrChannel + srcLoc[0]) * bilinearCoeffs[0]) +        // TopRow R01 Pixel * coeff0
+                    (*(srcPtrChannel + srcLoc[1]) * bilinearCoeffs[1]) +                           // TopRow R02 Pixel * coeff1
+                    (*(srcPtrChannel + srcLoc[2]) * bilinearCoeffs[2]) +                           // BottomRow R01 Pixel * coeff2
+                    (*(srcPtrChannel + srcLoc[3]) * bilinearCoeffs[3])));                          // BottomRow R02 Pixel * coeff3
+        else if constexpr (std::is_same<T, Rpp32f>::value || std::is_same<T, Rpp16f>::value)
+          dst[c] = (T)(((*(srcPtrChannel + srcLoc[0]) * bilinearCoeffs[0]) +                       // TopRow R01 Pixel * coeff0
+                    (*(srcPtrChannel + srcLoc[1]) * bilinearCoeffs[1]) +                           // TopRow R02 Pixel * coeff1
+                    (*(srcPtrChannel + srcLoc[2]) * bilinearCoeffs[2]) +                           // BottomRow R01 Pixel * coeff2
+                    (*(srcPtrChannel + srcLoc[3]) * bilinearCoeffs[3])));                          // BottomRow R02 Pixel * coeff3
+
         srcPtrChannel += srcDescPtr->strides.cStride;
     }
 }
@@ -5269,7 +5276,9 @@ inline void compute_generic_bilinear_srclocs_3c_avx(__m256 &pSrcY, __m256 &pSrcX
 template <typename T>
 inline void compute_generic_bilinear_interpolation_pkd3_to_pln3(Rpp32f srcY, Rpp32f srcX, RpptROI *roiLTRB, T *dstPtrTempR, T *dstPtrTempG, T *dstPtrTempB, T *srcPtrChannel, RpptDescPtr srcDescPtr)
 {
-    if ((srcX < roiLTRB->ltrbROI.lt.x) || (srcY < roiLTRB->ltrbROI.lt.y) || (srcX > roiLTRB->ltrbROI.rb.x) || (srcY > roiLTRB->ltrbROI.rb.y))
+    Rpp32s srcXFloor = std::floor(srcX);
+    Rpp32s srcYFloor = std::floor(srcY);
+    if ((srcXFloor < roiLTRB->ltrbROI.lt.x) || (srcYFloor < roiLTRB->ltrbROI.lt.y) || (srcXFloor > roiLTRB->ltrbROI.rb.x) || (srcYFloor > roiLTRB->ltrbROI.rb.y))
     {
         *dstPtrTempR = 0;
         *dstPtrTempG = 0;
diff --git a/src/include/cpu/rpp_cpu_simd.hpp b/src/include/cpu/rpp_cpu_simd.hpp
index 813d47aa0..c58f207d3 100644
--- a/src/include/cpu/rpp_cpu_simd.hpp
+++ b/src/include/cpu/rpp_cpu_simd.hpp
@@ -3080,7 +3080,7 @@ inline void rpp_generic_nn_load_u8pln1_avx(Rpp8u *srcPtrChannel, Rpp32s *srcLoc,
             buffer[i] = *(srcPtrChannel + srcLoc[i]);
     }
     __m128i px = _mm_loadu_si128((__m128i *)buffer);
-    p = _mm256_setr_m128i(px, xmm_px0);
+    p = _mm256_castsi128_si256(px);
 }
 
 inline void rpp_generic_nn_load_f32pkd3_to_f32pln3(Rpp32f *srcPtrChannel, Rpp32s *srcLoc, Rpp32s *invalidLoad, __m128 *p)
@@ -4108,6 +4108,40 @@ inline void rpp_resize_nn_load_f16pkd3_to_f32pln3_avx(Rpp16f *srcRowPtrsForInter
                           (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 2), (Rpp32f)*(srcRowPtrsForInterp + loc[7] + 2));
 }
 
+inline void rpp_generic_nn_load_f16pkd3_to_f32pln3_avx(Rpp16f *srcRowPtrsForInterp, Rpp32s *loc, Rpp32s *invalidLoad, __m256 *p)    // Gather 8 f16 triplets addressed by loc[0..7] into three f32 vectors p[0..2], one vector per element offset (+0, +1, +2)
+{
+    p[0] = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0]) : 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1]) : 0,    // p[0]: element at loc[i] + 0 for i = 0..7, or 0 where invalidLoad[i] is non-zero
+                          (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2]) : 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3]) : 0,
+                          (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4]) : 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5]) : 0,
+                          (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6]) : 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7]) : 0);
+
+    p[1] = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0] + 1) : 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1] + 1) : 0,    // p[1]: element at loc[i] + 1 for i = 0..7, or 0 where invalidLoad[i] is non-zero
+                          (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2] + 1) : 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3] + 1) : 0,
+                          (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4] + 1) : 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5] + 1) : 0,
+                          (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 1) : 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7] + 1) : 0);
+
+    p[2] = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0] + 2) : 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1] + 2) : 0,    // p[2]: element at loc[i] + 2 for i = 0..7, or 0 where invalidLoad[i] is non-zero
+                          (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2] + 2) : 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3] + 2) : 0,
+                          (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4] + 2) : 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5] + 2) : 0,
+                          (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 2) : 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7] + 2) : 0);
+}
+
+inline void rpp_generic_nn_load_f16pkd3_to_f32pkd3_avx(Rpp16f *srcRowPtrsForInterp, Rpp32s *loc, Rpp32s *invalidLoad, __m256 *p)    // Gather 8 f16 triplets addressed by loc[0..7] into 24 interleaved f32 values spread across p[0..2]
+{
+    p[0] = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0]): 0, (!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0] + 1): 0,        // Get R01|G01|B01|R02|G02|B02|R03|G03
+                          (!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0] + 2): 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1]): 0,        // load the values from input using srcLoc buffer if invalidLoad is 0, else set the values to 0
+                          (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1] + 1): 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1] + 2): 0,
+                          (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2]): 0, (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2] + 1): 0);
+    p[1] = _mm256_setr_ps((!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2] + 2): 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3]): 0,        // Get B03|R04|G04|B04|R05|G05|B05|R06
+                          (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3] + 1): 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3] + 2): 0,    // load the values from input using srcLoc buffer if invalidLoad is 0, else set the values to 0
+                          (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4]): 0, (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4] + 1): 0,
+                          (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4] + 2): 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5]): 0);
+    p[2] = _mm256_setr_ps((!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5] + 1): 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5] + 2): 0,    // Get G06|B06|R07|G07|B07|R08|G08|B08
+                          (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6]): 0, (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 1): 0,        // load the values from input using srcLoc buffer if invalidLoad is 0, else set the values to 0
+                          (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6] + 2): 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7]): 0,
+                          (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7] + 1): 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7] + 2): 0);
+}
+
 inline void rpp_resize_nn_load_f32pln1(Rpp32f *srcRowPtrsForInterp, Rpp32s *loc, __m128 &p)
 {
     __m128 pTemp[4];
@@ -4136,6 +4170,14 @@ inline void rpp_resize_nn_load_f16pln1_avx(Rpp16f *srcRowPtrsForInterp, Rpp32s *
                        (Rpp32f)*(srcRowPtrsForInterp + loc[6]), (Rpp32f)*(srcRowPtrsForInterp + loc[7]));
 }
 
+inline void rpp_generic_nn_load_f16pln1_avx(Rpp16f *srcRowPtrsForInterp, Rpp32s *loc, Rpp32s *invalidLoad, __m256 &p)    // Gather 8 single f16 values addressed by loc[0..7] into one f32 vector p
+{
+    p = _mm256_setr_ps((!invalidLoad[0]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[0]) : 0, (!invalidLoad[1]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[1]) : 0,    // lane i = value at loc[i] converted to f32, or 0 where invalidLoad[i] is non-zero
+                       (!invalidLoad[2]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[2]) : 0, (!invalidLoad[3]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[3]) : 0,
+                        (!invalidLoad[4]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[4]) : 0, (!invalidLoad[5]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[5]) : 0,
+                        (!invalidLoad[6]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[6]) : 0, (!invalidLoad[7]) ? (Rpp32f)*(srcRowPtrsForInterp + loc[7]) : 0);
+}
+
 inline void rpp_resize_nn_load_i8pkd3(Rpp8s *srcRowPtrsForInterp, Rpp32s *loc, __m128i &p)
 {
     __m128i px[4];
@@ -4258,8 +4300,6 @@ inline void rpp_store12_i8pkd3_to_i8pln3(Rpp8s* dstPtrR, Rpp8s* dstPtrG, Rpp8s*
     rpp_storeu_si32((__m128i *)(dstPtrB), _mm_shuffle_epi8(p, xmm_char_maskB)); /* Shuffle and extract the B pixels*/
 }
 
-
-
 inline void rpp_store12_i8_to_i8(Rpp8s* dstPtr, __m128i &p)
 {
     _mm_storeu_si128((__m128i *)(dstPtr), p);
@@ -4304,6 +4344,17 @@ inline void rpp_store24_f32pkd3_to_f32pkd3_avx(Rpp32f* dstPtr, __m256 *p)
     _mm256_storeu_ps(dstPtr + 16, p[2]); /* Store RGB set 3 */
 }
 
+inline void rpp_store24_f32pkd3_to_f16pkd3_avx(Rpp16f* dstPtr, __m256* p)    // Convert the 24 f32 values in p[0..2] to f16 and store them contiguously at dstPtr
+{
+    __m128i px128[3];    // each entry holds 8 converted f16 values
+    px128[0] = _mm256_cvtps_ph(p[0], _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);    // 8 x f32 -> 8 x f16, rounding toward zero with exceptions suppressed
+    px128[1] = _mm256_cvtps_ph(p[1], _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+    px128[2] = _mm256_cvtps_ph(p[2], _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+    _mm_storeu_si128((__m128i *)dstPtr, px128[0]);    // unaligned store of f16 values 0-7
+    _mm_storeu_si128((__m128i *)(dstPtr + 8), px128[1]);    // unaligned store of f16 values 8-15
+    _mm_storeu_si128((__m128i *)(dstPtr + 16), px128[2]);    // unaligned store of f16 values 16-23
+}
+
 inline void rpp_convert24_pkd3_to_pln3(__m128i &pxLower, __m128i &pxUpper, __m128i *pxDstChn)
 {
     // pxLower = R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5 G5 B5 R6
diff --git a/src/modules/cpu/host_tensor_geometric_augmentations.hpp b/src/modules/cpu/host_tensor_geometric_augmentations.hpp
index 9facb0d78..9248e6f2d 100644
--- a/src/modules/cpu/host_tensor_geometric_augmentations.hpp
+++ b/src/modules/cpu/host_tensor_geometric_augmentations.hpp
@@ -39,5 +39,6 @@ SOFTWARE.
 #include "kernel/transpose.hpp"
 #include "kernel/crop_and_patch.hpp"
 #include "kernel/flip_voxel.hpp"
+#include "kernel/warp_perspective.hpp"
 
 #endif // HOST_TENSOR_GEOMETRIC_AUGMENTATIONS_HPP
diff --git a/src/modules/cpu/kernel/warp_perspective.hpp b/src/modules/cpu/kernel/warp_perspective.hpp
new file mode 100644
index 000000000..24d4de178
--- /dev/null
+++ b/src/modules/cpu/kernel/warp_perspective.hpp
@@ -0,0 +1,2196 @@
+/*
+MIT License
+
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include "rppdefs.h"
+#include "rpp_cpu_simd.hpp"
+#include "rpp_cpu_common.hpp"
+
+/************* warp_perspective helpers *************/
+
+#if __AVX2__
+inline void compute_warp_perspective_src_loc_next_term_avx(__m256 &plocW, __m256 &plocY, __m256 &plocX, __m256 &pSrcY, __m256 &pSrcX, __m256 &pPerspectiveMatrixTerm6Incr, __m256 &pPerspectiveMatrixTerm3Incr, __m256 &pPerspectiveMatrixTerm0Incr, __m256 &pRoiHalfHeight, __m256 &pRoiHalfWidth)
+{   // Advances the 8-lane W/Y/X accumulators in place by the supplied increments and recomputes pSrcY/pSrcX from them
+    plocW = _mm256_add_ps(plocW, pPerspectiveMatrixTerm6Incr);   // W accumulator += Term6 increment
+    plocY = _mm256_add_ps(plocY, pPerspectiveMatrixTerm3Incr);   // Y accumulator += Term3 increment
+    plocX = _mm256_add_ps(plocX, pPerspectiveMatrixTerm0Incr);   // X accumulator += Term0 increment
+    pSrcY = _mm256_add_ps(_mm256_div_ps(plocY, plocW), pRoiHalfHeight);   // srcY = locY / locW + roiHalfHeight, per lane (no guard against locW == 0)
+    pSrcX = _mm256_add_ps(_mm256_div_ps(plocX, plocW), pRoiHalfWidth);   // srcX = locX / locW + roiHalfWidth, per lane
+}
+inline void compute_warp_perspective_src_loc_first_term_avx(Rpp32f locX, Rpp32f locY, Rpp32f locW, __m256 &plocW, __m256 &plocY, __m256 &plocX, __m256 &pSrcY, __m256 &pSrcX, __m256 &pPerspectiveMatrixTerm6, __m256 &pPerspectiveMatrixTerm3, __m256 &pPerspectiveMatrixTerm0, __m256 &pRoiHalfHeight, __m256 &pRoiHalfWidth) {   // Initializes the 8-lane W/Y/X accumulators from scalar loc values (taken by value, so the caller's scalars are unchanged) and computes the first pSrcY/pSrcX
+    plocX = _mm256_add_ps(_mm256_set1_ps(locX), pPerspectiveMatrixTerm0);   // Broadcast scalar locX and add the per-lane Term0 offsets
+    plocY = _mm256_add_ps(_mm256_set1_ps(locY), pPerspectiveMatrixTerm3);   // Broadcast scalar locY and add the per-lane Term3 offsets
+    plocW = _mm256_add_ps(_mm256_set1_ps(locW), pPerspectiveMatrixTerm6);   // Broadcast scalar locW and add the per-lane Term6 offsets
+    pSrcY = _mm256_add_ps(_mm256_div_ps(plocY, plocW), pRoiHalfHeight);   // srcY = locY / locW + roiHalfHeight, per lane (no guard against locW == 0)
+    pSrcX = _mm256_add_ps(_mm256_div_ps(plocX, plocW), pRoiHalfWidth);   // srcX = locX / locW + roiHalfWidth, per lane
+}
+#endif
+
+inline void compute_warp_perspective_src_loc_params(Rpp32s dstY, Rpp32s dstX, Rpp32f &locW, Rpp32f &locY, Rpp32f &locX, Rpp32f9 *perspectiveMatrix_f9, Rpp32s roiHalfHeight, Rpp32s roiHalfWidth)
+{   // Writes locX/locY/locW: the three rows of the 9-element matrix applied to (dstX - roiHalfWidth, dstY - roiHalfHeight, 1)
+    dstX -= roiHalfWidth;    // dstX is a by-value parameter, so this shift is local to the function
+    dstY -= roiHalfHeight;   // dstY is a by-value parameter, so this shift is local to the function
+    locX = std::fma(dstX, perspectiveMatrix_f9->data[0], std::fma(dstY, perspectiveMatrix_f9->data[1], perspectiveMatrix_f9->data[2]));   // locX = dstX * m[0] + dstY * m[1] + m[2]
+    locY = std::fma(dstX, perspectiveMatrix_f9->data[3], std::fma(dstY, perspectiveMatrix_f9->data[4], perspectiveMatrix_f9->data[5]));   // locY = dstX * m[3] + dstY * m[4] + m[5]
+    locW = std::fma(dstX, perspectiveMatrix_f9->data[6], std::fma(dstY, perspectiveMatrix_f9->data[7], perspectiveMatrix_f9->data[8]));   // locW = dstX * m[6] + dstY * m[7] + m[8]
+}
+
+inline void compute_warp_perspective_src_loc_next_term(Rpp32s dstX, Rpp32f &locW, Rpp32f &locY, Rpp32f &locX, Rpp32f &srcY, Rpp32f &srcX, Rpp32f9 *perspectiveMatrix_f9, Rpp32s roiHalfHeight, Rpp32s roiHalfWidth)
+{   // Steps locW/locY/locX by one matrix-column delta and recomputes srcX/srcY; NOTE(review): the dstX parameter is never read in this body
+    locW += perspectiveMatrix_f9->data[6];   // Used in computation of the next divisor by adding the delta from previous location
+    locY += perspectiveMatrix_f9->data[3];   // Used in computation of next srcY locations by adding the delta from previous location
+    locX += perspectiveMatrix_f9->data[0];   // Used in computation of next srcX locations by adding the delta from previous location
+    srcX = ((locX / locW) + roiHalfWidth);   // srcX = locX / locW + roiHalfWidth (no guard against locW == 0)
+    srcY = ((locY / locW) + roiHalfHeight);  // srcY = locY / locW + roiHalfHeight
+}
+
+/************* NEAREST NEIGHBOR INTERPOLATION *************/
+
+RppStatus warp_perspective_nn_u8_u8_host_tensor(Rpp8u *srcPtr,
+                                                RpptDescPtr srcDescPtr,
+                                                Rpp8u *dstPtr,
+                                                RpptDescPtr dstDescPtr,
+                                                Rpp32f *perspectiveTensor,
+                                                RpptROIPtr roiTensorPtrSrc,
+                                                RpptRoiType roiType,
+                                                RppLayoutParams srcLayoutParams,
+                                                rpp::Handle& handle)
+{   // Nearest-neighbor warp perspective for u8 input/output, one batch entry per OpenMP iteration; NOTE(review): srcLayoutParams is not read in this function
+    RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};   // Fallback ROI built from the source descriptor width/height
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+    for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
+    {
+        RpptROI roi, roiLTRB;
+        RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+        compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);
+        compute_ltrb_from_xywh_host(&roi, &roiLTRB);
+        Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1;     // Integer half of the ROI width
+        Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1;   // Integer half of the ROI height
+
+        Rpp32f9 *perspectiveMatrix_f9;
+        perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9);   // 9 floats per batch entry, viewed as an Rpp32f9
+
+        Rpp8u *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage;
+        srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+        srcPtrChannel = srcPtrImage;
+        dstPtrChannel = dstPtrImage;
+
+        Rpp32s vectorIncrementPerChannel = 8;
+        Rpp32s vectorIncrementPkd = 24;
+        Rpp32u bufferLength = roi.xywhROI.roiWidth;
+        Rpp32u alignedLength = bufferLength & ~7;   // Round dst width down to a multiple of 8, since 8 dst pixels are processed per vector iteration
+        Rpp32s srcLoc[8] = {0};         // 8 entries, matching the 8 dst pixels processed per vector iteration
+        Rpp32s invalidLoad[8] = {0};    // 8 entries, matching the 8 dst pixels processed per vector iteration
+
+#if __AVX2__
+        __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride);
+        __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7);   // Lane k holds k * data[0], k = 0..7
+        __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7);   // Lane k holds k * data[3], k = 0..7
+        __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7);   // Lane k holds k * data[6], k = 0..7
+        __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8);   // Step for advancing 8 columns at once
+        __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8);   // Step for advancing 8 columns at once
+        __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8);   // Step for advancing 8 columns at once
+        __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight);
+        __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth);
+        __m256 pRoiLTRB[4];
+        pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x);
+        pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y);
+        pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x);
+        pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y);
+#endif
+
+        // Warp perspective with fused output-layout toggle (NHWC -> NCHW)
+        if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8u *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8u *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);   // Scalar loc values for column 0 of row i
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256i pRow;
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true);
+                    rpp_simd_load(rpp_generic_nn_load_u8pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow);
+                    rpp_simd_store(rpp_store24_u8pkd3_to_u8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Scalar loc values are still at column 0; move them to column vectorLoopCount for the tail loop
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar tail for the remaining (width % 8) columns, or all columns without AVX2
+                {
+                    compute_generic_nn_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective with fused output-layout toggle (NCHW -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp8u *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            Rpp8u *srcPtrChannelR, *srcPtrChannelG, *srcPtrChannelB;
+            srcPtrChannelR = srcPtrChannel;
+            srcPtrChannelG = srcPtrChannelR + srcDescPtr->strides.cStride;
+            srcPtrChannelB = srcPtrChannelG + srcDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8u *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);   // Scalar loc values for column 0 of row i
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256i pRow[3];
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad);
+                    rpp_simd_load(rpp_generic_nn_load_u8pln1_avx, srcPtrChannelR, srcLoc, invalidLoad, pRow[0]);
+                    rpp_simd_load(rpp_generic_nn_load_u8pln1_avx, srcPtrChannelG, srcLoc, invalidLoad, pRow[1]);
+                    rpp_simd_load(rpp_generic_nn_load_u8pln1_avx, srcPtrChannelB, srcLoc, invalidLoad, pRow[2]);
+                    rpp_simd_store(rpp_store24_u8pln3_to_u8pkd3_avx, dstPtrTemp, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Scalar loc values are still at column 0; move them to column vectorLoopCount for the tail loop
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar tail for the remaining (width % 8) columns, or all columns without AVX2
+                {
+                    compute_generic_nn_interpolation_pln3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective without fused output-layout toggle (NHWC -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp8u *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8u *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);   // Scalar loc values for column 0 of row i
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256i pRow;
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true);
+                    rpp_simd_load(rpp_generic_nn_load_u8pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow);
+                    rpp_simd_store(rpp_store24_u8_to_u8_avx, dstPtrTemp, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Scalar loc values are still at column 0; move them to column vectorLoopCount for the tail loop
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar tail for the remaining (width % 8) columns, or all columns without AVX2
+                {
+                    compute_generic_nn_interpolation_pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective without fused output-layout toggle (NCHW -> NCHW for 1 channel and 3 channel)
+        else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8u *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8u *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);   // Scalar loc values for column 0 of row i
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    Rpp8u *dstPtrTempChn, *srcPtrTempChn;
+                    srcPtrTempChn = srcPtrChannel;
+                    dstPtrTempChn = dstPtrTemp;
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad);   // Locations are computed once and reused for every channel below
+                    for (int c = 0; c < srcDescPtr->c; c++)
+                    {
+                        __m256i pRow;
+                        rpp_simd_load(rpp_generic_nn_load_u8pln1_avx, srcPtrTempChn, srcLoc, invalidLoad, pRow);
+                        rpp_storeu_si64(reinterpret_cast<__m128i *>(dstPtrTempChn), _mm256_castsi256_si128(pRow));   // Stores from the low 128-bit half of pRow; presumably 64 bits (8 u8 pixels) - confirm against rpp_storeu_si64
+                        srcPtrTempChn += srcDescPtr->strides.cStride;
+                        dstPtrTempChn += dstDescPtr->strides.cStride;
+                    }
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Scalar loc values are still at column 0; move them to column vectorLoopCount for the tail loop
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar tail for the remaining (width % 8) columns, or all columns without AVX2
+                {
+                    compute_generic_nn_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+    }
+
+    return RPP_SUCCESS;   // NOTE(review): also returned when none of the layout branches above matched, in which case dst is left unwritten
+}
+
+
+RppStatus warp_perspective_nn_f32_f32_host_tensor(Rpp32f *srcPtr,
+                                                  RpptDescPtr srcDescPtr,
+                                                  Rpp32f *dstPtr,
+                                                  RpptDescPtr dstDescPtr,
+                                                  Rpp32f *perspectiveTensor,
+                                                  RpptROIPtr roiTensorPtrSrc,
+                                                  RpptRoiType roiType,
+                                                  RppLayoutParams srcLayoutParams,
+                                                  rpp::Handle& handle)
+{
+    RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+    for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
+    {
+        RpptROI roi, roiLTRB;
+        RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+        compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);
+        compute_ltrb_from_xywh_host(&roi, &roiLTRB);
+        Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1;
+        Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1;
+
+        Rpp32f9 *perspectiveMatrix_f9;
+        perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9);
+
+        Rpp32f *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage;
+        srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+        srcPtrChannel = srcPtrImage;
+        dstPtrChannel = dstPtrImage;
+
+        Rpp32s vectorIncrementPerChannel = 8;
+        Rpp32s vectorIncrementPkd = 24;
+        Rpp32u bufferLength = roi.xywhROI.roiWidth;
+        Rpp32u alignedLength = bufferLength & ~7;   // Align dst width to process 16 dst pixels per iteration
+        Rpp32s srcLoc[8] = {0};         // Since 4 dst pixels are processed per iteration
+        Rpp32s invalidLoad[8] = {0};    // Since 4 dst pixels are processed per iteration
+
+#if __AVX2__
+        __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride);
+        __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7);
+        __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7);
+        __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7);
+        __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8);
+        __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8);
+        __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8);
+        __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight);
+        __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth);
+        __m256 pRoiLTRB[4];
+        pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x);
+        pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y);
+        pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x);
+        pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y);
+#endif
+
+        // Warp perspective with fused output-layout toggle (NHWC -> NCHW)
+        if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp32f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp32f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pRow[3];
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true);
+                    rpp_simd_load(rpp_generic_nn_load_f32pkd3_to_f32pln3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow);
+                    rpp_simd_store(rpp_store24_f32pln3_to_f32pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_nn_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective with fused output-layout toggle (NCHW -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp32f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            Rpp32f *srcPtrChannelR, *srcPtrChannelG, *srcPtrChannelB;
+            srcPtrChannelR = srcPtrChannel;
+            srcPtrChannelG = srcPtrChannelR + srcDescPtr->strides.cStride;
+            srcPtrChannelB = srcPtrChannelG + srcDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp32f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pRow[3];
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad);
+                    rpp_simd_load(rpp_generic_nn_load_f32pln1_avx, srcPtrChannelR, srcLoc, invalidLoad, pRow[0]);
+                    rpp_simd_load(rpp_generic_nn_load_f32pln1_avx, srcPtrChannelG, srcLoc, invalidLoad, pRow[1]);
+                    rpp_simd_load(rpp_generic_nn_load_f32pln1_avx, srcPtrChannelB, srcLoc, invalidLoad, pRow[2]);
+                    rpp_simd_store(rpp_store24_f32pln3_to_f32pkd3_avx, dstPtrTemp, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_nn_interpolation_pln3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective without fused output-layout toggle (NHWC -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp32f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp32f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pRow[3];
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true);
+                    rpp_simd_load(rpp_generic_nn_load_f32pkd3_to_f32pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow);
+                    rpp_simd_store(rpp_store24_f32pkd3_to_f32pkd3_avx, dstPtrTemp, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_nn_interpolation_pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective without fused output-layout toggle (NCHW -> NCHW for 1 channel and 3 channel)
+        else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp32f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp32f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    Rpp32f *dstPtrTempChn, *srcPtrTempChn;
+                    srcPtrTempChn = srcPtrChannel;
+                    dstPtrTempChn = dstPtrTemp;
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad);
+                    for (int c = 0; c < srcDescPtr->c; c++)
+                    {
+                        __m256 pRow;
+                        rpp_simd_load(rpp_generic_nn_load_f32pln1_avx, srcPtrTempChn, srcLoc, invalidLoad, pRow);
+                        rpp_simd_store(rpp_store8_f32_to_f32_avx, dstPtrTempChn, &pRow);
+                        srcPtrTempChn += srcDescPtr->strides.cStride;
+                        dstPtrTempChn += dstDescPtr->strides.cStride;
+                    }
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_nn_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+    }
+
+    return RPP_SUCCESS;
+}
+
+RppStatus warp_perspective_nn_i8_i8_host_tensor(Rpp8s *srcPtr,                   // Host warp perspective over a batch, Rpp8s in / Rpp8s out; sampling is done by the compute_generic_nn_* helpers called below
+                                                RpptDescPtr srcDescPtr,          // source descriptor: w, h, c, layout and strides are read here
+                                                Rpp8s *dstPtr,                   // destination base pointer; offset per image by dstDescPtr->strides.nStride
+                                                RpptDescPtr dstDescPtr,          // destination descriptor: n, layout and strides are read here
+                                                Rpp32f *perspectiveTensor,       // 9 floats per batch image (reinterpreted as Rpp32f9 below)
+                                                RpptROIPtr roiTensorPtrSrc,      // indexed by batchCount, i.e. one ROI entry per batch image
+                                                RpptRoiType roiType,             // only forwarded to compute_roi_validation_host
+                                                RppLayoutParams srcLayoutParams, // not referenced anywhere in this function body
+                                                rpp::Handle& handle)             // supplies the OpenMP thread count; function always returns RPP_SUCCESS
+{
+    RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};    // full-source-image ROI handed to compute_roi_validation_host as the default
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);    // turn off dynamic thread adjustment so the num_threads(numThreads) request below is honoured
+#pragma omp parallel for num_threads(numThreads)
+    for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)    // one iteration per batch image; all per-image state is declared inside the loop body
+    {
+        RpptROI roi, roiLTRB;
+        RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+        compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);    // NOTE(review): presumably validates/clamps the input ROI against roiDefault and writes it to roi in xywh form - confirm in helper
+        compute_ltrb_from_xywh_host(&roi, &roiLTRB);                             // NOTE(review): presumably fills roiLTRB with the left-top/right-bottom form of roi - confirm in helper
+        Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1;      // integer half (rounds down for odd widths)
+        Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1;    // integer half (rounds down for odd heights)
+
+        Rpp32f9 *perspectiveMatrix_f9;
+        perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9);    // this image's 9 matrix coefficients
+
+        Rpp8s *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage;
+        srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+        srcPtrChannel = srcPtrImage;
+        dstPtrChannel = dstPtrImage;
+
+        Rpp32s vectorIncrementPerChannel = 8;    // dst pixels produced per vector-loop iteration
+        Rpp32s vectorIncrementPkd = 24;          // 8 dst pixels * 3 interleaved channels; used to advance packed dst pointers
+        Rpp32u bufferLength = roi.xywhROI.roiWidth;
+        Rpp32u alignedLength = bufferLength & ~7;   // Round dst width down to a multiple of 8, since 8 dst pixels are processed per vector iteration
+        Rpp32s srcLoc[8] = {0};         // Since 8 dst pixels are processed per iteration
+        Rpp32s invalidLoad[8] = {0};    // Since 8 dst pixels are processed per iteration
+
+#if __AVX2__
+        __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride);
+        __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7);    // lane k holds k * data[0], k = 0..7
+        __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7);    // lane k holds k * data[3], k = 0..7
+        __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7);    // lane k holds k * data[6], k = 0..7
+        __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8);    // step for moving 8 dst columns ahead
+        __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8);    // step for moving 8 dst columns ahead
+        __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8);    // step for moving 8 dst columns ahead
+        __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight);
+        __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth);
+        __m256 pRoiLTRB[4];    // ROI bounds broadcast in the order lt.x, lt.y, rb.x, rb.y
+        pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x);
+        pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y);
+        pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x);
+        pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y);
+#endif
+
+        // Warp perspective with fused output-layout toggle (NHWC -> NCHW)
+        if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8s *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)    // one dst row per iteration
+            {
+                Rpp8s *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);    // NOTE(review): presumably sets locX/locY/locW for the first column of row i - confirm in helper
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)    // vector path: 8 dst pixels per iteration
+                {
+                    __m256i pRow;
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true);    // trailing `true` is passed only in the packed-source (NHWC) branches - presumably selects 3-channel packed offsets; confirm in helper
+                    rpp_simd_load(rpp_generic_nn_load_i8pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow);
+                    rpp_simd_store(rpp_store24_i8pkd3_to_i8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);    // move the scalar location past the columns already handled above (adds 0 when the AVX2 path is compiled out)
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);     // divide by W, then shift by half the ROI width
+                srcY = ((locY / locW) + roiHalfHeight);    // divide by W, then shift by half the ROI height
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)    // scalar tail: remaining columns, one dst pixel per iteration
+                {
+                    compute_generic_nn_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective with fused output-layout toggle (NCHW -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp8s *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            Rpp8s *srcPtrChannelR, *srcPtrChannelG, *srcPtrChannelB;    // per-plane source bases, used by the vector path only
+            srcPtrChannelR = srcPtrChannel;
+            srcPtrChannelG = srcPtrChannelR + srcDescPtr->strides.cStride;
+            srcPtrChannelB = srcPtrChannelG + srcDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8s *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256i pRow[3];
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad);    // same srcLoc/invalidLoad are reused for all three planes below
+                    rpp_simd_load(rpp_generic_nn_load_i8pln1_avx, srcPtrChannelR, srcLoc, invalidLoad, pRow[0]);
+                    rpp_simd_load(rpp_generic_nn_load_i8pln1_avx, srcPtrChannelG, srcLoc, invalidLoad, pRow[1]);
+                    rpp_simd_load(rpp_generic_nn_load_i8pln1_avx, srcPtrChannelB, srcLoc, invalidLoad, pRow[2]);
+                    rpp_simd_store(rpp_store24_i8pln3_to_i8pkd3_avx, dstPtrTemp, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);    // move the scalar location past the columns already handled above
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)    // scalar tail
+                {
+                    compute_generic_nn_interpolation_pln3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;    // next packed dst pixel (3 interleaved channels)
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective without fused output-layout toggle (NHWC -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp8s *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8s *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256i pRow;
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true);
+                    rpp_simd_load(rpp_generic_nn_load_i8pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow);
+                    rpp_simd_store(rpp_store24_i8_to_i8_avx, dstPtrTemp, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);    // move the scalar location past the columns already handled above
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)    // scalar tail
+                {
+                    compute_generic_nn_interpolation_pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;    // next packed dst pixel (3 interleaved channels)
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective without fused output-layout toggle (NCHW -> NCHW for 1 channel and 3 channel)
+        else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8s *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8s *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    Rpp8s *dstPtrTempChn, *srcPtrTempChn;
+                    srcPtrTempChn = srcPtrChannel;
+                    dstPtrTempChn = dstPtrTemp;
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad);    // computed once, then reused for every channel plane
+                    for (int c = 0; c < srcDescPtr->c; c++)    // walk the channel planes of src and dst in step
+                    {
+                        __m256i pRow;
+                        rpp_simd_load(rpp_generic_nn_load_i8pln1_avx, srcPtrTempChn, srcLoc, invalidLoad, pRow);
+                        rpp_storeu_si64(reinterpret_cast<__m128i *>(dstPtrTempChn), _mm256_castsi256_si128(pRow));    // store taken from the low 128 bits of pRow; NOTE(review): presumably writes 8 bytes (8 Rpp8s pixels) - confirm in rpp_storeu_si64
+                        srcPtrTempChn += srcDescPtr->strides.cStride;
+                        dstPtrTempChn += dstDescPtr->strides.cStride;
+                    }
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);    // move the scalar location past the columns already handled above
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)    // scalar tail
+                {
+                    compute_generic_nn_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+    }    // there is no final else: a layout/channel combination matching none of the four branches writes nothing for that image
+
+    return RPP_SUCCESS;    // unconditional; no error status is produced in this function
+}
+
+RppStatus warp_perspective_nn_f16_f16_host_tensor(Rpp16f *srcPtr,
+                                                  RpptDescPtr srcDescPtr,
+                                                  Rpp16f *dstPtr,
+                                                  RpptDescPtr dstDescPtr,
+                                                  Rpp32f *perspectiveTensor,
+                                                  RpptROIPtr roiTensorPtrSrc,
+                                                  RpptRoiType roiType,
+                                                  RppLayoutParams srcLayoutParams,
+                                                  rpp::Handle& handle)
+{
+    RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+    for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
+    {
+        RpptROI roi, roiLTRB;
+        RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+        compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);
+        compute_ltrb_from_xywh_host(&roi, &roiLTRB);
+        Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1;
+        Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1;
+
+        Rpp32f9 *perspectiveMatrix_f9;
+        perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9);
+
+        Rpp16f *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage;
+        srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+        srcPtrChannel = srcPtrImage;
+        dstPtrChannel = dstPtrImage;
+
+        Rpp32s vectorIncrementPerChannel = 8;
+        Rpp32s vectorIncrementPkd = 24;
+        Rpp32u bufferLength = roi.xywhROI.roiWidth;
+        Rpp32u alignedLength = bufferLength & ~7;   // Align dst width to process 16 dst pixels per iteration
+        Rpp32s srcLoc[8] = {0};         // Since 4 dst pixels are processed per iteration
+        Rpp32s invalidLoad[8] = {0};    // Since 4 dst pixels are processed per iteration
+
+#if __AVX2__
+        __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride);
+        __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7);
+        __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7);
+        __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7);
+        __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8);
+        __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8);
+        __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8);
+        __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight);
+        __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth);
+        __m256 pRoiLTRB[4];
+        pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x);
+        pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y);
+        pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x);
+        pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y);
+#endif
+
+        // Warp perspective with fused output-layout toggle (NHWC -> NCHW)
+        if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp16f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp16f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pRow[3];
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true);
+                    rpp_simd_load(rpp_generic_nn_load_f16pkd3_to_f32pln3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow);
+                    rpp_simd_store(rpp_store24_f32pln3_to_f16pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_nn_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective with fused output-layout toggle (NCHW -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp16f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            Rpp16f *srcPtrChannelR, *srcPtrChannelG, *srcPtrChannelB;
+            srcPtrChannelR = srcPtrChannel;
+            srcPtrChannelG = srcPtrChannelR + srcDescPtr->strides.cStride;
+            srcPtrChannelB = srcPtrChannelG + srcDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp16f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pRow[3];
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad);
+                    rpp_simd_load(rpp_generic_nn_load_f16pln1_avx, srcPtrChannelR, srcLoc, invalidLoad, pRow[0]);
+                    rpp_simd_load(rpp_generic_nn_load_f16pln1_avx, srcPtrChannelG, srcLoc, invalidLoad, pRow[1]);
+                    rpp_simd_load(rpp_generic_nn_load_f16pln1_avx, srcPtrChannelB, srcLoc, invalidLoad, pRow[2]);
+                    rpp_simd_store(rpp_store24_f32pln3_to_f16pkd3_avx, dstPtrTemp, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_nn_interpolation_pln3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective without fused output-layout toggle (NHWC -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp16f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp16f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pRow[3];
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad, true);
+                    rpp_simd_load(rpp_generic_nn_load_f16pkd3_to_f32pkd3_avx, srcPtrChannel, srcLoc, invalidLoad, pRow);
+                    rpp_simd_store(rpp_store24_f32pkd3_to_f16pkd3_avx, dstPtrTemp, pRow);
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_nn_interpolation_pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective without fused output-layout toggle (NCHW -> NCHW for 1 channel and 3 channel)
+        else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp16f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp16f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    Rpp16f *dstPtrTempChn, *srcPtrTempChn;
+                    srcPtrTempChn = srcPtrChannel;
+                    dstPtrTempChn = dstPtrTemp;
+                    compute_generic_nn_srclocs_and_validate_avx(pSrcY, pSrcX, pRoiLTRB, pSrcStrideH, srcLoc, invalidLoad);
+                    for (int c = 0; c < srcDescPtr->c; c++)
+                    {
+                        __m256 pRow;
+                        rpp_simd_load(rpp_generic_nn_load_f16pln1_avx, srcPtrTempChn, srcLoc, invalidLoad, pRow);
+                        rpp_simd_store(rpp_store8_f32_to_f16_avx, dstPtrTempChn, &pRow);
+                        srcPtrTempChn += srcDescPtr->strides.cStride;
+                        dstPtrTempChn += dstDescPtr->strides.cStride;
+                    }
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_nn_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+    }
+
+    return RPP_SUCCESS;
+}
+
+RppStatus warp_perspective_bilinear_u8_u8_host_tensor(Rpp8u *srcPtr,   // Host warp perspective, bilinear interpolation, U8 source -> U8 destination
+                                                      RpptDescPtr srcDescPtr,   // source descriptor (w, h, c, layout and strides are read below)
+                                                      Rpp8u *dstPtr,
+                                                      RpptDescPtr dstDescPtr,   // destination descriptor (n, layout and strides are read below)
+                                                      Rpp32f *perspectiveTensor,   // 9 floats per batch entry (indexed as batchCount * 9 below)
+                                                      RpptROIPtr roiTensorPtrSrc,   // one ROI per batch entry
+                                                      RpptRoiType roiType,
+                                                      RppLayoutParams srcLayoutParams,   // not referenced in this function body
+                                                      rpp::Handle& handle)   // supplies the OpenMP thread count; the function always returns RPP_SUCCESS
+{
+    RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};   // ROI spanning the full source descriptor width/height, handed to ROI validation below
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);   // Disable dynamic adjustment of the OpenMP team size so num_threads below is honoured
+#pragma omp parallel for num_threads(numThreads)
+    for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)   // One iteration per batch entry
+    {
+        RpptROI roi, roiLTRB;
+        RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+        compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);
+        compute_ltrb_from_xywh_host(&roi, &roiLTRB);
+        Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1;
+        Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1;
+
+        Rpp32f9 *perspectiveMatrix_f9;
+        perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9);   // View this batch entry's 9 floats as an Rpp32f9
+
+        Rpp8u *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage;
+        srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+        srcPtrChannel = srcPtrImage;
+        dstPtrChannel = dstPtrImage;
+
+        Rpp32s vectorIncrementPerChannel = 8;   // dst pixels advanced per vector iteration
+        Rpp32s vectorIncrementPkd = 24;   // dst elements advanced per vector iteration for packed 3-channel output (8 pixels * 3)
+        Rpp32u bufferLength = roi.xywhROI.roiWidth;
+        Rpp32u alignedLength = bufferLength & ~7;   // Round dst width down to a multiple of 8; the vector loops below advance 8 dst pixels per iteration
+
+#if __AVX2__
+        __m256 pBilinearCoeffs[4];
+        __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride);
+        __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7);   // Lanes hold 0..7 times matrix term 0
+        __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7);   // Lanes hold 0..7 times matrix term 3
+        __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7);   // Lanes hold 0..7 times matrix term 6
+        __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8);   // Per-iteration increments: 8 times each term, matching the 8-pixel vector step
+        __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8);
+        __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8);
+        __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight);
+        __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth);
+        __m256 pRoiLTRB[4];   // ROI bounds broadcast in the order left, top, right, bottom
+        pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x);
+        pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y);
+        pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x);
+        pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y);
+
+        __m256i pxSrcStridesCHW[3];   // Source strides broadcast in the order channel, height, width
+        pxSrcStridesCHW[0] = _mm256_set1_epi32(srcDescPtr->strides.cStride);
+        pxSrcStridesCHW[1] = _mm256_set1_epi32(srcDescPtr->strides.hStride);
+        pxSrcStridesCHW[2] = _mm256_set1_epi32(srcDescPtr->strides.wStride);
+        RpptBilinearNbhoodLocsVecLen8 srcLocs;
+#endif
+
+        // Warp perspective with fused output-layout toggle (NHWC -> NCHW)
+        if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8u *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8u *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true);   // NOTE(review): final bool is true in the NHWC-source branches and false in the NCHW-source branches of this function
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp8u>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_u8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms by the vectorLoopCount columns already handled (0 when AVX2 is not compiled in)
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar loop over the remaining columns of this row
+                {
+                    compute_generic_bilinear_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp Perspective with fused output-layout toggle (NCHW -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp8u *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8u *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp8u>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_u8pkd3_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms by the vectorLoopCount columns already handled
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar loop over the remaining columns of this row
+                {
+                    compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective without fused output-layout toggle (NHWC -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp8u *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8u *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp8u>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_u8pkd3_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms by the vectorLoopCount columns already handled
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar loop over the remaining columns of this row
+                {
+                    compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective without fused output-layout toggle, 3 channel (NCHW -> NCHW)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8u *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8u *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp8u>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_u8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms by the vectorLoopCount columns already handled
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar loop over the remaining columns of this row
+                {
+                    compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTempR++, srcPtrChannel, srcDescPtr, dstDescPtr);   // NOTE(review): only the R pointer is passed and advanced here; presumably the helper reaches G/B via dstDescPtr strides - confirm
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp Perspective without fused output-layout toggle single channel (NCHW -> NCHW)
+        else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8u *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8u *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[4], pDst;
+                    compute_generic_bilinear_srclocs_1c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, pRoiLTRB);
+                    rpp_simd_load(rpp_generic_bilinear_load_1c_avx<Rpp8u>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_1c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store8_f32pln1_to_u8pln1_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms by the vectorLoopCount columns already handled
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar loop over the remaining columns of this row
+                {
+                    compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+    }   // NOTE(review): descriptor combinations not matched by the branches above write nothing to dstPtr
+
+    return RPP_SUCCESS;
+}
+
+RppStatus warp_perspective_bilinear_f32_f32_host_tensor(Rpp32f *srcPtr,
+                                                        RpptDescPtr srcDescPtr,
+                                                        Rpp32f *dstPtr,
+                                                        RpptDescPtr dstDescPtr,
+                                                        Rpp32f *perspectiveTensor,
+                                                        RpptROIPtr roiTensorPtrSrc,
+                                                        RpptRoiType roiType,
+                                                        RppLayoutParams srcLayoutParams,
+                                                        rpp::Handle& handle)
+{
+    RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+    for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
+    {
+        RpptROI roi, roiLTRB;
+        RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+        compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);
+        compute_ltrb_from_xywh_host(&roi, &roiLTRB);
+        Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1;
+        Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1;
+
+        Rpp32f9 *perspectiveMatrix_f9;
+        perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9);
+
+        Rpp32f *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage;
+        srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+        srcPtrChannel = srcPtrImage;
+        dstPtrChannel = dstPtrImage;
+
+        Rpp32s vectorIncrementPerChannel = 8;
+        Rpp32s vectorIncrementPkd = 24;
+        Rpp32u bufferLength = roi.xywhROI.roiWidth;
+        Rpp32u alignedLength = bufferLength & ~7;   // Align dst width to process 16 dst pixels per iteration
+
+#if __AVX2__
+        __m256 pBilinearCoeffs[4];
+        __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride);
+        __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7);
+        __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7);
+        __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7);
+        __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8);
+        __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8);
+        __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8);
+        __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight);
+        __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth);
+        __m256 pRoiLTRB[4];
+        pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x);
+        pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y);
+        pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x);
+        pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y);
+
+        __m256i pxSrcStridesCHW[3];
+        pxSrcStridesCHW[0] = _mm256_set1_epi32(srcDescPtr->strides.cStride);
+        pxSrcStridesCHW[1] = _mm256_set1_epi32(srcDescPtr->strides.hStride);
+        pxSrcStridesCHW[2] = _mm256_set1_epi32(srcDescPtr->strides.wStride);
+        RpptBilinearNbhoodLocsVecLen8 srcLocs;
+#endif
+
+        // Warp perspective with fused output-layout toggle (NHWC -> NCHW)
+        if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp32f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp32f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp32f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_f32pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_bilinear_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp Perspective with fused output-layout toggle (NCHW -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp32f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp32f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp32f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_f32pkd3_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective without fused output-layout toggle (NHWC -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp32f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp32f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp32f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_f32pkd3_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective without fused output-layout toggle (NCHW -> NCHW)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp32f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp32f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp32f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_f32pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTempR++, srcPtrChannel, srcDescPtr, dstDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp Perspective without fused output-layout toggle single channel (NCHW -> NCHW)
+        else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp32f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp32f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[4], pDst;
+                    compute_generic_bilinear_srclocs_1c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, pRoiLTRB);
+                    rpp_simd_load(rpp_generic_bilinear_load_1c_avx<Rpp32f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_1c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store8_f32pln1_to_f32pln1_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)
+                {
+                    compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+    }
+
+    return RPP_SUCCESS;
+}
+
+RppStatus warp_perspective_bilinear_i8_i8_host_tensor(Rpp8s *srcPtr, // Host warp-perspective with bilinear interpolation for Rpp8s source/destination tensors
+                                                      RpptDescPtr srcDescPtr,
+                                                      Rpp8s *dstPtr,
+                                                      RpptDescPtr dstDescPtr,
+                                                      Rpp32f *perspectiveTensor, // 9 floats per batch image, viewed as Rpp32f9 inside the batch loop
+                                                      RpptROIPtr roiTensorPtrSrc, // indexed by batchCount: one ROI entry per batch image
+                                                      RpptRoiType roiType,
+                                                      RppLayoutParams srcLayoutParams, // not referenced anywhere in this function body
+                                                      rpp::Handle& handle) // only used here to query the thread count
+{
+    RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; // default ROI handed to compute_roi_validation_host: 0, 0, source width, source height
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0); // turn off OpenMP dynamic adjustment of the thread count before the parallel region
+#pragma omp parallel for num_threads(numThreads)
+    for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) // one batch image per iteration
+    {
+        RpptROI roi, roiLTRB;
+        RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+        compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);
+        compute_ltrb_from_xywh_host(&roi, &roiLTRB);
+        Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1; // integer half of ROI width; added to locX / locW to form srcX in the scalar tails
+        Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1; // integer half of ROI height; added to locY / locW to form srcY in the scalar tails
+
+        Rpp32f9 *perspectiveMatrix_f9;
+        perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9); // this image's 9 coefficients; data[0], data[3], data[6] are the per-column increments used below
+
+        Rpp8s *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage;
+        srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+        srcPtrChannel = srcPtrImage;
+        dstPtrChannel = dstPtrImage;
+
+        Rpp32s vectorIncrementPerChannel = 8; // dst pixels produced per AVX iteration (step of vectorLoopCount and of planar dst pointers)
+        Rpp32s vectorIncrementPkd = 24; // dst pointer step per AVX iteration for packed 3-channel output
+        Rpp32u bufferLength = roi.xywhROI.roiWidth; // dst pixels per row
+        Rpp32u alignedLength = bufferLength & ~7;   // Largest multiple of 8 not exceeding the ROI width: the AVX loops process 8 dst pixels per iteration
+
+#if __AVX2__
+        __m256 pBilinearCoeffs[4];
+        __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride);
+        __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7); // lane k holds k * data[0], k = 0..7
+        __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7); // lane k holds k * data[3]
+        __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7); // lane k holds k * data[6]
+        __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8); // 8 * data[0] in every lane; passed to the next_term_avx helper once per 8-pixel iteration
+        __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8); // 8 * data[3] in every lane
+        __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8); // 8 * data[6] in every lane
+        __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight);
+        __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth);
+        __m256 pRoiLTRB[4]; // ROI bounds broadcast to all lanes, in the order lt.x, lt.y, rb.x, rb.y
+        pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x);
+        pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y);
+        pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x);
+        pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y);
+
+        __m256i pxSrcStridesCHW[3]; // source c, h and w strides broadcast as 32-bit integers
+        pxSrcStridesCHW[0] = _mm256_set1_epi32(srcDescPtr->strides.cStride);
+        pxSrcStridesCHW[1] = _mm256_set1_epi32(srcDescPtr->strides.hStride);
+        pxSrcStridesCHW[2] = _mm256_set1_epi32(srcDescPtr->strides.wStride);
+        RpptBilinearNbhoodLocsVecLen8 srcLocs;
+#endif
+
+        // Warp perspective with fused output-layout toggle (NHWC -> NCHW): 3-channel NHWC source, three separate dst row pointers (R, G, B)
+        if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8s *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8s *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth); // called with row i and column 0 (vectorLoopCount is still 0 here)
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true); // last argument is true in the NHWC-source branches of this function and false in the NCHW-source ones
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp8s>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    compute_offset_i8_3c_avx(pDst); // presumably shifts the float result into the signed 8-bit range before the i8 store - TODO confirm against the helper
+                    rpp_simd_store(rpp_store24_f32pln3_to_i8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); // move the scalar terms past the columns already handled by the AVX loop (adds 0 when that loop did not run)
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth); // scalar source x for the first tail column
+                srcY = ((locY / locW) + roiHalfHeight); // scalar source y for the first tail column
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++) // scalar tail: columns from alignedLength up to bufferLength, or the whole row when __AVX2__ is not defined
+                {
+                    compute_generic_bilinear_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective with fused output-layout toggle (NCHW -> NHWC): 3-channel NCHW source, single interleaved dst row pointer
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp8s *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8s *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp8s>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    compute_offset_i8_3c_avx(pDst); // i8 offset step applied before the store (helper body not visible here)
+                    rpp_simd_store(rpp_store24_f32pln3_to_i8pkd3_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); // move the scalar terms past the columns already handled by the AVX loop
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++) // scalar tail for the remaining columns
+                {
+                    compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3; // one interleaved 3-channel dst pixel per tail iteration
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective without fused output-layout toggle (NHWC -> NHWC): 3-channel NHWC source, single interleaved dst row pointer
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp8s *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8s *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp8s>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    compute_offset_i8_3c_avx(pDst); // i8 offset step applied before the store (helper body not visible here)
+                    rpp_simd_store(rpp_store24_f32pln3_to_i8pkd3_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); // move the scalar terms past the columns already handled by the AVX loop
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++) // scalar tail for the remaining columns
+                {
+                    compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3; // one interleaved 3-channel dst pixel per tail iteration
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective without fused output-layout toggle (NCHW -> NCHW): 3-channel NCHW source, three separate dst row pointers (R, G, B)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8s *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8s *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp8s>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    compute_offset_i8_3c_avx(pDst); // i8 offset step applied before the store (helper body not visible here)
+                    rpp_simd_store(rpp_store24_f32pln3_to_i8pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); // move the scalar terms past the columns already handled by the AVX loop
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++) // scalar tail for the remaining columns
+                {
+                    compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTempR++, srcPtrChannel, srcDescPtr, dstDescPtr); // NOTE(review): only the R pointer is passed and advanced here; presumably the helper reaches the G/B planes through dstDescPtr strides - confirm
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective without fused output-layout toggle, single channel (NCHW -> NCHW)
+        else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp8s *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp8s *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[4], pDst;
+                    compute_generic_bilinear_srclocs_1c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, pRoiLTRB);
+                    rpp_simd_load(rpp_generic_bilinear_load_1c_avx<Rpp8s>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_1c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    compute_offset_i8_1c_avx(pDst); // single-channel variant of the i8 offset step (helper body not visible here)
+                    rpp_simd_store(rpp_store8_f32pln1_to_i8pln1_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount); // move the scalar terms past the columns already handled by the AVX loop
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++) // scalar tail for the remaining columns
+                {
+                    compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+    }
+
+    return RPP_SUCCESS; // returned unconditionally: a channel/layout combination matching none of the five branches above writes nothing to dstPtr
+}
+
+RppStatus warp_perspective_bilinear_f16_f16_host_tensor(Rpp16f *srcPtr,
+                                                        RpptDescPtr srcDescPtr,
+                                                        Rpp16f *dstPtr,
+                                                        RpptDescPtr dstDescPtr,
+                                                        Rpp32f *perspectiveTensor,
+                                                        RpptROIPtr roiTensorPtrSrc,
+                                                        RpptRoiType roiType,
+                                                        RppLayoutParams srcLayoutParams,
+                                                        rpp::Handle& handle)
+{   // Host warp perspective (bilinear) for Rpp16f src/dst: loops over the batch, dispatches on channel count and src/dst layout, always returns RPP_SUCCESS
+    RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};   // Default ROI handed to compute_roi_validation_host: origin (0, 0) with the src descriptor width/height
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);   // Turn off dynamic thread-count adjustment so the num_threads clause below is honoured
+#pragma omp parallel for num_threads(numThreads)
+    for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)   // One iteration per image in the batch
+    {
+        RpptROI roi, roiLTRB;
+        RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+        compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);
+        compute_ltrb_from_xywh_host(&roi, &roiLTRB);   // roiLTRB is read below through ltrbROI.lt / ltrbROI.rb
+        Rpp32s roiHalfWidth = roi.xywhROI.roiWidth >> 1;   // Half ROI extents via integer right shift
+        Rpp32s roiHalfHeight = roi.xywhROI.roiHeight >> 1;
+
+        Rpp32f9 *perspectiveMatrix_f9;
+        perspectiveMatrix_f9 = reinterpret_cast<Rpp32f9 *>(perspectiveTensor + batchCount * 9);   // 9 consecutive floats of perspectiveTensor per image
+
+        Rpp16f *srcPtrChannel, *dstPtrChannel, *srcPtrImage, *dstPtrImage;
+        srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+        srcPtrChannel = srcPtrImage;
+        dstPtrChannel = dstPtrImage;
+
+        Rpp32s vectorIncrementPerChannel = 8;   // dst elements advanced per vector iteration when writing one plane
+        Rpp32s vectorIncrementPkd = 24;   // dst elements advanced per vector iteration for packed output (8 pixels x 3 channels)
+        Rpp32u bufferLength = roi.xywhROI.roiWidth;
+        Rpp32u alignedLength = bufferLength & ~7;   // Round dst width down to a multiple of 8 so the vector loop processes 8 dst pixels per iteration
+
+#if __AVX2__
+        __m256 pBilinearCoeffs[4];
+        __m256 pSrcStrideH = _mm256_set1_ps(srcDescPtr->strides.hStride);
+        __m256 pPerspectiveMatrixTerm0 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[0], perspectiveMatrix_f9->data[0] * 2, perspectiveMatrix_f9->data[0] * 3, perspectiveMatrix_f9->data[0] * 4, perspectiveMatrix_f9->data[0] * 5, perspectiveMatrix_f9->data[0] * 6, perspectiveMatrix_f9->data[0] * 7);   // Lane k holds k * data[0], k = 0..7
+        __m256 pPerspectiveMatrixTerm3 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[3], perspectiveMatrix_f9->data[3] * 2, perspectiveMatrix_f9->data[3] * 3, perspectiveMatrix_f9->data[3] * 4, perspectiveMatrix_f9->data[3] * 5, perspectiveMatrix_f9->data[3] * 6, perspectiveMatrix_f9->data[3] * 7);   // Lane k holds k * data[3]
+        __m256 pPerspectiveMatrixTerm6 = _mm256_setr_ps(0, perspectiveMatrix_f9->data[6], perspectiveMatrix_f9->data[6] * 2, perspectiveMatrix_f9->data[6] * 3, perspectiveMatrix_f9->data[6] * 4, perspectiveMatrix_f9->data[6] * 5, perspectiveMatrix_f9->data[6] * 6, perspectiveMatrix_f9->data[6] * 7);   // Lane k holds k * data[6]
+        __m256 pPerspectiveMatrixTerm0Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[0] * 8);   // Step applied per vector iteration: 8 columns' worth of each term
+        __m256 pPerspectiveMatrixTerm3Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[3] * 8);
+        __m256 pPerspectiveMatrixTerm6Incr = _mm256_set1_ps(perspectiveMatrix_f9->data[6] * 8);
+        __m256 pRoiHalfHeight = _mm256_set1_ps(roiHalfHeight);
+        __m256 pRoiHalfWidth = _mm256_set1_ps(roiHalfWidth);
+        __m256 pRoiLTRB[4];   // lt.x, lt.y, rb.x, rb.y of roiLTRB, each broadcast across all lanes
+        pRoiLTRB[0] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.x);
+        pRoiLTRB[1] = _mm256_set1_ps(roiLTRB.ltrbROI.lt.y);
+        pRoiLTRB[2] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.x);
+        pRoiLTRB[3] = _mm256_set1_ps(roiLTRB.ltrbROI.rb.y);
+
+        __m256i pxSrcStridesCHW[3];   // Source c / h / w strides broadcast as 32-bit integers
+        pxSrcStridesCHW[0] = _mm256_set1_epi32(srcDescPtr->strides.cStride);
+        pxSrcStridesCHW[1] = _mm256_set1_epi32(srcDescPtr->strides.hStride);
+        pxSrcStridesCHW[2] = _mm256_set1_epi32(srcDescPtr->strides.wStride);
+        RpptBilinearNbhoodLocsVecLen8 srcLocs;
+#endif
+
+        // Warp perspective with fused output-layout toggle (NHWC -> NCHW)
+        if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp16f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp16f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);   // Presumably initialises locW/locY/locX for the first column of row i - helper defined elsewhere
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp16f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_f16pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms past the columns already handled above (vectorLoopCount stays 0 without AVX2)
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);   // Source coordinate: homogeneous x, y divided by w, then offset by the half ROI extent
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar tail for the columns not covered by the vector loop
+                {
+                    compute_generic_bilinear_interpolation_pkd3_to_pln3(srcY, srcX, &roiLTRB, dstPtrTempR++, dstPtrTempG++, dstPtrTempB++, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp Perspective with fused output-layout toggle (NCHW -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp16f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp16f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp16f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_f16pkd3_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms past the columns already handled above
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar tail
+                {
+                    compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;   // Next packed dst pixel (3 interleaved channels)
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+        // Warp perspective without fused output-layout toggle (NHWC -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            Rpp16f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp16f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, true);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp16f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_f16pkd3_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPkd;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms past the columns already handled above
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar tail
+                {
+                    compute_generic_bilinear_interpolation_pln3pkd3_to_pkd3(srcY, srcX, &roiLTRB, dstPtrTemp, srcPtrChannel, srcDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                    dstPtrTemp += 3;   // Next packed dst pixel (3 interleaved channels)
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp perspective without fused output-layout toggle (NCHW -> NCHW)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp16f *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp16f *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[12], pDst[3];
+                    compute_generic_bilinear_srclocs_3c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, srcDescPtr->c, pRoiLTRB, false);
+                    rpp_simd_load(rpp_generic_bilinear_load_3c_avx<Rpp16f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_3c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store24_f32pln3_to_f16pln3_avx, dstPtrTempR, dstPtrTempG, dstPtrTempB, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTempR += vectorIncrementPerChannel;
+                    dstPtrTempG += vectorIncrementPerChannel;
+                    dstPtrTempB += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms past the columns already handled above
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar tail
+                {
+                    compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTempR++, srcPtrChannel, srcDescPtr, dstDescPtr);   // NOTE(review): only dstPtrTempR is passed/advanced here - presumably the helper writes the other planes using the descriptor strides; confirm
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Warp Perspective without fused output-layout toggle single channel (NCHW -> NCHW)
+        else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            Rpp16f *dstPtrRow;
+            dstPtrRow = dstPtrChannel;
+            for (int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                Rpp16f *dstPtrTemp;
+                dstPtrTemp = dstPtrRow;
+
+                int vectorLoopCount = 0;
+                Rpp32f locX, locY, locW, srcX, srcY;
+                compute_warp_perspective_src_loc_params(i, vectorLoopCount, locW, locY, locX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+#if __AVX2__
+                __m256 plocX, plocY, plocW, pSrcX, pSrcY;
+                compute_warp_perspective_src_loc_first_term_avx(locX, locY, locW, plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6, pPerspectiveMatrixTerm3, pPerspectiveMatrixTerm0, pRoiHalfHeight, pRoiHalfWidth);
+                for (; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrementPerChannel)
+                {
+                    __m256 pSrc[4], pDst;
+                    compute_generic_bilinear_srclocs_1c_avx(pSrcY, pSrcX, srcLocs, pBilinearCoeffs, pSrcStrideH, pxSrcStridesCHW, pRoiLTRB);
+                    rpp_simd_load(rpp_generic_bilinear_load_1c_avx<Rpp16f>, srcPtrChannel, srcDescPtr, srcLocs, pSrcY, pSrcX, pRoiLTRB, pSrc);  // Load input pixels required for bilinear interpolation
+                    compute_bilinear_interpolation_1c_avx(pSrc, pBilinearCoeffs, pDst); // Compute Bilinear interpolation
+                    rpp_simd_store(rpp_store8_f32pln1_to_f16pln1_avx, dstPtrTemp, pDst); // Store dst pixels
+                    compute_warp_perspective_src_loc_next_term_avx(plocW, plocY, plocX, pSrcY, pSrcX, pPerspectiveMatrixTerm6Incr, pPerspectiveMatrixTerm3Incr, pPerspectiveMatrixTerm0Incr, pRoiHalfHeight, pRoiHalfWidth);
+                    dstPtrTemp += vectorIncrementPerChannel;
+                }
+#endif
+                locW += (perspectiveMatrix_f9->data[6] * vectorLoopCount);   // Advance the scalar terms past the columns already handled above
+                locY += (perspectiveMatrix_f9->data[3] * vectorLoopCount);
+                locX += (perspectiveMatrix_f9->data[0] * vectorLoopCount);
+                srcX = ((locX / locW) + roiHalfWidth);
+                srcY = ((locY / locW) + roiHalfHeight);
+                for (; vectorLoopCount < bufferLength; vectorLoopCount++)   // Scalar tail
+                {
+                    compute_generic_bilinear_interpolation_pln_to_pln(srcY, srcX, &roiLTRB, dstPtrTemp++, srcPtrChannel, srcDescPtr, dstDescPtr);
+                    compute_warp_perspective_src_loc_next_term(vectorLoopCount, locW, locY, locX, srcY, srcX, perspectiveMatrix_f9, roiHalfHeight, roiHalfWidth);
+                }
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+    }
+
+    return RPP_SUCCESS;
+}
\ No newline at end of file
diff --git a/src/modules/hip/hip_tensor_geometric_augmentations.hpp b/src/modules/hip/hip_tensor_geometric_augmentations.hpp
index 102e7d686..80a7ac356 100644
--- a/src/modules/hip/hip_tensor_geometric_augmentations.hpp
+++ b/src/modules/hip/hip_tensor_geometric_augmentations.hpp
@@ -39,5 +39,6 @@ SOFTWARE.
 #include "kernel/transpose.hpp"
 #include "kernel/crop_and_patch.hpp"
 #include "kernel/flip_voxel.hpp"
+#include "kernel/warp_perspective.hpp"
 
 #endif // HIP_TENSOR_GEOMETRIC_AUGMENTATIONS_HPP
diff --git a/src/modules/hip/kernel/warp_perspective.hpp b/src/modules/hip/kernel/warp_perspective.hpp
new file mode 100644
index 000000000..4e6aa4532
--- /dev/null
+++ b/src/modules/hip/kernel/warp_perspective.hpp
@@ -0,0 +1,461 @@
+#include <hip/hip_runtime.h>
+#include "rpp_hip_common.hpp"
+
+// -------------------- Set 0 - warp_perspective device helpers --------------------
+
+__device__ void warp_perspective_srclocs_hip_compute(float perspectiveMatrixElement, float4 locHomComponent_f4, float4 roiComponent_f4, d_float8 *locHomW_f8, d_float8 *locSrcPtr_f8)
+{   // Fills locSrcPtr_f8 with 8 source coordinates along one axis: (locHomComponent + increment) / locHomW + roiComponent
+    d_float8 increment_f8;
+    increment_f8.f4[0] = make_float4(0, perspectiveMatrixElement, perspectiveMatrixElement + perspectiveMatrixElement, perspectiveMatrixElement + perspectiveMatrixElement + perspectiveMatrixElement);    // Increments for positions [0-3]: {0, m, 2m, 3m} with m = perspectiveMatrixElement
+    increment_f8.f4[1] = static_cast<float4>(perspectiveMatrixElement + increment_f8.f4[0].w) + increment_f8.f4[0];    // Increments for positions [4-7]; presumably {4m, 5m, 6m, 7m}, assuming the float4 cast broadcasts the scalar 4m - confirm
+    locSrcPtr_f8->f4[0] = ((locHomComponent_f4 + increment_f8.f4[0])/locHomW_f8->f4[0]) + roiComponent_f4; // Compute src x/src y locations based on homogeneous coords hom x/hom y and common scale hom w for dst x and dst y locations [0-3]
+    locSrcPtr_f8->f4[1] = ((locHomComponent_f4 + increment_f8.f4[1])/locHomW_f8->f4[1]) + roiComponent_f4; // Compute src x/src y locations based on homogeneous coords hom x/hom y and common scale hom w for dst x and dst y locations [4-7]
+}
+
+__device__ void warp_perspective_roi_and_srclocs_hip_compute(int4 *srcRoiPtr_i4, int id_x, int id_y, d_float9 *perspectiveMatrix_f9, d_float16 *locSrc_f16)
+{   // For dst position (id_x, id_y) and the 7 positions after it along x, writes 8 src x locations to locSrc_f16->f8[0] and 8 src y locations to locSrc_f16->f8[1]
+    float2 locDst_f2;
+    float3 locHom_f3;
+    float4 locHomW_f4;
+    d_float8 locHomW_f8, incrementW_f8;
+    float roiHalfWidth = (srcRoiPtr_i4->z - srcRoiPtr_i4->x + 1) >> 1;    // Width taken as z - x + 1 (presumably the ROI is in inclusive left/top/right/bottom form - confirm), halved with an integer shift
+    float roiHalfHeight = (srcRoiPtr_i4->w - srcRoiPtr_i4->y + 1) >> 1;    // Height taken as w - y + 1, halved the same way
+    locDst_f2.x = static_cast<float>(id_x - roiHalfWidth);    // Destination coordinate offset by the half ROI extent
+    locDst_f2.y = static_cast<float>(id_y - roiHalfHeight);
+    locHom_f3.x = fmaf(locDst_f2.x, perspectiveMatrix_f9->f1[0], fmaf(locDst_f2.y, perspectiveMatrix_f9->f1[1], perspectiveMatrix_f9->f1[2]));    // Homogeneous x from matrix elements 0-2
+    locHom_f3.y = fmaf(locDst_f2.x, perspectiveMatrix_f9->f1[3], fmaf(locDst_f2.y, perspectiveMatrix_f9->f1[4], perspectiveMatrix_f9->f1[5]));    // Homogeneous y from matrix elements 3-5
+    locHom_f3.z = fmaf(locDst_f2.x, perspectiveMatrix_f9->f1[6], fmaf(locDst_f2.y, perspectiveMatrix_f9->f1[7], perspectiveMatrix_f9->f1[8]));    // Compute first homogeneous coords based on which final destination coords are computed
+    locHomW_f4 = static_cast<float4>(locHom_f3.z);
+    incrementW_f8.f4[0] = make_float4(0, perspectiveMatrix_f9->f1[6], perspectiveMatrix_f9->f1[6] + perspectiveMatrix_f9->f1[6], perspectiveMatrix_f9->f1[6] + perspectiveMatrix_f9->f1[6] + perspectiveMatrix_f9->f1[6]);    // w increments for positions [0-3]: {0, 1, 2, 3} * f1[6]
+    incrementW_f8.f4[1] = static_cast<float4>(perspectiveMatrix_f9->f1[6] + incrementW_f8.f4[0].w) + incrementW_f8.f4[0];    // w increments for positions [4-7]
+    locHomW_f8.f4[0] = locHomW_f4 + incrementW_f8.f4[0];
+    locHomW_f8.f4[1] = locHomW_f4 + incrementW_f8.f4[1];    // Compute multiple homogeneous coords terms using first term and perspective matrix based on which final destination coords are computed
+    warp_perspective_srclocs_hip_compute(perspectiveMatrix_f9->f1[0], static_cast<float4>(locHom_f3.x), static_cast<float4>(roiHalfWidth), &locHomW_f8, &(locSrc_f16->f8[0]));    // Compute 8 locSrcX
+    warp_perspective_srclocs_hip_compute(perspectiveMatrix_f9->f1[3], static_cast<float4>(locHom_f3.y), static_cast<float4>(roiHalfHeight), &locHomW_f8, &(locSrc_f16->f8[1]));    // Compute 8 locSrcY
+}
+
+// -------------------- Set 1 - Bilinear Interpolation --------------------
+
+template <typename T>
+__global__ void warp_perspective_bilinear_pkd_hip_tensor(T *srcPtr,
+                                                         uint2 srcStridesNH,
+                                                         T *dstPtr,
+                                                         uint2 dstStridesNH,
+                                                         d_float9 *perspectiveTensor,
+                                                         RpptROIPtr roiTensorPtrSrc)
+{   // Bilinear warp perspective kernel, packed 3-channel src -> packed 3-channel dst; id_z selects the image, its matrix and its ROI
+    int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8;    // Each thread starts at a multiple of 8 along x and produces 8 dst pixels
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))    // Threads starting outside the ROI extent do nothing
+    {
+        return;
+    }
+
+    uint srcIdx = (id_z * srcStridesNH.x);    // Start of this image in src; no row/column offset is applied here
+    uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3;    // 3 interleaved values per dst pixel
+
+    d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z];
+    int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z];    // NOTE(review): the helper below derives width as z - x + 1 while the guard above reads xywhROI fields - confirm which ROI form the launcher passes
+    d_float16 locSrc_f16;
+    warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16);    // 8 src x locations in f8[0], 8 src y locations in f8[1]
+
+    d_float24 dst_f24;
+    rpp_hip_interpolate24_bilinear_pkd3(srcPtr + srcIdx, srcStridesNH.y, &locSrc_f16, &srcRoi_i4, &dst_f24);
+    rpp_hip_pack_float24_pkd3_and_store24_pkd3(dstPtr + dstIdx, &dst_f24);
+}
+
+template <typename T>
+__global__ void warp_perspective_bilinear_pln_hip_tensor(T *srcPtr,
+                                                         uint3 srcStridesNCH,
+                                                         T *dstPtr,
+                                                         uint3 dstStridesNCH,
+                                                         int channelsDst,
+                                                         d_float9 *perspectiveTensor,
+                                                         RpptROIPtr roiTensorPtrSrc)
+{   // Bilinear warp perspective kernel, planar src -> planar dst; processes one plane, or three when channelsDst == 3
+    int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8;    // Each thread starts at a multiple of 8 along x and produces 8 dst pixels per plane
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))    // Threads starting outside the ROI extent do nothing
+    {
+        return;
+    }
+
+    uint srcIdx = (id_z * srcStridesNCH.x);    // Start of this image in src; no row/column offset is applied here
+    uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x;
+
+    d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z];
+    int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z];    // NOTE(review): the helper below derives width as z - x + 1 while the guard above reads xywhROI fields - confirm which ROI form the launcher passes
+    d_float16 locSrc_f16;
+    warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16);    // Source locations are computed once and reused for every plane
+
+    d_float8 dst_f8;
+    rpp_hip_interpolate8_bilinear_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8);
+    rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8);
+    if (channelsDst == 3)
+    {
+        srcIdx += srcStridesNCH.y;    // Step src and dst to the second plane
+        dstIdx += dstStridesNCH.y;
+
+        rpp_hip_interpolate8_bilinear_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8);
+        rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8);
+
+        srcIdx += srcStridesNCH.y;    // Step src and dst to the third plane
+        dstIdx += dstStridesNCH.y;
+
+        rpp_hip_interpolate8_bilinear_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8);
+        rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8);
+    }
+}
+
+template <typename T>
+__global__ void warp_perspective_bilinear_pkd3_pln3_hip_tensor(T *srcPtr,
+                                                               uint2 srcStridesNH,
+                                                               T *dstPtr,
+                                                               uint3 dstStridesNCH,
+                                                               d_float9 *perspectiveTensor,
+                                                               RpptROIPtr roiTensorPtrSrc)
+{   // Bilinear warp perspective kernel with layout toggle: packed 3-channel src -> planar 3-channel dst
+    int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8;    // Each thread starts at a multiple of 8 along x and produces 8 dst pixels
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))    // Threads starting outside the ROI extent do nothing
+    {
+        return;
+    }
+
+    uint srcIdx = (id_z * srcStridesNH.x);    // Start of this image in src; no row/column offset is applied here
+    uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x;    // Index into the first dst plane; the plane stride is passed to the store below
+
+    d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z];
+    int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z];    // NOTE(review): the helper below derives width as z - x + 1 while the guard above reads xywhROI fields - confirm which ROI form the launcher passes
+    d_float16 locSrc_f16;
+    warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16);    // 8 src x locations in f8[0], 8 src y locations in f8[1]
+
+    d_float24 dst_f24;
+    rpp_hip_interpolate24_bilinear_pkd3(srcPtr + srcIdx, srcStridesNH.y, &locSrc_f16, &srcRoi_i4, &dst_f24);
+    rpp_hip_pack_float24_pkd3_and_store24_pln3(dstPtr + dstIdx, dstStridesNCH.y, &dst_f24);
+}
+
+template <typename T>
+__global__ void warp_perspective_bilinear_pln3_pkd3_hip_tensor(T *srcPtr,
+                                                               uint3 srcStridesNCH,
+                                                               T *dstPtr,
+                                                               uint2 dstStridesNH,
+                                                               d_float9 *perspectiveTensor,
+                                                               RpptROIPtr roiTensorPtrSrc)
+{   // Bilinear warp perspective kernel with layout toggle: planar 3-channel src -> packed 3-channel dst
+    int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8;    // Each thread starts at a multiple of 8 along x and produces 8 dst pixels
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))    // Threads starting outside the ROI extent do nothing
+    {
+        return;
+    }
+
+    uint srcIdx = (id_z * srcStridesNCH.x);    // Start of this image in src; no row/column offset is applied here
+    uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3;    // 3 interleaved values per dst pixel
+
+    d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z];
+    int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z];    // NOTE(review): the helper below derives width as z - x + 1 while the guard above reads xywhROI fields - confirm which ROI form the launcher passes
+    d_float16 locSrc_f16;
+    warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16);    // 8 src x locations in f8[0], 8 src y locations in f8[1]
+
+    d_float24 dst_f24;
+    rpp_hip_interpolate24_bilinear_pln3(srcPtr + srcIdx, &srcStridesNCH, &locSrc_f16, &srcRoi_i4, &dst_f24);
+    rpp_hip_pack_float24_pln3_and_store24_pkd3(dstPtr + dstIdx, &dst_f24);
+}
+
+// -------------------- Set 2 - Nearest Neighbor Interpolation --------------------
+
+template <typename T>
+__global__ void warp_perspective_nearest_neighbor_pkd_hip_tensor(T *srcPtr,
+                                                                 uint2 srcStridesNH,
+                                                                 T *dstPtr,
+                                                                 uint2 dstStridesNH,
+                                                                 d_float9 *perspectiveTensor,
+                                                                 RpptROIPtr roiTensorPtrSrc)
+{   // Nearest-neighbor warp perspective kernel, packed 3-channel src -> packed 3-channel dst; id_z selects the image, its matrix and its ROI
+    int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8;    // Each thread starts at a multiple of 8 along x and produces 8 dst pixels
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))    // Threads starting outside the ROI extent do nothing
+    {
+        return;
+    }
+
+    uint srcIdx = (id_z * srcStridesNH.x);    // Start of this image in src; no row/column offset is applied here
+    uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3;    // 3 interleaved values per dst pixel
+
+    d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z];
+    int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z];    // NOTE(review): the helper below derives width as z - x + 1 while the guard above reads xywhROI fields - confirm which ROI form the launcher passes
+    d_float16 locSrc_f16;
+    warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16);    // 8 src x locations in f8[0], 8 src y locations in f8[1]
+
+    d_float24 dst_f24;
+    rpp_hip_interpolate24_nearest_neighbor_pkd3(srcPtr + srcIdx, srcStridesNH.y, &locSrc_f16, &srcRoi_i4, &dst_f24);
+    rpp_hip_pack_float24_pkd3_and_store24_pkd3(dstPtr + dstIdx, &dst_f24);
+}
+
+template <typename T>
+__global__ void warp_perspective_nearest_neighbor_pln_hip_tensor(T *srcPtr,
+                                                                 uint3 srcStridesNCH,
+                                                                 T *dstPtr,
+                                                                 uint3 dstStridesNCH,
+                                                                 int channelsDst,
+                                                                 d_float9 *perspectiveTensor,
+                                                                 RpptROIPtr roiTensorPtrSrc)
+{   // Nearest-neighbor warp perspective kernel, planar src -> planar dst; processes one plane, or three when channelsDst == 3
+    int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8;    // Each thread starts at a multiple of 8 along x and produces 8 dst pixels per plane
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))    // Threads starting outside the ROI extent do nothing
+    {
+        return;
+    }
+
+    uint srcIdx = (id_z * srcStridesNCH.x);    // Start of this image in src; no row/column offset is applied here
+    uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x;
+
+    d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z];
+    int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z];    // NOTE(review): the helper below derives width as z - x + 1 while the guard above reads xywhROI fields - confirm which ROI form the launcher passes
+    d_float16 locSrc_f16;
+    warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16);    // Source locations are computed once and reused for every plane
+
+    d_float8 dst_f8;
+    rpp_hip_interpolate8_nearest_neighbor_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8);
+    rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8);
+    if (channelsDst == 3)
+    {
+        srcIdx += srcStridesNCH.y;    // Step src and dst to the second plane
+        dstIdx += dstStridesNCH.y;
+
+        rpp_hip_interpolate8_nearest_neighbor_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8);
+        rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8);
+
+        srcIdx += srcStridesNCH.y;    // Step src and dst to the third plane
+        dstIdx += dstStridesNCH.y;
+
+        rpp_hip_interpolate8_nearest_neighbor_pln1(srcPtr + srcIdx, srcStridesNCH.z, &locSrc_f16, &srcRoi_i4, &dst_f8);
+        rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &dst_f8);
+    }
+}
+
+template <typename T>    // Nearest-neighbor warp-perspective kernel: packed 3-channel source -> planar 3-channel destination.
+__global__ void warp_perspective_nearest_neighbor_pkd3_pln3_hip_tensor(T *srcPtr,
+                                                                       uint2 srcStridesNH,    // .x = n stride, .y = h stride (as packed by hip_exec_warp_perspective_tensor)
+                                                                       T *dstPtr,
+                                                                       uint3 dstStridesNCH,    // .x = n stride, .y = c stride, .z = h stride
+                                                                       d_float9 *perspectiveTensor,    // indexed by id_z: one d_float9 per batch entry
+                                                                       RpptROIPtr roiTensorPtrSrc)    // indexed by id_z: one ROI per batch entry
+{
+    int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8;    // x index is scaled by 8; presumably each thread produces 8 consecutive output pixels
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;    // output row
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;    // batch index
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))    // threads outside this image's ROI extent do nothing
+    {
+        return;
+    }
+
+    uint srcIdx = (id_z * srcStridesNH.x);    // batch offset only; row/column addressing is left to the interpolation helper
+    uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x;    // batch offset + row offset + column, within the first destination plane
+
+    d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z];    // per-image copy of the 9 perspective coefficients
+    int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z];    // ROI entry reinterpreted as four ints
+    d_float16 locSrc_f16;    // filled by the helper on the next line; presumably 8 (x, y) source locations - TODO confirm layout
+    warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16);
+
+    d_float24 dst_f24;    // result buffer for the packed fetch (24 floats)
+    rpp_hip_interpolate24_nearest_neighbor_pkd3(srcPtr + srcIdx, srcStridesNH.y, &locSrc_f16, &srcRoi_i4, &dst_f24);
+    rpp_hip_pack_float24_pkd3_and_store24_pln3(dstPtr + dstIdx, dstStridesNCH.y, &dst_f24);    // channel stride is handed to the store helper, presumably to reach the other two planes
+}
+
+template <typename T>    // Nearest-neighbor warp-perspective kernel: planar 3-channel source -> packed 3-channel destination.
+__global__ void warp_perspective_nearest_neighbor_pln3_pkd3_hip_tensor(T *srcPtr,
+                                                                       uint3 srcStridesNCH,    // .x = n stride, .y = c stride, .z = h stride (as packed by hip_exec_warp_perspective_tensor)
+                                                                       T *dstPtr,
+                                                                       uint2 dstStridesNH,    // .x = n stride, .y = h stride
+                                                                       d_float9 *perspectiveTensor,    // indexed by id_z: one d_float9 per batch entry
+                                                                       RpptROIPtr roiTensorPtrSrc)    // indexed by id_z: one ROI per batch entry
+{
+    int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8;    // x index is scaled by 8; presumably each thread produces 8 consecutive output pixels
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;    // output row
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;    // batch index
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))    // threads outside this image's ROI extent do nothing
+    {
+        return;
+    }
+
+    uint srcIdx = (id_z * srcStridesNCH.x);    // batch offset only; plane/row/column addressing is left to the interpolation helper
+    uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3;    // batch offset + row offset + column * 3 (three interleaved values per packed pixel)
+
+    d_float9 perspectiveMatrix_f9 = perspectiveTensor[id_z];    // per-image copy of the 9 perspective coefficients
+    int4 srcRoi_i4 = *(int4 *)&roiTensorPtrSrc[id_z];    // ROI entry reinterpreted as four ints
+    d_float16 locSrc_f16;    // filled by the helper on the next line; presumably 8 (x, y) source locations - TODO confirm layout
+    warp_perspective_roi_and_srclocs_hip_compute(&srcRoi_i4, id_x, id_y, &perspectiveMatrix_f9, &locSrc_f16);
+
+    d_float24 dst_f24;    // result buffer for the three-plane fetch (24 floats)
+    rpp_hip_interpolate24_nearest_neighbor_pln3(srcPtr + srcIdx, &srcStridesNCH, &locSrc_f16, &srcRoi_i4, &dst_f24);    // the whole stride triple is passed, unlike the pln1 helper which takes only the row stride
+    rpp_hip_pack_float24_pln3_and_store24_pkd3(dstPtr + dstIdx, &dst_f24);
+}
+
+// -------------------- Set 3 - Kernel Executors --------------------
+
+template <typename T>    // Host-side launcher: selects and launches one warp-perspective kernel based on interpolation type and the source/destination layout pair.
+RppStatus hip_exec_warp_perspective_tensor(T *srcPtr,
+                                           RpptDescPtr srcDescPtr,    // read here for layout, channel count and strides
+                                           T *dstPtr,
+                                           RpptDescPtr dstDescPtr,    // read here for w/h/n (launch size), layout, channel count and strides
+                                           Rpp32f *perspectiveTensor,    // reinterpreted as d_float9* when passed to the kernels
+                                           RpptInterpolationType interpolationType,    // BILINEAR and NEAREST_NEIGHBOR are the only values handled below
+                                           RpptROIPtr roiTensorPtrSrc,
+                                           RpptRoiType roiType,
+                                           rpp::Handle& handle)    // supplies the stream for every launch
+{
+    if (roiType == RpptRoiType::XYWH)
+        hip_exec_roi_converison_xywh_to_ltrb(roiTensorPtrSrc, handle);    // NOTE(review): presumably rewrites the ROI tensor in place; the nearest-neighbor kernels still read xywhROI fields afterwards - confirm intended
+
+    int globalThreads_x = (dstDescPtr->w + 7) >> 3;    // ceil(w / 8) threads in x
+    int globalThreads_y = dstDescPtr->h;    // one thread per destination row
+    int globalThreads_z = dstDescPtr->n;    // one z slot per batch entry
+
+    if (interpolationType == RpptInterpolationType::BILINEAR)
+    {
+        if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))    // packed -> packed
+        {
+            hipLaunchKernelGGL(warp_perspective_bilinear_pkd_hip_tensor,
+                               dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               srcPtr,
+                               make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride),
+                               dstPtr,
+                               make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                               reinterpret_cast<d_float9 *>(perspectiveTensor),
+                               roiTensorPtrSrc);
+        }
+        else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))    // planar -> planar
+        {
+            hipLaunchKernelGGL(warp_perspective_bilinear_pln_hip_tensor,
+                               dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               srcPtr,
+                               make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride),
+                               dstPtr,
+                               make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+                               dstDescPtr->c,
+                               reinterpret_cast<d_float9 *>(perspectiveTensor),
+                               roiTensorPtrSrc);
+        }
+        else if ((srcDescPtr->c == 3) && (dstDescPtr->c == 3))    // layout-converting variants are only attempted for 3-channel tensors
+        {
+            if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))    // packed -> planar
+            {
+                hipLaunchKernelGGL(warp_perspective_bilinear_pkd3_pln3_hip_tensor,
+                                   dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                                   dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                                   0,
+                                   handle.GetStream(),
+                                   srcPtr,
+                                   make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride),
+                                   dstPtr,
+                                   make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+                                   reinterpret_cast<d_float9 *>(perspectiveTensor),
+                                   roiTensorPtrSrc);
+            }
+            else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))    // planar -> packed
+            {
+                globalThreads_x = (srcDescPtr->strides.hStride + 7) >> 3;    // x launch size is re-derived from the source row stride instead of the destination width
+                hipLaunchKernelGGL(warp_perspective_bilinear_pln3_pkd3_hip_tensor,
+                                   dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                                   dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                                   0,
+                                   handle.GetStream(),
+                                   srcPtr,
+                                   make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride),
+                                   dstPtr,
+                                   make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                                   reinterpret_cast<d_float9 *>(perspectiveTensor),
+                                   roiTensorPtrSrc);
+            }
+        }
+    }
+    else if (interpolationType == RpptInterpolationType::NEAREST_NEIGHBOR)    // same layout dispatch as the bilinear branch, with the nearest-neighbor kernels
+    {
+        if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))    // packed -> packed
+        {
+            hipLaunchKernelGGL(warp_perspective_nearest_neighbor_pkd_hip_tensor,
+                               dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               srcPtr,
+                               make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride),
+                               dstPtr,
+                               make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                               reinterpret_cast<d_float9 *>(perspectiveTensor),
+                               roiTensorPtrSrc);
+        }
+        else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))    // planar -> planar
+        {
+            hipLaunchKernelGGL(warp_perspective_nearest_neighbor_pln_hip_tensor,
+                               dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               srcPtr,
+                               make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride),
+                               dstPtr,
+                               make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+                               dstDescPtr->c,
+                               reinterpret_cast<d_float9 *>(perspectiveTensor),
+                               roiTensorPtrSrc);
+        }
+        else if ((srcDescPtr->c == 3) && (dstDescPtr->c == 3))    // layout-converting variants are only attempted for 3-channel tensors
+        {
+            if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))    // packed -> planar
+            {
+                hipLaunchKernelGGL(warp_perspective_nearest_neighbor_pkd3_pln3_hip_tensor,
+                                   dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                                   dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                                   0,
+                                   handle.GetStream(),
+                                   srcPtr,
+                                   make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride),
+                                   dstPtr,
+                                   make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+                                   reinterpret_cast<d_float9 *>(perspectiveTensor),
+                                   roiTensorPtrSrc);
+            }
+            else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))    // planar -> packed
+            {
+                globalThreads_x = (srcDescPtr->strides.hStride + 7) >> 3;    // x launch size is re-derived from the source row stride instead of the destination width
+                hipLaunchKernelGGL(warp_perspective_nearest_neighbor_pln3_pkd3_hip_tensor,
+                                   dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                                   dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                                   0,
+                                   handle.GetStream(),
+                                   srcPtr,
+                                   make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride),
+                                   dstPtr,
+                                   make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                                   reinterpret_cast<d_float9 *>(perspectiveTensor),
+                                   roiTensorPtrSrc);
+            }
+        }
+    }
+
+    return RPP_SUCCESS;    // returned unconditionally, including when no branch above matched and no kernel was launched
+}
diff --git a/src/modules/rppt_tensor_geometric_augmentations.cpp b/src/modules/rppt_tensor_geometric_augmentations.cpp
index 4b54ed317..c2540473a 100644
--- a/src/modules/rppt_tensor_geometric_augmentations.cpp
+++ b/src/modules/rppt_tensor_geometric_augmentations.cpp
@@ -1438,6 +1438,126 @@ RppStatus rppt_transpose_host(RppPtr_t srcPtr,
     return RPP_SUCCESS;
 }
 
+RppStatus rppt_warp_perspective_host(RppPtr_t srcPtr,    // Host entry point for warp-perspective: dispatches to a typed host tensor function by interpolation type and data type.
+                                     RpptDescPtr srcDescPtr,    // read here for layout, channel count, dataType and offsetInBytes
+                                     RppPtr_t dstPtr,
+                                     RpptDescPtr dstDescPtr,    // read here for dataType and offsetInBytes
+                                     Rpp32f *perspectiveTensor,    // forwarded unchanged to the typed implementation
+                                     RpptInterpolationType interpolationType,    // must be BILINEAR or NEAREST_NEIGHBOR
+                                     RpptROIPtr roiTensorPtrSrc,
+                                     RpptRoiType roiType,
+                                     rppHandle_t rppHandle)
+{
+    if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR))    // any other interpolation type is rejected up front
+        return RPP_ERROR_NOT_IMPLEMENTED;
+
+    RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c);    // derived from the source descriptor only
+
+    if(interpolationType == RpptInterpolationType::NEAREST_NEIGHBOR)
+    {
+        if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))    // source and destination data types must match in every branch
+        {
+            warp_perspective_nn_u8_u8_host_tensor(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes,
+                                                  srcDescPtr,
+                                                  static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes,
+                                                  dstDescPtr,
+                                                  perspectiveTensor,
+                                                  roiTensorPtrSrc,
+                                                  roiType,
+                                                  layoutParams,
+                                                  rpp::deref(rppHandle));
+        }
+        else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+        {
+            warp_perspective_nn_f32_f32_host_tensor(reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),    // offset is added on an Rpp8u* before the cast to Rpp32f*
+                                                    srcDescPtr,
+                                                    reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
+                                                    dstDescPtr,
+                                                    perspectiveTensor,
+                                                    roiTensorPtrSrc,
+                                                    roiType,
+                                                    layoutParams,
+                                                    rpp::deref(rppHandle));
+        }
+        else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
+        {
+            warp_perspective_nn_i8_i8_host_tensor(static_cast<Rpp8s*>(srcPtr) + srcDescPtr->offsetInBytes,
+                                                  srcDescPtr,
+                                                  static_cast<Rpp8s*>(dstPtr) + dstDescPtr->offsetInBytes,
+                                                  dstDescPtr,
+                                                  perspectiveTensor,
+                                                  roiTensorPtrSrc,
+                                                  roiType,
+                                                  layoutParams,
+                                                  rpp::deref(rppHandle));
+        }
+        else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
+        {
+            warp_perspective_nn_f16_f16_host_tensor(reinterpret_cast<Rpp16f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),    // offset is added on an Rpp8u* before the cast to Rpp16f*
+                                                    srcDescPtr,
+                                                    reinterpret_cast<Rpp16f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
+                                                    dstDescPtr,
+                                                    perspectiveTensor,
+                                                    roiTensorPtrSrc,
+                                                    roiType,
+                                                    layoutParams,
+                                                    rpp::deref(rppHandle));
+        }
+    }
+    else if(interpolationType == RpptInterpolationType::BILINEAR)    // same data-type dispatch, with the bilinear implementations
+    {
+        if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
+        {
+            warp_perspective_bilinear_u8_u8_host_tensor(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes,
+                                                        srcDescPtr,
+                                                        static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes,
+                                                        dstDescPtr,
+                                                        perspectiveTensor,
+                                                        roiTensorPtrSrc,
+                                                        roiType,
+                                                        layoutParams,
+                                                        rpp::deref(rppHandle));
+        }
+        else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+        {
+            warp_perspective_bilinear_f32_f32_host_tensor(reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
+                                                          srcDescPtr,
+                                                          reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
+                                                          dstDescPtr,
+                                                          perspectiveTensor,
+                                                          roiTensorPtrSrc,
+                                                          roiType,
+                                                          layoutParams,
+                                                          rpp::deref(rppHandle));
+        }
+        else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
+        {
+            warp_perspective_bilinear_i8_i8_host_tensor(static_cast<Rpp8s*>(srcPtr) + srcDescPtr->offsetInBytes,
+                                                        srcDescPtr,
+                                                        static_cast<Rpp8s*>(dstPtr) + dstDescPtr->offsetInBytes,
+                                                        dstDescPtr,
+                                                        perspectiveTensor,
+                                                        roiTensorPtrSrc,
+                                                        roiType,
+                                                        layoutParams,
+                                                        rpp::deref(rppHandle));
+        }
+        else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
+        {
+            warp_perspective_bilinear_f16_f16_host_tensor(reinterpret_cast<Rpp16f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
+                                                          srcDescPtr,
+                                                          reinterpret_cast<Rpp16f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
+                                                          dstDescPtr,
+                                                          perspectiveTensor,
+                                                          roiTensorPtrSrc,
+                                                          roiType,
+                                                          layoutParams,
+                                                          rpp::deref(rppHandle));
+        }
+    }
+    return RPP_SUCCESS;    // NOTE(review): callee results are discarded, and an unmatched src/dst data-type pair also ends up here without doing any work
+}
+
 /********************************************************************************************************************/
 /*********************************************** RPP_GPU_SUPPORT = ON ***********************************************/
 /********************************************************************************************************************/
@@ -1667,6 +1787,75 @@ RppStatus rppt_warp_affine_gpu(RppPtr_t srcPtr,
 #endif // backend
 }
 
+RppStatus rppt_warp_perspective_gpu(RppPtr_t srcPtr,    // GPU entry point for warp-perspective: under HIP_COMPILE, dispatches by data type to hip_exec_warp_perspective_tensor.
+                                    RpptDescPtr srcDescPtr,    // read here for dataType and offsetInBytes
+                                    RppPtr_t dstPtr,
+                                    RpptDescPtr dstDescPtr,    // read here for dataType and offsetInBytes
+                                    Rpp32f *perspectiveTensor,    // forwarded unchanged to the executor
+                                    RpptInterpolationType interpolationType,    // must be BILINEAR or NEAREST_NEIGHBOR
+                                    RpptROIPtr roiTensorPtrSrc,
+                                    RpptRoiType roiType,
+                                    rppHandle_t rppHandle)
+{
+#ifdef HIP_COMPILE
+    if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR))    // any other interpolation type is rejected up front
+        return RPP_ERROR_NOT_IMPLEMENTED;
+
+    if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))    // source and destination data types must match in every branch
+    {
+        hip_exec_warp_perspective_tensor(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes,
+                                         srcDescPtr,
+                                         static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes,
+                                         dstDescPtr,
+                                         perspectiveTensor,
+                                         interpolationType,
+                                         roiTensorPtrSrc,
+                                         roiType,
+                                         rpp::deref(rppHandle));
+    }
+    else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
+    {
+        hip_exec_warp_perspective_tensor(reinterpret_cast<half*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),    // offset is added on an Rpp8u* before the cast to half*
+                                         srcDescPtr,
+                                         reinterpret_cast<half*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
+                                         dstDescPtr,
+                                         perspectiveTensor,
+                                         interpolationType,
+                                         roiTensorPtrSrc,
+                                         roiType,
+                                         rpp::deref(rppHandle));
+    }
+    else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+    {
+        hip_exec_warp_perspective_tensor(reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),    // offset is added on an Rpp8u* before the cast to Rpp32f*
+                                         srcDescPtr,
+                                         reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
+                                         dstDescPtr,
+                                         perspectiveTensor,
+                                         interpolationType,
+                                         roiTensorPtrSrc,
+                                         roiType,
+                                         rpp::deref(rppHandle));
+    }
+    else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
+    {
+        hip_exec_warp_perspective_tensor(static_cast<Rpp8s*>(srcPtr) + srcDescPtr->offsetInBytes,
+                                         srcDescPtr,
+                                         static_cast<Rpp8s*>(dstPtr) + dstDescPtr->offsetInBytes,
+                                         dstDescPtr,
+                                         perspectiveTensor,
+                                         interpolationType,
+                                         roiTensorPtrSrc,
+                                         roiType,
+                                         rpp::deref(rppHandle));
+    }
+
+    return RPP_SUCCESS;    // NOTE(review): the executor's RppStatus is discarded, and an unmatched src/dst data-type pair also ends up here without launching anything
+#elif defined(OCL_COMPILE)
+    return RPP_ERROR_NOT_IMPLEMENTED;    // no OpenCL implementation in this function
+#endif // backend - NOTE(review): with neither HIP_COMPILE nor OCL_COMPILE defined, no return statement is compiled for this function
+}
+
 /******************** flip ********************/
 
 RppStatus rppt_flip_gpu(RppPtr_t srcPtr,
diff --git a/utilities/examples/brightness/CMakeLists.txt b/utilities/examples/brightness/CMakeLists.txt
index 4c23da688..20367aba4 100644
--- a/utilities/examples/brightness/CMakeLists.txt
+++ b/utilities/examples/brightness/CMakeLists.txt
@@ -42,7 +42,7 @@ if(NOT WIN32)
     set(White       "${Esc}[37m")
 endif()
 
-find_package(hip QUIET)
+find_package(HIP QUIET)
 find_package(OpenCV QUIET)
 find_package(TurboJpeg QUIET)
 
@@ -59,7 +59,7 @@ else()
     message("-- ${Yellow}Error: TurboJpeg must be installed to install ${PROJECT_NAME} successfully!${ColourReset}")
 endif()
 
-if (hip_FOUND AND OpenCV_FOUND)
+if (HIP_FOUND AND OpenCV_FOUND)
     message("-- ${Green}${PROJECT_NAME} set to build with rpp, hip and OpenCV${ColourReset}")
     set(COMPILER_FOR_HIP ${ROCM_PATH}/bin/amdclang++)
     set(CMAKE_CXX_COMPILER ${COMPILER_FOR_HIP})
diff --git a/utilities/rpp-performancetests/HIP_NEW/CMakeLists.txt b/utilities/rpp-performancetests/HIP_NEW/CMakeLists.txt
index cdf861914..57c81ebd9 100644
--- a/utilities/rpp-performancetests/HIP_NEW/CMakeLists.txt
+++ b/utilities/rpp-performancetests/HIP_NEW/CMakeLists.txt
@@ -42,7 +42,7 @@ if(NOT WIN32)
     set(White       "${Esc}[37m")
 endif()
 
-find_package(hip QUIET)
+find_package(HIP QUIET)
 find_package(OpenCV QUIET)
 
 # OpenMP
@@ -50,7 +50,7 @@ find_package(OpenMP REQUIRED)
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
 
-if (hip_FOUND AND OpenCV_FOUND)
+if(HIP_FOUND AND OpenCV_FOUND)
     message("-- ${Green}${PROJECT_NAME} set to build with rpp, hip and OpenCV${ColourReset}")
     set(COMPILER_FOR_HIP ${ROCM_PATH}/bin/amdclang++)
     set(CMAKE_CXX_COMPILER ${COMPILER_FOR_HIP})
diff --git a/utilities/rpp-unittests/HIP_NEW/CMakeLists.txt b/utilities/rpp-unittests/HIP_NEW/CMakeLists.txt
index 18255dc2f..ecb581283 100644
--- a/utilities/rpp-unittests/HIP_NEW/CMakeLists.txt
+++ b/utilities/rpp-unittests/HIP_NEW/CMakeLists.txt
@@ -44,7 +44,7 @@ if(NOT WIN32)
     set(White       "${Esc}[37m")
 endif()
 
-find_package(hip QUIET)
+find_package(HIP QUIET)
 find_package(OpenCV QUIET)
 
 # OpenMP
@@ -52,7 +52,7 @@ find_package(OpenMP REQUIRED)
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
 
-if (hip_FOUND AND OpenCV_FOUND)
+if(HIP_FOUND AND OpenCV_FOUND)
     message("-- ${Green}${PROJECT_NAME} set to build with rpp, hip and OpenCV${ColourReset}")
     set(COMPILER_FOR_HIP ${ROCM_PATH}/bin/amdclang++)
     set(CMAKE_CXX_COMPILER ${COMPILER_FOR_HIP})
diff --git a/utilities/test_suite/CMakeLists.txt b/utilities/test_suite/CMakeLists.txt
index 708cd7c3a..9a9fc90d1 100644
--- a/utilities/test_suite/CMakeLists.txt
+++ b/utilities/test_suite/CMakeLists.txt
@@ -23,20 +23,27 @@ SOFTWARE.
 ]]
 
 cmake_minimum_required(VERSION 3.10)
-project(rpp-test)
-
-# make test with CTest
-enable_testing()
-include(CTest)
 
 # ROCM Path
 if(DEFINED ENV{ROCM_PATH})
     set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
 elseif(ROCM_PATH)
-    message("-- ${PROJECT_NAME} INFO:ROCM_PATH Set -- ${ROCM_PATH}")
+    message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
 else()
     set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
 endif()
+# Set AMD Clang as default compiler
+if(NOT DEFINED CMAKE_CXX_COMPILER AND EXISTS "${ROCM_PATH}/bin/amdclang++")
+    set(CMAKE_C_COMPILER ${ROCM_PATH}/bin/amdclang)
+    set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/amdclang++)
+elseif(NOT DEFINED CMAKE_CXX_COMPILER AND NOT EXISTS "${ROCM_PATH}/bin/amdclang++")
+    set(CMAKE_CXX_COMPILER clang++)
+endif()
+
+project(rpp-test)
+# make test with CTest
+enable_testing()
+include(CTest)
 
 # Set message options
 if(NOT WIN32)
@@ -66,6 +73,8 @@ List of high level dependency checks - for RPP QA tests in utilities/test_suite/
 - OpenCV (For IMAGE and VOXEL tests - to decode image inputs and write images for unit test outputs)
 - TurboJPEG (For IMAGE tests - to decode image inputs)
 ]]
+# add find modules
+list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake)
 
 # find RPP
 find_library(RPP_LIBRARY NAMES rpp HINTS ${ROCM_PATH}/lib)
@@ -91,6 +100,7 @@ else()
     elseif(NOT DEFINED BACKEND)
         set(BACKEND "CPU")
     endif()
+    message("-- ${White}${PROJECT_NAME}: Using RPP Backend: ${BACKEND}${ColourReset}")
 
     # RPP installation - Audio support check
     set(RPP_AUDIO_AUGMENTATIONS_SUPPORT_FOUND 0)
@@ -123,7 +133,7 @@ else()
     if(Python3_FOUND)
         if (PANDAS_FOUND EQUAL 0)
             if(OpenMP_FOUND)
-
+                message("-- ${White}${PROJECT_NAME}: Adding RPP HOST tests${ColourReset}")
                 # HOST test set 1 - rpp_qa_tests_tensor_host_all - HOST Tensor-Image QA PASS/FAIL tests
                 if(TurboJpeg_FOUND)
                     if(OpenCV_FOUND)
@@ -177,9 +187,17 @@ else()
                 )
 
                 if( "${BACKEND}" STREQUAL "HIP")
-                    find_package(hip QUIET)
-                    if(hip_FOUND)
-
+                    if(NOT DEFINED HIP_PATH)
+                        if(NOT DEFINED ENV{HIP_PATH})
+                            set(HIP_PATH ${ROCM_PATH} CACHE PATH "Path to which HIP has been installed")
+                        else()
+                            set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
+                        endif()
+                    endif()
+                    list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip)
+                    find_package(HIP QUIET)
+                    if(HIP_FOUND)
+                        message("-- ${White}${PROJECT_NAME}: Adding RPP HIP tests${ColourReset}")
                         # HIP test set 1 - rpp_qa_tests_tensor_hip_all - HIP Tensor-Image QA PASS/FAIL tests
                         if(TurboJpeg_FOUND)
                             if(OpenCV_FOUND)
@@ -232,7 +250,7 @@ else()
                             WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
                         )
 
-                    endif(hip_FOUND)
+                    endif(HIP_FOUND)
                 elseif( "${BACKEND}" STREQUAL "OCL")
                     # TBD: Add OCL Tests
                     message("-- ${Yellow}${PROJECT_NAME} Warning: OpenCL tests not enabled${ColourReset}")
diff --git a/utilities/test_suite/HIP/CMakeLists.txt b/utilities/test_suite/HIP/CMakeLists.txt
index 084f0981e..2ffe377e6 100644
--- a/utilities/test_suite/HIP/CMakeLists.txt
+++ b/utilities/test_suite/HIP/CMakeLists.txt
@@ -23,7 +23,6 @@ SOFTWARE.
 ]]
 
 cmake_minimum_required(VERSION 3.10)
-project(test_suite/HIP)
 
 # ROCM Path
 if(DEFINED ENV{ROCM_PATH})
@@ -33,6 +32,15 @@ elseif(ROCM_PATH)
 else()
     set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
 endif()
+# Set AMD Clang as default compiler
+if(NOT DEFINED CMAKE_CXX_COMPILER AND EXISTS "${ROCM_PATH}/bin/amdclang++")
+    set(CMAKE_C_COMPILER ${ROCM_PATH}/bin/amdclang)
+    set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/amdclang++)
+elseif(NOT DEFINED CMAKE_CXX_COMPILER AND NOT EXISTS "${ROCM_PATH}/bin/amdclang++")
+    set(CMAKE_CXX_COMPILER clang++)
+endif()
+
+project(test_suite/HIP)
 
 list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake)
 list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
@@ -105,8 +113,11 @@ else()
             set(RPP_AUDIO_AUGMENTATIONS_SUPPORT_FOUND ${CMAKE_MATCH_1})
         endif()
 
+        # add find modules
+        list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake)
+
         # find required libraries
-        find_package(hip QUIET)
+        find_package(HIP QUIET)
         set(Python3_FIND_VIRTUALENV FIRST)
         find_package(Python3 QUIET)
         find_package(NIFTI QUIET)
@@ -117,6 +128,7 @@ else()
         find_package(OpenMP QUIET)
         find_package(OpenCV QUIET)
         find_package(TurboJpeg QUIET)
+        find_package(StdFilesystem QUIET)
 
         # find required python3-pip imports
         execute_process(
@@ -132,7 +144,7 @@ else()
         #     ERROR_QUIET                                           # added only on HOST
         # )
 
-        if(hip_FOUND)
+        if(HIP_FOUND)
             message("-- ${Green}HIP found at hip_INCLUDE_DIRS - ${hip_INCLUDE_DIRS} and hip_LIBRARIES - ${hip_LIBRARIES}${ColourReset}")
 
             if(Python3_FOUND)
@@ -209,9 +221,6 @@ else()
                                 link_directories(${SndFile_LIBRARIES_DIR} /usr/local/lib/)
                                 message("-- ${Green}${PROJECT_NAME} Tensor-Audio HIP tests set to build with libsnd${ColourReset}")
                                 add_executable(Tensor_audio_hip Tensor_audio_hip.cpp)
-                                if(NOT APPLE)
-                                    set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} stdc++fs)
-                                endif(NOT APPLE)
                                 target_link_libraries(Tensor_audio_hip -lrpp pthread ${libsnd_LIBS} -lsndfile ${LINK_LIBRARY_LIST} ${hip_LIBRARIES})
                             else()
                                 message("-- ${Yellow}Warning: libsnd must be installed to install ${PROJECT_NAME} Tensor-Audio tests successfully!${ColourReset}")
@@ -234,7 +243,7 @@ else()
             endif(Python3_FOUND)
         else()
             message("-- ${Red}${PROJECT_NAME} Error: ROCm HIP must be installed to run any RPP test_suite HIP tests successfully${ColourReset}")
-        endif(hip_FOUND)
+        endif(HIP_FOUND)
     else()
         message("-- ${Red}${PROJECT_NAME} is supported to build and run only with RPP HIP backend installation${ColourReset}")
     endif("${BACKEND}" STREQUAL "HIP")
diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp
index 7493fc289..415d06552 100644
--- a/utilities/test_suite/HIP/Tensor_hip.cpp
+++ b/utilities/test_suite/HIP/Tensor_hip.cpp
@@ -60,12 +60,12 @@ int main(int argc, char **argv)
     int decoderType = atoi(argv[13]);
     int batchSize = atoi(argv[14]);
 
-    bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23|| testCase == 24 || testCase == 40 || testCase == 41 || testCase == 49 || testCase == 54 || testCase == 79);
+    bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23|| testCase == 24 || testCase == 28 || testCase == 40 || testCase == 41 || testCase == 49 || testCase == 54 || testCase == 79);
     bool kernelSizeCase = (testCase == 40 || testCase == 41 || testCase == 49 || testCase == 54);
     bool dualInputCase = (testCase == 2 || testCase == 30 || testCase == 33 || testCase == 61 || testCase == 63 || testCase == 65 || testCase == 68);
     bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 10 || testCase == 84 || testCase == 49 || testCase == 54);
-    bool nonQACase = (testCase == 24 || testCase == 54);
-    bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 79);
+    bool nonQACase = (testCase == 24 || testCase == 28 || testCase == 54);
+    bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24|| testCase == 28 || testCase == 79);
     bool reductionTypeCase = (testCase == 87 || testCase == 88 || testCase == 89 || testCase == 90 || testCase == 91);
     bool noiseTypeCase = (testCase == 8);
     bool pln1OutTypeCase = (testCase == 86);
@@ -427,6 +427,10 @@ int main(int argc, char **argv)
     void *d_interDstPtr;
     if(testCase == 5)
         CHECK_RETURN_STATUS(hipHostMalloc(&d_interDstPtr, srcDescPtr->strides.nStride * srcDescPtr->n * sizeof(Rpp32f)));
+    
+    Rpp32f *perspectiveTensorPtr = NULL;
+    if(testCase == 28)
+        CHECK_RETURN_STATUS(hipHostMalloc(&perspectiveTensorPtr, batchSize * 9 * sizeof(Rpp32f)));
 
     // case-wise RPP API and measure time script for Unit and Performance test
     cout << "\nRunning " << func << " " << numRuns << " times (each time with a batch size of " << batchSize << " images) and computing mean statistics...";
@@ -821,6 +825,37 @@ int main(int argc, char **argv)
 
                     break;
                 }
+                case 28:
+                {
+                    testCaseName = "warp_perspective";
+
+                    if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR))
+                    {
+                        missingFuncFlag = 1;
+                        break;
+                    }
+
+                    for (i = 0, j = 0; i < batchSize; i++, j += 9)
+                    {
+                        perspectiveTensorPtr[j + 0] = 0.93;
+                        perspectiveTensorPtr[j + 1] = 0.5;
+                        perspectiveTensorPtr[j + 2] = 0.0;
+                        perspectiveTensorPtr[j + 3] = -0.5;
+                        perspectiveTensorPtr[j + 4] = 0.93;
+                        perspectiveTensorPtr[j + 5] = 0.0;
+                        perspectiveTensorPtr[j + 6] = 0.005;
+                        perspectiveTensorPtr[j + 7] = 0.005;
+                        perspectiveTensorPtr[j + 8] = 1;
+                    }
+
+                    startWallTime = omp_get_wtime();
+                    if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
+                        rppt_warp_perspective_gpu(d_input, srcDescPtr, d_output, dstDescPtr, perspectiveTensorPtr, interpolationType, roiTensorPtrSrc, roiTypeSrc, handle);
+                    else
+                        missingFuncFlag = 1;
+
+                    break;
+                }
                 case 29:
                 {
                     testCaseName = "water";
@@ -1684,6 +1719,8 @@ int main(int argc, char **argv)
     }
     if(testCase == 35)
         CHECK_RETURN_STATUS(hipHostFree(rgbOffsets));
+    if(perspectiveTensorPtr != NULL)
+      CHECK_RETURN_STATUS(hipHostFree(perspectiveTensorPtr));
     if (reductionTypeCase)
     {
         CHECK_RETURN_STATUS(hipHostFree(reductionFuncResultArr));
diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index c916c8d6b..38d5eff66 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -75,10 +75,10 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType))
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
-            elif case == "21" or case == "23" or case == "24" or case == "79":
+            elif case == "21" or case == "23" or case == "24"  or case == "28" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
-                if case =='79':
+                if case == '28' or case =='79':
                     interpolationRange = 2
                 for interpolationType in range(interpolationRange):
                     print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType))
@@ -271,7 +271,7 @@ def rpp_test_suite_parser_and_validator():
 subprocess.call(["make", "-j16"], cwd=".")    # nosec
 
 # List of cases supported
-supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '10', '13', '20', '21', '23', '26', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '49', '54', '61', '63', '65', '68', '70', '79', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92']
+supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '10', '13', '20', '21', '23', '24', '26', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '49', '54', '61', '63', '65', '68', '70', '79', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92']
 
 # Create folders based on testType and profilingOption
 if testType == 1 and profilingOption == "YES":
@@ -368,7 +368,7 @@ def rpp_test_suite_parser_and_validator():
                             # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
                             for noiseType in range(3):
                                 run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, noiseType, "_noiseType", numRuns, testType, layout, qaMode, decoderType, batchSize, roiList)
-                        elif case == "21" or case == "23" or case == "24" or case == "79":
+                        elif case == "21" or case == "23" or case == "24" or case == "28" or case == "79":
                             # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                             for interpolationType in range(6):
                                 run_performance_test_with_profiler(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, interpolationType, "_interpolationType", numRuns, testType, layout, qaMode, decoderType, batchSize, roiList)
@@ -426,7 +426,7 @@ def rpp_test_suite_parser_and_validator():
                                 fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter)
                                 if fileCheck == False:
                                     continue
-                        elif (CASE_NUM == "24" or CASE_NUM == "21" or CASE_NUM == "23" or CASE_NUM == "79") and TYPE.startswith("Tensor"):
+                        elif (CASE_NUM == "24" or CASE_NUM == "21" or CASE_NUM == "23" or CASE_NUM == "28" or CASE_NUM == "79") and TYPE.startswith("Tensor"):
                             INTERPOLATIONTYPE_LIST = [0, 1, 2, 3, 4, 5]
                             # Loop through extra param interpolationType
                             for INTERPOLATIONTYPE in INTERPOLATIONTYPE_LIST:
@@ -486,7 +486,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
 # print the results of qa tests
-nonQACaseList = ['6', '8', '10', '24', '54', '84'] # Add cases present in supportedCaseList, but without QA support
+nonQACaseList = ['6', '8', '10', '24', '28', '54', '84'] # Add cases present in supportedCaseList, but without QA support
 
 if qaMode and testType == 0:
     qaFilePath = os.path.join(outFilePath, "QA_results.txt")
diff --git a/utilities/test_suite/HOST/CMakeLists.txt b/utilities/test_suite/HOST/CMakeLists.txt
index c9becedd3..5ec9f8494 100644
--- a/utilities/test_suite/HOST/CMakeLists.txt
+++ b/utilities/test_suite/HOST/CMakeLists.txt
@@ -23,16 +23,24 @@ SOFTWARE.
 ]]
 
 cmake_minimum_required(VERSION 3.10)
-project(test_suite/HOST)
 
 # ROCM Path
 if(DEFINED ENV{ROCM_PATH})
     set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
 elseif(ROCM_PATH)
-    message("-- ${PROJECT_NAME} INFO:ROCM_PATH Set -- ${ROCM_PATH}")
+    message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
 else()
     set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
 endif()
+# Set AMD Clang as default compiler
+if(NOT DEFINED CMAKE_CXX_COMPILER AND EXISTS "${ROCM_PATH}/bin/amdclang++")
+    set(CMAKE_C_COMPILER ${ROCM_PATH}/bin/amdclang)
+    set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/amdclang++)
+elseif(NOT DEFINED CMAKE_CXX_COMPILER AND NOT EXISTS "${ROCM_PATH}/bin/amdclang++")
+    set(CMAKE_CXX_COMPILER clang++)
+endif()
+
+project(test_suite/HOST)
 
 list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake)
 list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
@@ -104,6 +112,9 @@ else()
             set(RPP_AUDIO_AUGMENTATIONS_SUPPORT_FOUND ${CMAKE_MATCH_1})
         endif()
 
+        # add find modules
+        list(APPEND CMAKE_MODULE_PATH ${ROCM_PATH}/share/rpp/test/cmake)
+
         # find required libraries
         set(Python3_FIND_VIRTUALENV FIRST)
         find_package(Python3 QUIET)
@@ -115,6 +126,7 @@ else()
         find_package(OpenMP QUIET)
         find_package(OpenCV QUIET)
         find_package(TurboJpeg QUIET)
+        find_package(StdFilesystem QUIET)
 
         # find required python3-pip imports
         execute_process(
@@ -208,9 +220,6 @@ else()
                             link_directories(${SndFile_LIBRARIES_DIR} /usr/local/lib/)
                             message("-- ${Green}${PROJECT_NAME} Tensor-Audio HOST tests set to build with libsnd${ColourReset}")
                             add_executable(Tensor_audio_host Tensor_audio_host.cpp)
-                            if(NOT APPLE)
-                                set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} stdc++fs)
-                            endif(NOT APPLE)
                             target_link_libraries(Tensor_audio_host -lrpp pthread ${libsnd_LIBS} -lsndfile ${LINK_LIBRARY_LIST})
                         else()
                             message("-- ${Yellow}Warning: libsnd must be installed to install ${PROJECT_NAME} Tensor-Audio tests successfully!${ColourReset}")
diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp
index f2626302e..82743ff53 100644
--- a/utilities/test_suite/HOST/Tensor_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_host.cpp
@@ -60,12 +60,12 @@ int main(int argc, char **argv)
     int decoderType = atoi(argv[13]);
     int batchSize = atoi(argv[14]);
 
-    bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23 || testCase == 24 || testCase == 49 || testCase ==54 || testCase == 79);
+    bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23 || testCase == 24 || testCase == 28 || testCase == 49 || testCase ==54 || testCase == 79);
     bool kernelSizeCase = (testCase == 49 || testCase == 54);
     bool dualInputCase = (testCase == 2 || testCase == 30 || testCase == 33 || testCase == 61 || testCase == 63 || testCase == 65 || testCase == 68);
     bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 10 || testCase == 84);
-    bool nonQACase = (testCase == 24);
-    bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 79);
+    bool nonQACase = (testCase == 24 || testCase == 28);
+    bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 28 || testCase == 79);
     bool reductionTypeCase = (testCase == 87 || testCase == 88 || testCase == 89 || testCase == 90 || testCase == 91);
     bool noiseTypeCase = (testCase == 8);
     bool pln1OutTypeCase = (testCase == 86);
@@ -785,6 +785,40 @@ int main(int argc, char **argv)
 
                     break;
                 }
+                case 28:
+                {
+                    testCaseName = "warp_perspective";
+
+                    if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR))
+                    {
+                        missingFuncFlag = 1;
+                        break;
+                    }
+
+                    Rpp32f9 perspectiveTensor_f9[batchSize];
+                    Rpp32f *perspectiveTensor = reinterpret_cast<Rpp32f *>(perspectiveTensor_f9);
+                    for (i = 0; i < batchSize; i++)
+                    {
+                        perspectiveTensor_f9[i].data[0] = 0.93;
+                        perspectiveTensor_f9[i].data[1] = 0.5;
+                        perspectiveTensor_f9[i].data[2] = 0.0;
+                        perspectiveTensor_f9[i].data[3] = -0.5;
+                        perspectiveTensor_f9[i].data[4] = 0.93;
+                        perspectiveTensor_f9[i].data[5] = 0.0;
+                        perspectiveTensor_f9[i].data[6] = 0.005;
+                        perspectiveTensor_f9[i].data[7] = 0.005;
+                        perspectiveTensor_f9[i].data[8] = 1;
+                    }
+
+                    startWallTime = omp_get_wtime();
+                    startCpuTime = clock();
+                    if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
+                        rppt_warp_perspective_host(input, srcDescPtr, output, dstDescPtr, perspectiveTensor, interpolationType, roiTensorPtrSrc, roiTypeSrc, handle);
+                    else
+                        missingFuncFlag = 1;
+
+                    break;
+                }
                 case 29:
                 {
                     testCaseName = "water";
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index db550c488..375aa907e 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -74,10 +74,10 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                     print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0")
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
-            elif case == "21" or case == "23" or case == "24" or case == "79":
+            elif case == "21" or case == "23" or case == "24" or case == "79" or case == "28":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 interpolationRange = 6
-                if case =='79':
+                if case =='79' or case == "28":
                     interpolationRange = 2
                 for interpolationType in range(interpolationRange):
                     print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0")
@@ -120,7 +120,7 @@ def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPa
                 for noiseType in range(3):
                     run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, noiseType, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList)
                     print("")
-            elif case == "21" or case == "23" or case == "24" or case == "79":
+            elif case == "21" or case == "23" or case == "24" or case == "28" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
                 for interpolationType in range(6):
                     run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, interpolationType, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList)
@@ -259,7 +259,7 @@ def rpp_test_suite_parser_and_validator():
 subprocess.call(["make", "-j16"], cwd=".")    # nosec
 
 # List of cases supported
-supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '10', '13', '20', '21', '23', '26', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '49', '54', '61', '63', '65', '68', '70', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92']
+supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '10', '13', '20', '21', '23', '24', '26', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '49', '54', '61', '63', '65', '68', '70', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92']
 
 if testType == 0:
     noCaseSupported = all(case not in supportedCaseList for case in caseList)
@@ -319,7 +319,7 @@ def rpp_test_suite_parser_and_validator():
             run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList)
 
 # print the results of qa tests
-nonQACaseList = ['6', '8', '10', '24', '54', '84'] # Add cases present in supportedCaseList, but without QA support
+nonQACaseList = ['6', '8', '10', '24', '28', '54', '84'] # Add cases present in supportedCaseList, but without QA support
 
 if qaMode and testType == 0:
     qaFilePath = os.path.join(outFilePath, "QA_results.txt")
diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py
index c0922db65..bcdd2b875 100644
--- a/utilities/test_suite/common.py
+++ b/utilities/test_suite/common.py
@@ -53,6 +53,7 @@
     23: ["rotate", "HOST", "HIP"],
     24: ["warp_affine", "HOST", "HIP"],
     26: ["lens_correction", "HOST", "HIP"],
+    28: ["warp_perspective", "HOST", "HIP"],
     29: ["water", "HOST", "HIP"],
     30: ["non_linear_blend", "HOST", "HIP"],
     31: ["color_cast", "HOST", "HIP"],
@@ -119,7 +120,7 @@
 ImageAugmentationGroupMap = {
     "color_augmentations" : [0, 1, 2, 3, 4, 13, 31, 34, 36, 45, 81],
     "effects_augmentations" : [5, 6, 8, 10, 29, 30, 32, 35, 46, 82, 83, 84],
-    "geometric_augmentations" : [20, 21, 23, 24, 26, 33, 37, 38, 39, 63, 79, 80, 92],
+    "geometric_augmentations" : [20, 21, 23, 24, 26, 28, 33, 37, 38, 39, 63, 79, 80, 92],
     "filter_augmentations" : [49, 54],
     "arithmetic_operations" : [61],
     "logical_operations" : [65, 68],
diff --git a/utilities/test_suite/rpp_test_suite_image.h b/utilities/test_suite/rpp_test_suite_image.h
index 28f3b651e..336f23ffa 100644
--- a/utilities/test_suite/rpp_test_suite_image.h
+++ b/utilities/test_suite/rpp_test_suite_image.h
@@ -78,6 +78,7 @@ std::map<int, string> augmentationMap =
     {23, "rotate"},
     {24, "warp_afffine"},
     {26, "lens_correction"},
+    {28, "warp_perspective"},
     {29, "water"},
     {30, "non_linear_blend"},
     {31, "color_cast"},

From fde933ce8c7a72ba7da8a4f4712849d7640202cc Mon Sep 17 00:00:00 2001
From: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Date: Wed, 8 Jan 2025 14:16:10 +0000
Subject: [PATCH 12/17] Fix error code display and gaussian filter

---
 utilities/test_suite/HOST/runTests.py       | 2 +-
 utilities/test_suite/rpp_test_suite_image.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 375aa907e..8b4e99b70 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -66,7 +66,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
             if case == "49" or case == "54":
                 for kernelSize in range(3, 10, 2):
                     print(f"./Tensor_host {srcPath1} {srcPath2} {dstPathTemp} {bitDepth} {outputFormatToggle} {case} {kernelSize} 0 ")
-                    result = subprocess.run([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
             elif case == "8":
                 # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
diff --git a/utilities/test_suite/rpp_test_suite_image.h b/utilities/test_suite/rpp_test_suite_image.h
index 336f23ffa..277d12212 100644
--- a/utilities/test_suite/rpp_test_suite_image.h
+++ b/utilities/test_suite/rpp_test_suite_image.h
@@ -1028,7 +1028,7 @@ inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, R
         func += "_noiseType" + noiseTypeName;
         binFile += "_noiseType" + noiseTypeName;
     }
-    else if(testCase == 49)
+    else if(testCase == 49 || testCase == 54)
     {
         func += "_kernelSize" + std::to_string(additionalParam);
         binFile += "_kernelSize" + std::to_string(additionalParam);

From ea9d991a62308a64ae17827994f287c61abf4c52 Mon Sep 17 00:00:00 2001
From: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Date: Fri, 10 Jan 2025 06:25:46 +0000
Subject: [PATCH 13/17] Resolve f-string bug in HOST runTests.py

---
 utilities/test_suite/HOST/runTests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 8b4e99b70..3b3b86864 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -65,7 +65,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
 
             if case == "49" or case == "54":
                 for kernelSize in range(3, 10, 2):
-                    print(f"./Tensor_host {srcPath1} {srcPath2} {dstPathTemp} {bitDepth} {outputFormatToggle} {case} {kernelSize} 0 ")
+                    print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize) + " 0")
                     result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
             elif case == "8":

From 55c8ddddad99fbac05c04d8af7ee7a4f502ad53e Mon Sep 17 00:00:00 2001
From: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Date: Thu, 16 Jan 2025 06:04:31 +0000
Subject: [PATCH 14/17] Fix for CI failure and some improvement in error
 detection

---
 include/rppdefs.h                             |  4 +-
 utilities/test_suite/HIP/Tensor_image_hip.cpp |  2 +-
 utilities/test_suite/HIP/runImageTests.py     | 28 +++++++-------
 .../test_suite/HOST/Tensor_image_host.cpp     |  2 +-
 utilities/test_suite/HOST/runImageTests.py    | 24 ++++++------
 utilities/test_suite/common.py                | 38 ++++++++++++++++++-
 6 files changed, 68 insertions(+), 30 deletions(-)

diff --git a/include/rppdefs.h b/include/rppdefs.h
index 6bbee49ea..0ce632b65 100644
--- a/include/rppdefs.h
+++ b/include/rppdefs.h
@@ -164,7 +164,9 @@ typedef enum
     /*! \brief Number of src dims is invalid. (Needs to adhere to function specification.) \ingroup group_rppdefs */
     RPP_ERROR_INVALID_SRC_DIMS          = -23,
     /*! \brief Number of dst dims is invalid. (Needs to adhere to function specification.) \ingroup group_rppdefs */
-    RPP_ERROR_INVALID_DST_DIMS          = -24
+    RPP_ERROR_INVALID_DST_DIMS          = -24,
+    /*! \brief Cpp API functionality is not implemented. (Needs to adhere to function specification.) \ingroup group_rppdefs */
+    RPP_ERROR_CPP_API_NOT_IMPLEMENTED    = -25
 } RppStatus;
 
 /*! \brief RPP rppStatus_t type enums
diff --git a/utilities/test_suite/HIP/Tensor_image_hip.cpp b/utilities/test_suite/HIP/Tensor_image_hip.cpp
index 1f50908ba..a83f7660a 100644
--- a/utilities/test_suite/HIP/Tensor_image_hip.cpp
+++ b/utilities/test_suite/HIP/Tensor_image_hip.cpp
@@ -1582,7 +1582,7 @@ int main(int argc, char **argv)
             if (missingFuncFlag == 1)
             {
                 cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n";
-                return -1;
+                return RPP_ERROR_CPP_API_NOT_IMPLEMENTED;
             }
 
             maxWallTime = max(maxWallTime, wallTime);
diff --git a/utilities/test_suite/HIP/runImageTests.py b/utilities/test_suite/HIP/runImageTests.py
index 9bdb31e61..b1e2c653f 100644
--- a/utilities/test_suite/HIP/runImageTests.py
+++ b/utilities/test_suite/HIP/runImageTests.py
@@ -66,14 +66,14 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
 
             if case == "40" or case == "41" or case == "49" or case == "54":
                 for kernelSize in range(3, 10, 2):
-                    print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize))
-                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+                    print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize))
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
             elif case == "8":
                 # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
                 for noiseType in range(3):
-                    print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType))
-                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+                    print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType))
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
             elif case == "21" or case == "23" or case == "24" or case == "28" or case == "79":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
@@ -81,19 +81,19 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 if case == '28' or case =='79':
                     interpolationRange = 2
                 for interpolationType in range(interpolationRange):
-                    print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType))
-                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+                    print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType))
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
             else:
-                print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout))
-                result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+                print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout))
+                result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                 log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
             print("------------------------------------------------------------------------------------------")
 
 def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, additionalParam, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList):
-    with open(loggingFolder + "/Tensor_hip_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
-        print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam))
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec        
+    with open(loggingFolder + "/Tensor_image_hip_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
+        print("./Tensor_image_hip " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam))
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_image_hip", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # nosec        
         read_from_subprocess_and_write_to_log(process, logFile)
         log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HIP"))
 
@@ -492,11 +492,11 @@ def rpp_test_suite_parser_and_validator():
     qaFilePath = os.path.join(outFilePath, "QA_results.txt")
     checkFile = os.path.isfile(qaFilePath)
     if checkFile:
-        print("---------------------------------- Results of QA Test - Tensor_hip ----------------------------------\n")
-        print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_hip")
+        print("---------------------------------- Results of QA Test - Tensor_image_hip ----------------------------------\n")
+        print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_image_hip")
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_hip ----------------------------------\n")
+    print("\n---------------------------------- Error log - Tensor_image_hip ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
diff --git a/utilities/test_suite/HOST/Tensor_image_host.cpp b/utilities/test_suite/HOST/Tensor_image_host.cpp
index 8914e576c..1f00dff7a 100644
--- a/utilities/test_suite/HOST/Tensor_image_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_image_host.cpp
@@ -1618,7 +1618,7 @@ int main(int argc, char **argv)
             if (missingFuncFlag == 1)
             {
                 cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n";
-                return -1;
+                return RPP_ERROR_CPP_API_NOT_IMPLEMENTED;
             }
 
             maxWallTime = std::max(maxWallTime, wallTime);
diff --git a/utilities/test_suite/HOST/runImageTests.py b/utilities/test_suite/HOST/runImageTests.py
index f2830398d..083c70b0a 100644
--- a/utilities/test_suite/HOST/runImageTests.py
+++ b/utilities/test_suite/HOST/runImageTests.py
@@ -65,14 +65,14 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
 
             if case == "49" or case == "54":
                 for kernelSize in range(3, 10, 2):
-                    print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize) + " 0")
-                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+                    print("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSize) + " 0")
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSize), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
             elif case == "8":
                 # Run all variants of noise type functions with additional argument of noiseType = gausssianNoise / shotNoise / saltandpepperNoise
                 for noiseType in range(3):
-                    print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0")
-                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+                    print("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(noiseType) + " 0")
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(noiseType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
             elif case == "21" or case == "23" or case == "24" or case == "79" or case == "28":
                 # Run all variants of interpolation functions with additional argument of interpolationType = bicubic / bilinear / gaussian / nearestneigbor / lanczos / triangular
@@ -80,12 +80,12 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo
                 if case =='79' or case == "28":
                     interpolationRange = 2
                 for interpolationType in range(interpolationRange):
-                    print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0")
-                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+                    print("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(interpolationType) + " 0")
+                    result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                     log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
             else:
-                print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0")
-                result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+                print("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout) + " 0")
+                result = subprocess.Popen([buildFolderPath + "/build/Tensor_image_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
                 log_detected(result, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
 
             print("------------------------------------------------------------------------------------------")
@@ -96,9 +96,9 @@ def run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, d
             process = subprocess.Popen([buildFolderPath + "/build/BatchPD_host_" + logFileLayout, srcPath1, srcPath2, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), "0"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
             read_from_subprocess_and_write_to_log(process, logFile)
             log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
-    with open(loggingFolder + "/Tensor_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
-        logFile.write("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n")
-        process = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
+    with open(loggingFolder + "/Tensor_image_host_" + logFileLayout + "_raw_performance_log.txt", "a") as logFile:
+        logFile.write("./Tensor_image_host " + srcPath1 + " " + srcPath2 + " " + dstPath + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(additionalParam) + " 0\n")
+        process = subprocess.Popen([buildFolderPath + "/build/Tensor_image_host", srcPath1, srcPath2, dstPath, str(bitDepth), str(outputFormatToggle), str(case), str(additionalParam), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE)    # nosec
         read_from_subprocess_and_write_to_log(process, logFile)
         log_detected(process, errorLog, imageAugmentationMap[int(case)][0], get_bit_depth(int(bitDepth)), get_image_layout_type(layout, outputFormatToggle, "HOST"))
         
@@ -488,7 +488,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_host ----------------------------------\n")
+    print("\n---------------------------------- Error log - Tensor_image_host ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py
index e3c123a74..eda494e8a 100644
--- a/utilities/test_suite/common.py
+++ b/utilities/test_suite/common.py
@@ -130,6 +130,35 @@
     "statistical_operations" : [15, 87, 88, 89, 90, 91]
 }
 
+StatusMap = {
+    0: "RPP_SUCCESS",
+    -1: "RPP_ERROR",
+    -2: "RPP_ERROR_INVALID_ARGUMENTS",
+    -3: "RPP_ERROR_LOW_OFFSET",
+    -4: "RPP_ERROR_ZERO_DIVISION",
+    -5: "RPP_ERROR_HIGH_SRC_DIMENSION",
+    -6: "RPP_ERROR_NOT_IMPLEMENTED",
+    -7: "RPP_ERROR_INVALID_SRC_CHANNELS",
+    -8: "RPP_ERROR_INVALID_DST_CHANNELS",
+    -9: "RPP_ERROR_INVALID_SRC_LAYOUT",
+    -10: "RPP_ERROR_INVALID_DST_LAYOUT",
+    -11: "RPP_ERROR_INVALID_SRC_DATATYPE",
+    -12: "RPP_ERROR_INVALID_DST_DATATYPE",
+    -13: "RPP_ERROR_INVALID_SRC_OR_DST_DATATYPE",
+    -14: "RPP_ERROR_INSUFFICIENT_DST_BUFFER_LENGTH",
+    -15: "RPP_ERROR_INVALID_PARAMETER_DATATYPE",
+    -16: "RPP_ERROR_NOT_ENOUGH_MEMORY",
+    -17: "RPP_ERROR_OUT_OF_BOUND_SRC_ROI",
+    -18: "RPP_ERROR_LAYOUT_MISMATCH",
+    -19: "RPP_ERROR_INVALID_CHANNELS",
+    -20: "RPP_ERROR_INVALID_OUTPUT_TILE_LENGTH",
+    -21: "RPP_ERROR_OUT_OF_BOUND_SHARED_MEMORY_SIZE",
+    -22: "RPP_ERROR_OUT_OF_BOUND_SCRATCH_MEMORY_SIZE",
+    -23: "RPP_ERROR_INVALID_SRC_DIMS",
+    -24: "RPP_ERROR_INVALID_DST_DIMS",
+    -25: "RPP_ERROR_CPP_API_NOT_IMPLEMENTED",
+}
+
 # Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents
 def validate_and_remove_files(path):
     if not path:  # check if a string is empty
@@ -458,6 +487,10 @@ def get_signal_name_from_return_code(returnCode):
                 signalName = signame
                 break
         result = result + signalName
+    elif( returnCode > 127):
+        signalNum = returnCode - 256
+        if signalNum in StatusMap.keys():
+            result = result + " Error = " +StatusMap[signalNum]
     return result
 
 def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificName):
@@ -465,6 +498,9 @@ def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificN
     print(stdoutData.decode())
     exitCode = result.returncode
     if(exitCode != 0):
-        errorData = "Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode();
+        if exitCode > 127:
+            errorData = "Returned non-zero exit status : "+ str(exitCode - 256) + " " + stderrData.decode()
+        else:
+            errorData = "Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode()
         msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + get_signal_name_from_return_code(exitCode)
         errorLog.append(msg)

From 0566127be7dc4f0370579af56a48f2cf51a5b121 Mon Sep 17 00:00:00 2001
From: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Date: Thu, 16 Jan 2025 07:33:41 +0000
Subject: [PATCH 15/17] Changes based on review comments

---
 include/rppdefs.h                               |  4 +---
 utilities/test_suite/HIP/Tensor_image_hip.cpp   |  2 +-
 utilities/test_suite/HIP/runAudioTests.py       |  2 +-
 utilities/test_suite/HIP/runImageTests.py       |  2 +-
 utilities/test_suite/HIP/runMiscTests.py        |  2 +-
 utilities/test_suite/HIP/runVoxelTests.py       |  2 +-
 utilities/test_suite/HOST/Tensor_image_host.cpp |  2 +-
 utilities/test_suite/HOST/runAudioTests.py      |  2 +-
 utilities/test_suite/HOST/runImageTests.py      |  2 +-
 utilities/test_suite/HOST/runMiscTests.py       |  2 +-
 utilities/test_suite/HOST/runVoxelTests.py      |  2 +-
 utilities/test_suite/common.py                  | 15 +++++++--------
 12 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/include/rppdefs.h b/include/rppdefs.h
index 0ce632b65..6bbee49ea 100644
--- a/include/rppdefs.h
+++ b/include/rppdefs.h
@@ -164,9 +164,7 @@ typedef enum
     /*! \brief Number of src dims is invalid. (Needs to adhere to function specification.) \ingroup group_rppdefs */
     RPP_ERROR_INVALID_SRC_DIMS          = -23,
     /*! \brief Number of dst dims is invalid. (Needs to adhere to function specification.) \ingroup group_rppdefs */
-    RPP_ERROR_INVALID_DST_DIMS          = -24,
-    /*! \brief Cpp API functionality is not implemented. (Needs to adhere to function specification.) \ingroup group_rppdefs */
-    RPP_ERROR_CPP_API_NOT_IMPLEMENTED    = -25
+    RPP_ERROR_INVALID_DST_DIMS          = -24
 } RppStatus;
 
 /*! \brief RPP rppStatus_t type enums
diff --git a/utilities/test_suite/HIP/Tensor_image_hip.cpp b/utilities/test_suite/HIP/Tensor_image_hip.cpp
index a83f7660a..dbcc31005 100644
--- a/utilities/test_suite/HIP/Tensor_image_hip.cpp
+++ b/utilities/test_suite/HIP/Tensor_image_hip.cpp
@@ -1582,7 +1582,7 @@ int main(int argc, char **argv)
             if (missingFuncFlag == 1)
             {
                 cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n";
-                return RPP_ERROR_CPP_API_NOT_IMPLEMENTED;
+                return RPP_ERROR_NOT_IMPLEMENTED;
             }
 
             maxWallTime = max(maxWallTime, wallTime);
diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py
index e5792c6fe..f79b16091 100644
--- a/utilities/test_suite/HIP/runAudioTests.py
+++ b/utilities/test_suite/HIP/runAudioTests.py
@@ -301,7 +301,7 @@ def rpp_test_suite_parser_and_validator():
         print("Unable to open results in " + CONSOLIDATED_FILE)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_audio_hip ----------------------------------\n")
+    print("\n---------------------------------- Log of function variants requested but not run - Tensor_audio_hip  ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
diff --git a/utilities/test_suite/HIP/runImageTests.py b/utilities/test_suite/HIP/runImageTests.py
index b1e2c653f..71a99967a 100644
--- a/utilities/test_suite/HIP/runImageTests.py
+++ b/utilities/test_suite/HIP/runImageTests.py
@@ -496,7 +496,7 @@ def rpp_test_suite_parser_and_validator():
         print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_image_hip")
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_image_hip ----------------------------------\n")
+    print("\n---------------------------------- Log of function variants requested but not run - Tensor_image_hip  ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py
index d31745bae..7d1886aa1 100644
--- a/utilities/test_suite/HIP/runMiscTests.py
+++ b/utilities/test_suite/HIP/runMiscTests.py
@@ -290,7 +290,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_misc_hip ----------------------------------\n")
+    print("\n---------------------------------- Log of function variants requested but not run - Tensor_misc_hip  ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py
index e0956e454..2466b76da 100644
--- a/utilities/test_suite/HIP/runVoxelTests.py
+++ b/utilities/test_suite/HIP/runVoxelTests.py
@@ -362,7 +362,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_voxel_hip ----------------------------------\n")
+    print("\n---------------------------------- Log of function variants requested but not run - Tensor_voxel_hip  ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/Tensor_image_host.cpp b/utilities/test_suite/HOST/Tensor_image_host.cpp
index 1f00dff7a..64e89b7a2 100644
--- a/utilities/test_suite/HOST/Tensor_image_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_image_host.cpp
@@ -1618,7 +1618,7 @@ int main(int argc, char **argv)
             if (missingFuncFlag == 1)
             {
                 cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n";
-                return RPP_ERROR_CPP_API_NOT_IMPLEMENTED;
+                return RPP_ERROR_NOT_IMPLEMENTED;
             }
 
             maxWallTime = std::max(maxWallTime, wallTime);
diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py
index 441615a15..df01c3822 100644
--- a/utilities/test_suite/HOST/runAudioTests.py
+++ b/utilities/test_suite/HOST/runAudioTests.py
@@ -212,7 +212,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(log_file, "", numRuns)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_audio_host ----------------------------------\n")
+    print("\n---------------------------------- Log of function variants requested but not run - Tensor_audio_host ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
diff --git a/utilities/test_suite/HOST/runImageTests.py b/utilities/test_suite/HOST/runImageTests.py
index 083c70b0a..b78f8e761 100644
--- a/utilities/test_suite/HOST/runImageTests.py
+++ b/utilities/test_suite/HOST/runImageTests.py
@@ -488,7 +488,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_image_host ----------------------------------\n")
+    print("\n---------------------------------- Log of function variants requested but not run - Tensor_image_host ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py
index e864f9912..d41400389 100644
--- a/utilities/test_suite/HOST/runMiscTests.py
+++ b/utilities/test_suite/HOST/runMiscTests.py
@@ -204,7 +204,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_misc_host ----------------------------------\n")
+    print("\n---------------------------------- Log of function variants requested but not run - Tensor_misc_host ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py
index 87625bfe6..a8cebfec0 100644
--- a/utilities/test_suite/HOST/runVoxelTests.py
+++ b/utilities/test_suite/HOST/runVoxelTests.py
@@ -257,7 +257,7 @@ def rpp_test_suite_parser_and_validator():
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
 if errorLog:
-    print("\n---------------------------------- Error log - Tensor_voxel_host ----------------------------------\n")
+    print("\n---------------------------------- Log of function variants requested but not run - Tensor_voxel_host ----------------------------------\n")
     for error in errorLog:
         print(error)
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py
index eda494e8a..7ca0f12fc 100644
--- a/utilities/test_suite/common.py
+++ b/utilities/test_suite/common.py
@@ -156,7 +156,6 @@
     -22: "RPP_ERROR_OUT_OF_BOUND_SCRATCH_MEMORY_SIZE",
     -23: "RPP_ERROR_INVALID_SRC_DIMS",
     -24: "RPP_ERROR_INVALID_DST_DIMS",
-    -25: "RPP_ERROR_CPP_API_NOT_IMPLEMENTED",
 }
 
 # Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents
@@ -481,16 +480,16 @@ def get_signal_name_from_return_code(returnCode):
     result = ""
     if returnCode < 0:
         signalNum = -returnCode
-        result = result + " Signal = "
+        result = result + " ( "
         for signame, signum in signal.__dict__.items():
             if isinstance(signum, int) and signum == signalNum:
                 signalName = signame
                 break
-        result = result + signalName
-    elif( returnCode > 127):
+        result = result + signalName + " ) "
+    elif(returnCode > 127):
         signalNum = returnCode - 256
         if signalNum in StatusMap.keys():
-            result = result + " Error = " +StatusMap[signalNum]
+            result = result + " ( " + StatusMap[signalNum] + " ) "
     return result
 
 def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificName):
@@ -499,8 +498,8 @@ def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificN
     exitCode = result.returncode
     if(exitCode != 0):
         if exitCode > 127:
-            errorData = "Returned non-zero exit status : "+ str(exitCode - 256) + " " + stderrData.decode()
+            errorData = "Returned non-zero exit status : " + str(exitCode - 256) + " " + stderrData.decode()
         else:
-            errorData = "Returned non-zero exit status : "+ str(exitCode) + " " + stderrData.decode()
-        msg = caseName + functionBitDepth + functionSpecificName + " kernel execution failed. Getting below error\n" + errorData + get_signal_name_from_return_code(exitCode)
+            errorData = "Returned non-zero exit status : " + str(exitCode) + " " + stderrData.decode()
+        msg = caseName + functionBitDepth + functionSpecificName + " - " + errorData + get_signal_name_from_return_code(exitCode)
         errorLog.append(msg)

From ff76911a1eb303786647d75adbbf419acbee0fcd Mon Sep 17 00:00:00 2001
From: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Date: Thu, 16 Jan 2025 09:46:32 +0000
Subject: [PATCH 16/17] Fix "unable to open file" issue for rain and
 warp_perspective

---
 utilities/test_suite/HOST/Tensor_image_host.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/utilities/test_suite/HOST/Tensor_image_host.cpp b/utilities/test_suite/HOST/Tensor_image_host.cpp
index 64e89b7a2..b6f4d2e50 100644
--- a/utilities/test_suite/HOST/Tensor_image_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_image_host.cpp
@@ -63,9 +63,9 @@ int main(int argc, char **argv)
     bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23 || testCase == 24 || testCase == 28 || testCase == 49 || testCase ==54 || testCase == 79);
     bool kernelSizeCase = (testCase == 49 || testCase == 54);
     bool dualInputCase = (testCase == 2 || testCase == 30 || testCase == 33 || testCase == 61 || testCase == 63 || testCase == 65 || testCase == 68);
-    bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 10 || testCase == 84);
-    bool nonQACase = (testCase == 24);
-    bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 79);
+    bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 10 || testCase == 11 || testCase == 84);
+    bool nonQACase = (testCase == 24 || testCase == 28);
+    bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 28 || testCase == 79);
     bool reductionTypeCase = (testCase == 87 || testCase == 88 || testCase == 89 || testCase == 90 || testCase == 91);
     bool noiseTypeCase = (testCase == 8);
     bool pln1OutTypeCase = (testCase == 86);

From b00c63ae9efa9fefb5134c59a88c5c39bd3b9ccb Mon Sep 17 00:00:00 2001
From: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Date: Thu, 16 Jan 2025 10:20:26 +0000
Subject: [PATCH 17/17] Enhanced display for non-implemented functionality

---
 utilities/test_suite/HIP/runAudioTests.py  | 10 ++++++----
 utilities/test_suite/HIP/runImageTests.py  | 10 ++++++----
 utilities/test_suite/HIP/runMiscTests.py   | 10 ++++++----
 utilities/test_suite/HIP/runVoxelTests.py  | 10 ++++++----
 utilities/test_suite/HOST/runAudioTests.py | 10 ++++++----
 utilities/test_suite/HOST/runImageTests.py | 10 ++++++----
 utilities/test_suite/HOST/runMiscTests.py  | 10 ++++++----
 utilities/test_suite/HOST/runVoxelTests.py | 10 ++++++----
 utilities/test_suite/common.py             | 13 ++++++++-----
 9 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/utilities/test_suite/HIP/runAudioTests.py b/utilities/test_suite/HIP/runAudioTests.py
index f79b16091..8f8fba351 100644
--- a/utilities/test_suite/HIP/runAudioTests.py
+++ b/utilities/test_suite/HIP/runAudioTests.py
@@ -37,7 +37,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 7
-errorLog = []
+errorLog = [{"notExecutedFunctionality" : 0}]
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -300,8 +300,10 @@ def rpp_test_suite_parser_and_validator():
     except IOError:
         print("Unable to open results in " + CONSOLIDATED_FILE)
 
-if errorLog:
+if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0:
     print("\n---------------------------------- Log of function variants requested but not run - Tensor_audio_hip  ----------------------------------\n")
-    for error in errorLog:
-        print(error)
+    for i in range(1,len(errorLog)):
+        print(errorLog[i])
+    if(errorLog[0]["notExecutedFunctionality"] != 0):
+        print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_audio_hip were not executed since these sub-variants are not currently supported in RPP.\n")
     print("-----------------------------------------------------------------------------------------------")
diff --git a/utilities/test_suite/HIP/runImageTests.py b/utilities/test_suite/HIP/runImageTests.py
index 71a99967a..a986a7e7b 100644
--- a/utilities/test_suite/HIP/runImageTests.py
+++ b/utilities/test_suite/HIP/runImageTests.py
@@ -41,7 +41,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 92
-errorLog = []
+errorLog = [{"notExecutedFunctionality" : 0}]
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list(preserveOutput):
@@ -495,8 +495,10 @@ def rpp_test_suite_parser_and_validator():
         print("---------------------------------- Results of QA Test - Tensor_image_hip ----------------------------------\n")
         print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList, "Tensor_image_hip")
 
-if errorLog:
+if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0:
     print("\n---------------------------------- Log of function variants requested but not run - Tensor_image_hip  ----------------------------------\n")
-    for error in errorLog:
-        print(error)
+    for i in range(1,len(errorLog)):
+        print(errorLog[i])
+    if(errorLog[0]["notExecutedFunctionality"] != 0):
+        print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_image_hip were not executed since these sub-variants are not currently supported in RPP.\n")
     print("-----------------------------------------------------------------------------------------------")
diff --git a/utilities/test_suite/HIP/runMiscTests.py b/utilities/test_suite/HIP/runMiscTests.py
index 7d1886aa1..9f90a2de6 100644
--- a/utilities/test_suite/HIP/runMiscTests.py
+++ b/utilities/test_suite/HIP/runMiscTests.py
@@ -39,7 +39,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 2
-errorLog = []
+errorLog = [{"notExecutedFunctionality" : 0}]
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -289,8 +289,10 @@ def rpp_test_suite_parser_and_validator():
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
-if errorLog:
+if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0:
     print("\n---------------------------------- Log of function variants requested but not run - Tensor_misc_hip  ----------------------------------\n")
-    for error in errorLog:
-        print(error)
+    for i in range(1,len(errorLog)):
+        print(errorLog[i])
+    if(errorLog[0]["notExecutedFunctionality"] != 0):
+        print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_misc_hip were not executed since these sub-variants are not currently supported in RPP.\n")
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HIP/runVoxelTests.py b/utilities/test_suite/HIP/runVoxelTests.py
index 2466b76da..e02990801 100644
--- a/utilities/test_suite/HIP/runVoxelTests.py
+++ b/utilities/test_suite/HIP/runVoxelTests.py
@@ -39,7 +39,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 6
-errorLog = []
+errorLog = [{"notExecutedFunctionality" : 0}]
 
 def get_log_file_list(preserveOutput):
     return [
@@ -361,8 +361,10 @@ def rpp_test_suite_parser_and_validator():
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
-if errorLog:
+if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0:
     print("\n---------------------------------- Log of function variants requested but not run - Tensor_voxel_hip  ----------------------------------\n")
-    for error in errorLog:
-        print(error)
+    for i in range(1,len(errorLog)):
+        print(errorLog[i])
+    if(errorLog[0]["notExecutedFunctionality"] != 0):
+        print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_voxel_hip were not executed since these sub-variants are not currently supported in RPP.\n")
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py
index df01c3822..1466237c0 100644
--- a/utilities/test_suite/HOST/runAudioTests.py
+++ b/utilities/test_suite/HOST/runAudioTests.py
@@ -37,7 +37,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 7
-errorLog = []
+errorLog = [{"notExecutedFunctionality" : 0}]
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -211,9 +211,11 @@ def rpp_test_suite_parser_and_validator():
     for log_file in log_file_list:
         print_performance_tests_summary(log_file, "", numRuns)
 
-if errorLog:
+if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0:
     print("\n---------------------------------- Log of function variants requested but not run - Tensor_audio_host ----------------------------------\n")
-    for error in errorLog:
-        print(error)
+    for i in range(1,len(errorLog)):
+        print(errorLog[i])
+    if(errorLog[0]["notExecutedFunctionality"] != 0):
+        print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_audio_host were not executed since these sub-variants are not currently supported in RPP.\n")
     print("-----------------------------------------------------------------------------------------------")
 
diff --git a/utilities/test_suite/HOST/runImageTests.py b/utilities/test_suite/HOST/runImageTests.py
index b78f8e761..4cf5bc6de 100644
--- a/utilities/test_suite/HOST/runImageTests.py
+++ b/utilities/test_suite/HOST/runImageTests.py
@@ -41,7 +41,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 92
-errorLog = []
+errorLog = [{"notExecutedFunctionality" : 0}]
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list(preserveOutput):
@@ -487,8 +487,10 @@ def rpp_test_suite_parser_and_validator():
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
-if errorLog:
+if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0:
     print("\n---------------------------------- Log of function variants requested but not run - Tensor_image_host ----------------------------------\n")
-    for error in errorLog:
-        print(error)
+    for i in range(1,len(errorLog)):
+        print(errorLog[i])
+    if(errorLog[0]["notExecutedFunctionality"] != 0):
+        print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_image_host were not executed since these sub-variants are not currently supported in RPP.\n")
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runMiscTests.py b/utilities/test_suite/HOST/runMiscTests.py
index d41400389..3d5a383e1 100644
--- a/utilities/test_suite/HOST/runMiscTests.py
+++ b/utilities/test_suite/HOST/runMiscTests.py
@@ -39,7 +39,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 2
-errorLog = []
+errorLog = [{"notExecutedFunctionality" : 0}]
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -203,8 +203,10 @@ def rpp_test_suite_parser_and_validator():
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
-if errorLog:
+if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0:
     print("\n---------------------------------- Log of function variants requested but not run - Tensor_misc_host ----------------------------------\n")
-    for error in errorLog:
-        print(error)
+    for i in range(1,len(errorLog)):
+        print(errorLog[i])
+    if(errorLog[0]["notExecutedFunctionality"] != 0):
+        print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_misc_host were not executed since these sub-variants are not currently supported in RPP.\n")
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runVoxelTests.py b/utilities/test_suite/HOST/runVoxelTests.py
index a8cebfec0..54734abfa 100644
--- a/utilities/test_suite/HOST/runVoxelTests.py
+++ b/utilities/test_suite/HOST/runVoxelTests.py
@@ -39,7 +39,7 @@
 buildFolderPath = os.getcwd()
 caseMin = 0
 caseMax = 6
-errorLog = []
+errorLog = [{"notExecutedFunctionality" : 0}]
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list():
@@ -256,8 +256,10 @@ def rpp_test_suite_parser_and_validator():
     for logFile in logFileList:
         print_performance_tests_summary(logFile, functionalityGroupList, numRuns)
 
-if errorLog:
+if len(errorLog) > 1 or errorLog[0]["notExecutedFunctionality"] != 0:
     print("\n---------------------------------- Log of function variants requested but not run - Tensor_voxel_host ----------------------------------\n")
-    for error in errorLog:
-        print(error)
+    for i in range(1,len(errorLog)):
+        print(errorLog[i])
+    if(errorLog[0]["notExecutedFunctionality"] != 0):
+        print(str(errorLog[0]["notExecutedFunctionality"]) + " functionality variants requested by test_suite_voxel_host were not executed since these sub-variants are not currently supported in RPP.\n")
     print("-----------------------------------------------------------------------------------------------")
\ No newline at end of file
diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py
index 7ca0f12fc..28590ae0b 100644
--- a/utilities/test_suite/common.py
+++ b/utilities/test_suite/common.py
@@ -497,9 +497,12 @@ def log_detected(result, errorLog, caseName, functionBitDepth, functionSpecificN
     print(stdoutData.decode())
     exitCode = result.returncode
     if(exitCode != 0):
-        if exitCode > 127:
-            errorData = "Returned non-zero exit status : " + str(exitCode - 256) + " " + stderrData.decode()
+        if exitCode == 250:
+            errorLog[0]["notExecutedFunctionality"] += 1
         else:
-            errorData = "Returned non-zero exit status : " + str(exitCode) + " " + stderrData.decode()
-        msg = caseName + functionBitDepth + functionSpecificName + " - " + errorData + get_signal_name_from_return_code(exitCode)
-        errorLog.append(msg)
+            if exitCode > 127:
+                errorData = "Returned non-zero exit status : " + str(exitCode - 256) + " " + stderrData.decode()
+            else:
+                errorData = "Returned non-zero exit status : " + str(exitCode) + " " + stderrData.decode()
+            msg = caseName + functionBitDepth + functionSpecificName + " - " + errorData + get_signal_name_from_return_code(exitCode)
+            errorLog.append(msg)