Skip to content

Commit

Permalink
Merge pull request #73 from guacamoleo/develop
Browse files (browse the repository at this point in the history)
Fixes a critical bug in LibraryClient for multiple problem types.
  • Loading branch information
guacamoleo authored Feb 23, 2017
2 parents 246dac0 + e695807 commit 81ffaca
Show file tree
Hide file tree
Showing 23 changed files with 273 additions and 265 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
Tensile.egg-info
build
dist
.cache
73 changes: 6 additions & 67 deletions Tensile/BenchmarkProblems.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def benchmarkProblemType( config ):

totalBenchmarkSteps = len(benchmarkProcess)
winners = WinningParameterDict()
determinedParameters = [{}] # winner chosen from benchmark
print1("NumBenchmarkSteps: %u" % totalBenchmarkSteps)
print1("")
print1(HR)
Expand All @@ -56,7 +55,7 @@ def benchmarkProblemType( config ):
numHardcoded = len(benchmarkStep.hardcodedParameters)
stepName = str(benchmarkStep)
shortName = benchmarkStep.abbreviation()
print1("\n\n")
print1("\n")
print1(HR)
print1("# %s\n# %s" % (problemTypeName, stepName))
print1("# NumProblems: %u" % benchmarkStep.problemSizes.totalProblemSizes)
Expand Down Expand Up @@ -126,7 +125,6 @@ def benchmarkProblemType( config ):
# Enumerate Benchmark Permutations
############################################################################
solutions = []
currentSolution = {"ProblemType": deepcopy(benchmarkProcess.problemType.state) }
totalBenchmarkPermutations = 1
for benchmarkParamName in benchmarkStep.benchmarkParameters:
totalBenchmarkPermutations *= len(benchmarkStep.benchmarkParameters[benchmarkParamName])
Expand Down Expand Up @@ -213,14 +211,12 @@ def benchmarkProblemType( config ):
for j in range(0, len(solutionsForHardcoded)):
solution = solutionsForHardcoded[j]
solutionList.append(solution)
solutionsMinNaming = Solution.getMinNaming(solutionList)
if globalParameters["PrintLevel"] >= 1:
for i in range(0, len(solutions)):
solutionsForHardcoded = solutions[i]
for j in range(0, len(solutionsForHardcoded)):
solution = solutionsForHardcoded[j]
print2("# (%u:%u) %s" % (i, j, \
#Solution.getNameMin(solution, solutionsMinNaming) ))
Solution.getNameFull(solution) ))
print2(HR)

Expand Down Expand Up @@ -252,7 +248,9 @@ def benchmarkProblemType( config ):

# run runScript
process = Popen(runScriptName, cwd=globalParameters["WorkingPath"])
status = process.communicate()
process.communicate()
if process.returncode:
printWarning("Benchmark Process exited with code %u" % process.returncode)
popWorkingPath() # build


Expand All @@ -270,10 +268,6 @@ def benchmarkProblemType( config ):
YAMLIO.writeSolutions(solutionsFileName, benchmarkStep.problemSizes, \
solutions )

#solutionsFromFile = YAMLIO.readSolutions(solutionYAMLFileName)
#solutionsMinNaming = Solution.getMinNaming(solutionsFromFile)
#for solution in solutionsFromFile:

# End Iteration
popWorkingPath() # stepName
print1("%s\n# %s\n# %s: End\n%s\n" \
Expand Down Expand Up @@ -318,7 +312,6 @@ def getResults(resultsFileName, solutions):
printWarning("CSV File %s row %u doesn't have %u elements; ignoring remainer of file." \
% (resultsFileName, rowIdx, rowLength) )
break
totalFlops = float(row[problemSizeIdx])
idx = startIdx
#for i in range(0, len(numBenchmarksPerHardcoded)):
# for j in range(0, numBenchmarksPerHardcoded[i]):
Expand All @@ -327,9 +320,6 @@ def getResults(resultsFileName, solutions):
for j in range(0, len(solutionsForHardcoded)):
solution = solutionsForHardcoded[j]
gflops = float(row[idx])
#time_ms = float(row[idx])
#flops = totalFlops / (time_ms / 1000)
#gflops = flops / (1000*1000*1000)
results[i][j].append(gflops)
idx += 1
if rowIdx < 2:
Expand All @@ -349,8 +339,6 @@ def writeBenchmarkFiles(solutions, problemSizes, stepName, filesToCopy):
##############################################################################
# Min Naming
##############################################################################
solutionFileNames = []
kernelNames = []
kernels = []
for solution in solutions:
solutionKernels = solution.getKernels()
Expand Down Expand Up @@ -379,52 +367,6 @@ def writeBenchmarkFiles(solutions, problemSizes, stepName, filesToCopy):

clientName = "TensileBenchmark_%s" % stepName
writeCMake(globalParameters["WorkingPath"], solutions, filesToCopy, clientName)
"""
generatedFile = open(os.path.join(globalParameters["WorkingPath"], \
"Generated.cmake"), "w")
generatedFile.write(CMakeHeader)
generatedFile.write("set( TensileBenchmark_Solutions\n")
# write solution names
if globalParameters["MergeFiles"]:
generatedFile.write(" ${CMAKE_SOURCE_DIR}/Solutions.h\n")
generatedFile.write(" ${CMAKE_SOURCE_DIR}/Solutions.cpp\n")
else:
for solutionFileName in solutionFileNames:
generatedFile.write(" ${CMAKE_SOURCE_DIR}/Solutions/%s.h\n" \
% (solutionFileName) )
generatedFile.write(" ${CMAKE_SOURCE_DIR}/Solutions/%s.cpp\n" \
% (solutionFileName) )
generatedFile.write(" )\n")
# write kernel names
generatedFile.write("set( TensileBenchmark_Kernels\n")
if globalParameters["MergeFiles"]:
generatedFile.write(" ${CMAKE_SOURCE_DIR}/Kernels.h\n")
generatedFile.write(" ${CMAKE_SOURCE_DIR}/Kernels.cpp\n")
else:
for kernelName in kernelNames:
generatedFile.write(" ${CMAKE_SOURCE_DIR}/Kernels/%s.h\n" % (kernelName))
generatedFile.write(" ${CMAKE_SOURCE_DIR}/Kernels/%s.cpp\n" % kernelName)
generatedFile.write(" )\n")
generatedFile.write("set( TensileBenchmark_Source\n")
for fileName in filesToCopy:
generatedFile.write(" ${CMAKE_SOURCE_DIR}/%s\n" % fileName)
generatedFile.write(" )\n\n")
# benchmark parameters
generatedFile.write("set( ClientName TensileBenchmark_%s)\n" \
% (stepName) )
generatedFile.write("set( Tensile_BACKEND \"%s\")\n" \
% (globalParameters["Backend"]) )
# build parameters
generatedFile.write("set( CMAKE_BUILD_TYPE \"%s\")\n" \
% (globalParameters["CMakeBuildType"]) )
# close generated cmake
generatedFile.close()
"""

forBenchmark = True
writeClientParameters(forBenchmark, solutions, problemSizes, stepName, filesToCopy)
Expand Down Expand Up @@ -492,8 +434,8 @@ def addResults( self, hardcodedParameterList, benchmarkPermutations, \
if len(matches) != 1:
printExit("Didn't find exactly 1 match")
hardcodedParametersKey = matches[0][0]
oldWinningParameters = matches[0][1]
oldScore = matches[0][2]
#oldWinningParameters = matches[0][1]
#oldScore = matches[0][2]
self.winners[hardcodedParametersKey][0].update(winningParameters)
self.winners[hardcodedParametersKey][1] = winningScore

Expand All @@ -508,7 +450,6 @@ def __getitem__( self, hardcodedParameters ):
elif len(matches) == 0:
return None
else:
happy += 1
printExit("Didn't find exactly 1 match")

##########################################################
Expand Down Expand Up @@ -536,7 +477,6 @@ def update(self, newHardcodedParameterList ):
self.winners[FrozenDictionary(newHardcodedParameters)] = \
[ winningParameters, score ]
elif len(matches) > 1: # join
fastestIdx = -1
fastestScore = -1
fastestHardcodedParameters = {}
fastestWinningParameters = {}
Expand All @@ -547,7 +487,6 @@ def update(self, newHardcodedParameterList ):
score = match[2]
if score > fastestScore:
fastestScore = score
fastestIdx = matchIdx
fastestWinningParameters = winningParameters
fastestHardcodedParameters = hardcodedFrozen.parameters
newHardcodedParameters.update(fastestHardcodedParameters)
Expand Down
2 changes: 1 addition & 1 deletion Tensile/BenchmarkStructs.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ def __str__(self):
else:
string += "_Final"
return string
def __repr__():
def __repr__(self):
return self.__str__()


Expand Down
30 changes: 10 additions & 20 deletions Tensile/ClientWriter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from Common import globalParameters, HR, pushWorkingPath, popWorkingPath, print1, CHeader
from Common import globalParameters, HR, pushWorkingPath, popWorkingPath, print1, CHeader, printExit
from SolutionStructs import Solution
from SolutionWriter import SolutionWriter
from KernelWriter import KernelWriter
import YAMLIO

import os
Expand Down Expand Up @@ -35,7 +34,6 @@ def main( config ):
]

for f in filesToCopy:
filename = os.path.join(globalParameters["SourcePath"], f)
shutil_copy(
os.path.join(globalParameters["SourcePath"], f),
globalParameters["WorkingPath"] )
Expand Down Expand Up @@ -93,7 +91,9 @@ def main( config ):

# run runScript
process = Popen(runScriptName, cwd=globalParameters["WorkingPath"])
status = process.communicate()
process.communicate()
if process.returncode:
printWarning("Benchmark Process exited with code %u" % process.returncode)
popWorkingPath() # build

popWorkingPath() # LibraryClient
Expand Down Expand Up @@ -165,7 +165,6 @@ def writeClientParameters(forBenchmark, solutions, problemSizes, stepName, \
# Min Naming
##############################################################################
if forBenchmark:
kernelNames = []
kernels = []
for solution in solutions:
solutionKernels = solution.getKernels()
Expand All @@ -180,20 +179,13 @@ def writeClientParameters(forBenchmark, solutions, problemSizes, stepName, \
solutionWriter = SolutionWriter( \
solutionMinNaming, solutionSerialNaming, \
kernelMinNaming, kernelSerialNaming)
kernelWriter = KernelWriter( \
kernelMinNaming, kernelSerialNaming)

if forBenchmark:
if globalParameters["MergeFiles"]:
h += "#include \"Solutions.h\"\n"
else:
for solution in solutions:
solutionName = solutionWriter.getSolutionName(solution)
#if globalParameters["ShortNames"]:
# solutionFileName = \
# Solution.getNameSerial(solution, solutionSerialNaming)
#else:
# solutionFileName = Solution.getNameMin(solution, solutionMinNaming)
h += "#include \"" + solutionName + ".h\"\n"
h += "\n"
else:
Expand Down Expand Up @@ -279,8 +271,6 @@ def writeClientParameters(forBenchmark, solutions, problemSizes, stepName, \
functionIdxForProblemType = 0
for functionIdxForProblemType in range(0, \
len(schedulesForProblemType[problemType])):
schedule = \
schedulesForProblemType[problemType][functionIdxForProblemType]
functionInfo.append([ \
dataTypeIdxSerial, \
problemTypeIdxForDataType, \
Expand All @@ -292,7 +282,7 @@ def writeClientParameters(forBenchmark, solutions, problemSizes, stepName, \
functionIdxForProblemType += 1
functionIdxForDataType += 1
functionIdxSerial += 1
problemTypeIdxSerial += 1
problemTypeIdxSerial += 1
numProblemTypes = problemTypeIdxSerial
numFunctions = functionIdxSerial
h += "const unsigned int numFunctions = %u;\n" % numFunctions
Expand Down Expand Up @@ -360,7 +350,7 @@ def writeClientParameters(forBenchmark, solutions, problemSizes, stepName, \
if i < len(indices):
h += ", %u" % indices[i]
else:
h += ", -1"
h += ", static_cast<unsigned int>(-1)"
if problemTypeIdx < numProblemTypes-1:
h += " },\n"
else:
Expand All @@ -376,7 +366,7 @@ def writeClientParameters(forBenchmark, solutions, problemSizes, stepName, \
if i < len(indices):
h += ", %u" % indices[i]
else:
h += ", -1"
h += ", static_cast<unsigned int>(-1)"
if problemTypeIdx < numProblemTypes-1:
h += " },\n"
else:
Expand Down Expand Up @@ -557,17 +547,17 @@ def writeClientParameters(forBenchmark, solutions, problemSizes, stepName, \
h += "TensileStatus status;\n"
if globalParameters["Backend"] == "OCL":
h += "unsigned int platformIdx = %u;\n" \
% (globalParameters["PlatformIdx"])
% (globalParameters["Platform"])
h += "unsigned int deviceIdx = %u;\n" \
% (globalParameters["DeviceIdx"])
% (globalParameters["Device"])
h += "cl_platform_id platform;\n"
h += "cl_device_id device;\n"
h += "cl_context context;\n"
h += "cl_command_queue stream;\n"
else:
h += "hipStream_t stream;\n"
h += "int deviceIdx = %u;\n" \
% (globalParameters["DeviceIdx"])
% (globalParameters["Device"])
h += "\n"
h += "void *deviceC;\n"
h += "void *deviceA;\n"
Expand Down
4 changes: 2 additions & 2 deletions Tensile/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
globalParameters["LibraryLogicPath"] = "3_LibraryLogic"
globalParameters["LibraryClientPath"] = "4_LibraryClient"
# device
globalParameters["PlatformIdx"] = 0
globalParameters["DeviceIdx"] = 0
globalParameters["Platform"] = 0
globalParameters["Device"] = 0
# benchmark behavior
globalParameters["CMakeBuildType"] = "Release" # Debug
globalParameters["ForceRedoBenchmarkProblems"] = True
Expand Down
18 changes: 7 additions & 11 deletions Tensile/Configs/rocblas_cgemm.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Parameters:
GlobalParameters:
Name: Fiji
PrintLevel: 1
ForceRedoBenchmarkProblems: False
Expand All @@ -13,8 +13,8 @@ Parameters:
ValidationPrintValids: False
ShortNames: False
MergeFiles: True
PlatformIdx: 0
DeviceIdx: 0
Platform: 0
Device: 0
DataInitType: 0

BenchmarkProblems:
Expand Down Expand Up @@ -43,11 +43,10 @@ BenchmarkProblems:
- ThreadTileEdge: [2, 4, 6, 8]
- NumLoadsCoalescedA: [ 1, 2, 3, 4, 6, 8 ]
- NumLoadsCoalescedB: [ 1, 2, 3, 4, 6, 8 ]
BenchmarkForkParameters:
- LoopUnroll: [8, 16]
BenchmarkForkParameters:
JoinParameters:
- MacroTile
- DepthU
BenchmarkJoinParameters:
BenchmarkFinalParameters:
- ProblemSizes: [ [32, 32, 32, 4000], [32, 32, 32, 4000], [2], [1536] ]
Expand Down Expand Up @@ -75,13 +74,12 @@ BenchmarkProblems:
- WorkGroupShape: [ 0 ]
- ThreadTileEdge: [2, 4, 6, 8]
- ThreadTileShape: [ 0 ]
BenchmarkForkParameters:
- NumLoadsCoalescedA: [ 1, 2, 3, 4, 6, 8 ]
- NumLoadsCoalescedB: [ 1, 2, 3, 4, 6, 8 ]
- LoopUnroll: [8, 16]
BenchmarkForkParameters:
JoinParameters:
- MacroTile
- DepthU
BenchmarkJoinParameters:
BenchmarkFinalParameters:
- ProblemSizes: [ [32, 32, 32, 4000], [32, 32, 32, 4000], [2], [1536] ]
Expand Down Expand Up @@ -109,13 +107,12 @@ BenchmarkProblems:
- WorkGroupShape: [ 0 ]
- ThreadTileEdge: [2, 4, 6, 8]
- ThreadTileShape: [ 0 ]
BenchmarkForkParameters:
- NumLoadsCoalescedA: [ 1, 2, 3, 4, 6, 8 ]
- NumLoadsCoalescedB: [ 1, 2, 3, 4, 6, 8 ]
- LoopUnroll: [8, 16]
BenchmarkForkParameters:
JoinParameters:
- MacroTile
- DepthU
BenchmarkJoinParameters:
BenchmarkFinalParameters:
- ProblemSizes: [ [32, 32, 32, 4000], [32, 32, 32, 4000], [2], [1536] ]
Expand Down Expand Up @@ -143,13 +140,12 @@ BenchmarkProblems:
- WorkGroupShape: [ 0 ]
- ThreadTileEdge: [2, 4, 6, 8]
- ThreadTileShape: [ 0 ]
BenchmarkForkParameters:
- NumLoadsCoalescedA: [ 1, 2, 3, 4, 6, 8 ]
- NumLoadsCoalescedB: [ 1, 2, 3, 4, 6, 8 ]
- LoopUnroll: [8, 16]
BenchmarkForkParameters:
JoinParameters:
- MacroTile
- DepthU
BenchmarkJoinParameters:
BenchmarkFinalParameters:
- ProblemSizes: [ [32, 32, 32, 4000], [32, 32, 32, 4000], [2], [1536] ]
Expand Down
Loading

0 comments on commit 81ffaca

Please sign in to comment.