Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
guacamoleo committed Aug 29, 2017
2 parents bb95a21 + e3ea5b4 commit 98cf4f2
Show file tree
Hide file tree
Showing 22 changed files with 5,387 additions and 3,733 deletions.
12 changes: 11 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
parallel rocm_fiji: {

currentBuild.result = "SUCCESS"
node('rocm-1.5&& fiji')
node('rocm-1.6 && fiji')
{
def scm_dir = pwd()
def build_dir_debug = "${scm_dir}/test/debug"
Expand Down Expand Up @@ -65,13 +65,23 @@ parallel rocm_fiji: {
// run jenkins tests
dir("${build_dir_release}") {
stage("unit tests") {

// defaults
sh "tensile ../../Tensile/Configs/test_hgemm_defaults.yaml hgemm_defaults"
sh "tensile ../../Tensile/Configs/test_sgemm_defaults.yaml sgemm_defaults"
sh "tensile ../../Tensile/Configs/test_dgemm_defaults.yaml dgemm_defaults"

// thorough tests
sh "tensile --runtime-language=HIP --kernel-language=HIP ../../Tensile/Configs/test_hgemm.yaml hgemm"
sh "tensile --runtime-language=HIP --kernel-language=HIP ../../Tensile/Configs/test_sgemm.yaml sgemm"

// vectors
sh "tensile --runtime-language=HIP --kernel-language=HIP ../../Tensile/Configs/test_hgemm_vectors.yaml hgemm_vectors"
sh "tensile --runtime-language=HIP --kernel-language=HIP ../../Tensile/Configs/test_sgemm_vectors.yaml sgemm_vectors"

// assembly
sh "tensile ../../Tensile/Configs/sgemm_gfx803.yaml sgemm_gfx803"

// TODO re-enable when jenkins supports opencl
//sh "tensile --runtime-language=OCL --kernel-language=OCL ../../Tensile/Configs/test_sgemm_vectors.yaml sgemm_vectors"
}
Expand Down
13 changes: 10 additions & 3 deletions Tensile/BenchmarkProblems.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@
import time

from BenchmarkStructs import BenchmarkProcess
from Common import globalParameters, HR, pushWorkingPath, popWorkingPath, print1, print2, printExit, printWarning, ensurePath, startTime, ProgressBar
from Common import globalParameters, HR, pushWorkingPath, popWorkingPath, print1, print2, printExit, printWarning, ensurePath, startTime, ProgressBar, kernelLanguageIsSource
from SolutionStructs import Solution, ProblemType
from SolutionWriter import SolutionWriter
from KernelWriterSource import KernelWriterSource
from KernelWriterAssembly import KernelWriterAssembly
from ClientWriter import writeRunScript, writeClientParameters
from TensileCreateLibrary import writeSolutionsAndKernels, writeCMake
import YAMLIO
Expand Down Expand Up @@ -234,6 +235,8 @@ def benchmarkProblemType( problemTypeConfig, problemSizeGroupConfig, \
for j in range(0, len(solutionsForHardcoded)):
solution = solutionsForHardcoded[j]
solutionList.append(solution)
if len(solutionList) == 0:
printExit("Your parameters resulted in 0 valid solutions.\nYou should re-run with \"PrintSolutionRejectionReason: True\" to see why each parameter combination was rejected.")
if globalParameters["PrintLevel"] >= 1:
for i in range(0, len(solutions)):
solutionsForHardcoded = solutions[i]
Expand Down Expand Up @@ -380,8 +383,12 @@ def writeBenchmarkFiles(solutions, problemSizes, stepName, filesToCopy):
solutionWriter = SolutionWriter( \
solutionMinNaming, solutionSerialNaming, \
kernelMinNaming, kernelSerialNaming)
kernelWriter = KernelWriterSource( \
kernelMinNaming, kernelSerialNaming)
if kernelLanguageIsSource():
kernelWriter = KernelWriterSource( \
kernelMinNaming, kernelSerialNaming)
else:
kernelWriter = KernelWriterAssembly( \
kernelMinNaming, kernelSerialNaming)

# write solution, kernels and CMake
writeSolutionsAndKernels( \
Expand Down
4 changes: 3 additions & 1 deletion Tensile/BenchmarkStructs.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ def fillInMissingStepsWithDefaults(self, config):
for paramDict in copy(stepList):
for paramName in copy(paramDict):
paramValues = paramDict[paramName]
if paramValues == None:
printExit("You must specify value for parameters \"%s\"" % paramName )
if len(paramValues) < 2 and paramName != "ProblemSizes":
paramDict.pop(paramName)
#self.benchmarkCommonParameters.insert(0, {paramName: paramValues })
Expand Down Expand Up @@ -380,7 +382,7 @@ def convertParametersToSteps(self):
print1("# Join Parameters")
macroTileJoinSet = set()
totalPermutations = 1
if len(self.joinParameters) > 1:
if len(self.joinParameters) > 0:
for joinName in self.joinParameters:
# joining a parameter with only a single value
if hasParam(joinName, self.singleValueParameters):
Expand Down
9 changes: 5 additions & 4 deletions Tensile/ClientWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,13 @@ def writeRunScript(path, libraryLogicPath, forBenchmark):
runScriptFile.write("\n")
else:
if globalParameters["PinClocks"]:
runScriptFile.write("rocm-smi -d 0 --setfan 255 --setsclk 7\n")
runScriptFile.write("/opt/rocm/bin/rocm-smi -d 0 --setfan 255 --setsclk 7\n")
runScriptFile.write("sleep 1\n")
runScriptFile.write("rocm-smi -d 0 -a\n")
runScriptFile.write("/opt/rocm/bin/rocm-smi -d 0 -a\n")
runScriptFile.write("./client\n")
if globalParameters["PinClocks"]:
runScriptFile.write("rocm-smi -d 0 --resetclocks\n")
runScriptFile.write("/opt/rocm/bin/rocm-smi -d 0 --resetclocks\n")
runScriptFile.write("/opt/rocm/bin/rocm-smi -d 0 --setfan 255\n")
else:
executablePath = os.path.join(globalParameters["WorkingPath"])
if os.name == "nt":
Expand Down Expand Up @@ -564,7 +565,7 @@ def writeClientParameters(forBenchmark, solutions, problemSizes, stepName, \
h += "\n"
# Solution Ptrs
h += "typedef TensileStatus (*SolutionFunctionPointer)(\n"
argList = solutionWriter.getArgList(solutions[0]["ProblemType"], True, True)
argList = solutionWriter.getArgList(solutions[0]["ProblemType"], True, True, True)
for i in range(0, len(argList)):
h += " %s %s%s" % (argList[i][0], argList[i][1], \
",\n" if i < len(argList)-1 else ");\n\n")
Expand Down
31 changes: 27 additions & 4 deletions Tensile/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
# print level
globalParameters["PrintLevel"] = 1
globalParameters["LibraryPrintDebug"] = False
globalParameters["DebugKernel"] = False
globalParameters["PrintSolutionRejectionReason"] = False
# paths
globalParameters["ScriptPath"] = os.path.dirname(os.path.realpath(__file__))
Expand All @@ -69,6 +70,7 @@
globalParameters["SyncsPerBenchmark"] = 1
globalParameters["PinClocks"] = False
globalParameters["KernelTime"] = False
globalParameters["AssemblerPath"] = "/opt/rocm/bin/hcc"

# file hierarchy
globalParameters["ShortNames"] = False
Expand All @@ -77,8 +79,8 @@
globalParameters["NumElementsToValidate"] = 128
globalParameters["ValidationMaxToPrint"] = 4
globalParameters["ValidationPrintValids"] = False
globalParameters["DataInitTypeAB"] = 0 # 0=rand, 1=1, 2=serial, 3=0
globalParameters["DataInitTypeC"] = 0 # 0=rand, 1=1, 2=serial, 3=0
globalParameters["DataInitTypeAB"] = 3 # 0=0, 1=1, 2=serial, 3=rand, 4=NaN
globalParameters["DataInitTypeC"] = 3 # 0=0, 1=1, 2=serial, 3=rand, 4=NaN
# protect against invalid kernel
globalParameters["MaxLDS"] = 32768
globalParameters["DeviceLDS"] = 32768
Expand All @@ -97,7 +99,7 @@
validWorkGroups.append(workGroup)


validThreadTileSides = [1, 2, 3, 4, 5, 6, 7, 8]
validThreadTileSides = [1, 2, 3, 4, 5, 6, 7, 8, 12, 16]
validThreadTiles = []
for i in validThreadTileSides:
for j in validThreadTileSides:
Expand All @@ -120,16 +122,23 @@
"UnrollMemFence": [ False, True ],
"GlobalSplitUWorkGroupMappingRoundRobin": [ False, True ],
"GlobalSplitUSummationAssignmentRoundRobin": [ False, True ],
"GlobalRead2A": [ False, True ],
"GlobalRead2B": [ False, True ],
"LocalWrite2A": [ False, True ],
"LocalWrite2B": [ False, True ],
"LocalRead2A": [ False, True ],
"LocalRead2B": [ False, True ],

"WorkGroupMapping": range(-1024,1024+1),
"WorkGroupMappingType": ["B", "Z"], # Blocking, S-order
"MaxOccupancy": range(1, 40+1), # wg / CU
"WorkGroup": validWorkGroups,
"ThreadTile": validThreadTiles,
"NumLoadsCoalescedA": range(-1, 64+1),
"NumLoadsCoalescedB": range(-1, 64+1),
"DepthU": range(2, 256+1, 2),
"GlobalSplitU": range(1, 64+1),
"VectorWidth": [ -1, 1, 2, 4, 8 ],
"VectorWidth": [ -1, 1, 2, 3, 4, 6, 8, 12, 16 ],
"LdsPad": [ 0, 1 ],
"MacroTileShapeMin": range(1, 64+1),
"MacroTileShapeMax": range(1, 64+1),
Expand All @@ -153,6 +162,12 @@
{"PrefetchGlobalRead": [ True ] },
{"PrefetchLocalRead": [ True ] },
{"UnrollMemFence": [ False ] },
{"GlobalRead2A": [ True ] },
{"GlobalRead2B": [ True ] },
{"LocalWrite2A": [ True ] },
{"LocalWrite2B": [ True ] },
{"LocalRead2A": [ True ] },
{"LocalRead2B": [ True ] },
{"GlobalSplitU": [ 1 ] },
{"GlobalSplitUWorkGroupMappingRoundRobin": [ True ] },
{"GlobalSplitUSummationAssignmentRoundRobin": [ True ] },
Expand All @@ -161,6 +176,7 @@
{"NumLoadsCoalescedA": [ 1 ] },
{"NumLoadsCoalescedB": [ 1 ] },
{"WorkGroup": [ [16,16,1]] },
{"WorkGroupMappingType": [ "B" ] },
{"WorkGroupMapping": [ 1 ] },
{"ThreadTile": [ [4,4] ] },
{"DepthU": [ 16 ] },
Expand Down Expand Up @@ -215,6 +231,13 @@
}


################################################################################
# Kernel Language Belongs to Source or Assembly?
################################################################################
def kernelLanguageIsSource():
  """Report whether the configured kernel language is a source language.

  Reads globalParameters["KernelLanguage"] and returns True when it equals
  "OCL" or "HIP"; any other value returns False (callers treat that case
  as the assembly path).
  """
  language = globalParameters["KernelLanguage"]
  return language == "OCL" or language == "HIP"

################################################################################
# Searching Nested Lists / Dictionaries
################################################################################
Expand Down
66 changes: 66 additions & 0 deletions Tensile/Configs/convolution.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
GlobalParameters:
MinimumRequiredVersion: 3.0.0
PrintLevel: 1
ForceRedoBenchmarkProblems: True
ForceRedoLibraryLogic: True
ForceRedoLibraryClient: True
CMakeBuildType: Release
EnqueuesPerSync: 1
SyncsPerBenchmark: 1
LibraryPrintDebug: False
NumElementsToValidate: -1
ValidationMaxToPrint: 4
ValidationPrintValids: False
ShortNames: False
MergeFiles: True
Platform: 0
Device: 0

BenchmarkProblems:

- # 7-D Image Convolution
- # ProblemType
OperationType: TensorContraction
DataType: s
UseBeta: True
NumIndicesC: 4
IndexAssignmentsA: [6, 5, 0, 1, 4, 3]
IndexAssignmentsB: [6, 5, 1, 4, 2, 3]

- # BenchmarkProblemSizeGroup - Standard
InitialSolutionParameters:
BenchmarkCommonParameters:
- ProblemSizes:
- Exact: [ 32, 32, 32, 100, 3, 5, 5 ] # caffe layer 1
- Exact: [ 16, 16, 32, 100, 32, 5, 5 ] # caffe layer 2
- Exact: [ 8, 8, 64, 100, 32, 5, 5 ] # caffe layer 3
- EdgeType: ["ShiftPtr"]
- WorkGroupMapping: [ 1 ]
- LoopDoWhile: [False]
- LoopTail: [True]
ForkParameters:
- ThreadTile:
- [ 4, 4 ]
- [ 2, 2 ]
- WorkGroup:
- [ 4, 8, 2 ]
- [ 8, 8, 1 ]
- [ 16, 16, 1 ]
- DepthU: [2, 4]
BenchmarkForkParameters:
JoinParameters:
- MacroTile
BenchmarkJoinParameters:
BenchmarkFinalParameters:
- ProblemSizes:
- Exact: [ 32, 32, 32, 100, 3, 5, 5 ] # caffe layer 1
- Exact: [ 16, 16, 32, 100, 32, 5, 5 ] # caffe layer 2
- Exact: [ 8, 8, 64, 100, 32, 5, 5 ] # caffe layer 3



LibraryLogic:
ScheduleName: Fiji
DeviceNames: ["R9 Nano"]

LibraryClient:
Loading

0 comments on commit 98cf4f2

Please sign in to comment.