Skip to content

Commit

Permalink
Implement MessagePack backend (ROCm#904)
Browse files (browse the repository at this point in the history)
* Implement MessagePack backend

* Install msgpack on Docker by adding Debian repo and installing from there

* Revert "Install msgpack on Docker by adding Debian repo and installing from there"

This reverts commit 94ae9de.
  • Loading branch information
Yakov Lipkovich authored Jun 9, 2020
1 parent: a93bc68; commit: ab39002
Show file tree
Hide file tree
Showing 38 changed files with 701 additions and 188 deletions.
6 changes: 3 additions & 3 deletions HostLibraryTests/ContractionLibraryLoading_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ using namespace Tensile;
TEST(ContractionLibraryLoadingTest, MultipleKernels)
{
auto library = LoadLibraryFile<ContractionProblem>(
TestData::Instance().file("SampleTensileKernels.yaml").native());
TestData::Instance().file("SampleTensileKernels").native());
ASSERT_NE(library, nullptr);

AMDGPU hardware;
Expand Down Expand Up @@ -87,7 +87,7 @@ TEST(ContractionLibraryLoadingTest, MultipleKernels)

TEST(ContractionLibraryLoadingTest, SGEMM_Kernels_Lite)
{
auto library = LoadLibraryFile<ContractionProblem>(
TestData::Instance().file("KernelsLite.yaml").native());
auto library
= LoadLibraryFile<ContractionProblem>(TestData::Instance().file("KernelsLite").native());
ASSERT_NE(library, nullptr);
}
4 changes: 2 additions & 2 deletions HostLibraryTests/TestData_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ TEST(TestData, Simple)
auto is_regular_file
= static_cast<bool (*)(boost::filesystem::path const&)>(boost::filesystem::is_regular_file);

EXPECT_PRED1(is_regular_file, data.file("KernelsLite.yaml"));
EXPECT_PRED1(is_regular_file, data.file("KernelsLite"));
EXPECT_FALSE(is_regular_file(data.file("fjdlksljfjldskj")));

auto files = data.glob("*.yaml");
auto files = data.glob(std::string("*.") + TestData::defaultExtension);
EXPECT_EQ(files.size(), 6);
for(auto file : files)
{
Expand Down
32 changes: 21 additions & 11 deletions HostLibraryTests/configs/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@

set(SOLUTION_LIBRARY_FILES
${SOLUTION_LIBRARY_FILES}
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsLite.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsLiteMixed.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsLiteNavi.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsTileLite.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/SampleTensileKernels.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/rocBLAS_Full.yaml"
PARENT_SCOPE)

if(Tensile_YAML)
set(SOLUTION_LIBRARY_FILES
${SOLUTION_LIBRARY_FILES}
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsLite.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsLiteMixed.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsLiteNavi.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsTileLite.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/SampleTensileKernels.yaml"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/rocBLAS_Full.yaml"
PARENT_SCOPE)
else()
set(SOLUTION_LIBRARY_FILES
${SOLUTION_LIBRARY_FILES}
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsLite.dat"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsLiteMixed.dat"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsLiteNavi.dat"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/KernelsTileLite.dat"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/SampleTensileKernels.dat"
"${CMAKE_CURRENT_SOURCE_DIR}/SolutionLibraries/rocBLAS_Full.dat"
PARENT_SCOPE)
endif()
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
27 changes: 15 additions & 12 deletions HostLibraryTests/hip/RunGEMMKernelTileSelection_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,17 +176,20 @@ struct RunGEMMKernelSolutionSelectionTest : public ::testing::TestWithParam<Cont
#endif
}

void TearDown() override
{
hipFree(a_d);
hipFree(b_d);
hipFree(c_d);
hipFree(d_d);
hipFree(d_ref_d);
}

hipDeviceReset();
}
};
void TearDown() override
{
hipFree(a_d);
hipFree(b_d);
hipFree(c_d);
hipFree(d_d);
hipFree(d_ref_d);

hipDeviceReset();
}
}
;

TEST_P(RunGEMMKernelSolutionSelectionTest, KernelsTileSelection)
{
Expand All @@ -195,11 +198,11 @@ TEST_P(RunGEMMKernelSolutionSelectionTest, KernelsTileSelection)
// std::cout << problem << std::endl;

auto library = LoadLibraryFile<ContractionProblem>(
TestData::File("kernels_tile_selection/TensileLibrary.yaml").native());
TestData::Instance().file("kernels_tile_selection/TensileLibrary").native());

hip::SolutionAdapter adapter(false);
adapter.loadCodeObjectFile(
TestData::File("kernels_tile_selection/TensileLibrary_gfx906.co").native());
TestData::Instance().file("kernels_tile_selection/TensileLibrary_gfx906", "co").native());

ASSERT_NE(library, nullptr);

Expand Down
9 changes: 4 additions & 5 deletions HostLibraryTests/hip/RunGEMMKernel_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ std::vector<std::tuple<std::shared_ptr<SolutionLibrary<ContractionProblem>>,

{
auto library = LoadLibraryFile<ContractionProblem>(
TestData::Instance().file("kernels_lite/TensileLibrary.yaml").native());
TestData::Instance().file("kernels_lite/TensileLibrary").native());
auto adapter = std::make_shared<hip::SolutionAdapter>(debug, "kernels_lite (file)");
for(auto file : TestData::Instance().glob("kernels_lite/*.*co"))
adapter->loadCodeObjectFile(file.native());
Expand All @@ -544,7 +544,7 @@ std::vector<std::tuple<std::shared_ptr<SolutionLibrary<ContractionProblem>>,

{
auto library = LoadLibraryFile<ContractionProblem>(
TestData::Instance().file("kernels_lite_mixed/TensileLibrary.yaml").native());
TestData::Instance().file("kernels_lite_mixed/TensileLibrary").native());
auto adapter = std::make_shared<hip::SolutionAdapter>(debug, "kernels_lite_mixed (file)");
for(auto file : TestData::Instance().glob("kernels_lite_mixed/*.*co"))
adapter->loadCodeObjectFile(file.native());
Expand All @@ -554,7 +554,7 @@ std::vector<std::tuple<std::shared_ptr<SolutionLibrary<ContractionProblem>>,

{
auto library = LoadLibraryFile<ContractionProblem>(
TestData::Instance().file("tile_aware_selection/library/TensileLibrary.yaml").native());
TestData::Instance().file("tile_aware_selection/library/TensileLibrary").native());

auto adapter = std::make_shared<hip::SolutionAdapter>(debug, "tile_aware_selection");
for(auto file : TestData::Instance().glob("tile_aware_selection/library/*.*co"))
Expand All @@ -569,8 +569,7 @@ std::vector<std::tuple<std::shared_ptr<SolutionLibrary<ContractionProblem>>,
auto envDir = TestData::Env("TENSILE_TEST_LIBRARY");
if(envDir)
{
auto library
= LoadLibraryFile<ContractionProblem>(envDir.file("TensileLibrary.yaml").native());
auto library = LoadLibraryFile<ContractionProblem>(envDir.file("TensileLibrary").native());
auto adapter = std::make_shared<hip::SolutionAdapter>(debug, "TENSILE_TEST_LIBRARY");

for(auto file : envDir.glob("*.co"))
Expand Down
30 changes: 15 additions & 15 deletions HostLibraryTests/llvm/LibraryPerformance_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@ using namespace Tensile;
/**
* LibraryPerformanceTest:
*
* This suite contains micro-benchmarks for pieces of the runtime library. It
* does not exercise any of the Hip-specific code.
* This suite contains micro-benchmarks for pieces of the runtime library. It does not
* exercise any of the Hip-specific code.
*
* There are no performance-based assertions or checks. The timing results are
* provided by googletest.
* There are no performance-based assertions or checks. The timing results are provided by
* googletest.
*
* Most of these tests depend on a library being loaded from a YAML file. The
* library objects are cached so that the deserialization time is not a part of
* the actual test (outside of the LoadLibrary test). PopulateCache is an empty
* test whose purpose is to ensure the cache is populated for the actual tests.
* Most of these tests depend on a library being loaded from a DAT/YAML file. The library objects
* are cached so that the deserialization time is not a part of the actual test (outside of the
* LoadLibrary test). PopulateCache is an empty test whose purpose is to ensure the cache is
* populated for the actual tests.
*/
struct LibraryPerformanceTest
: public ::testing::TestWithParam<std::tuple<AMDGPU, std::string, bool, bool>>
Expand Down Expand Up @@ -223,17 +223,17 @@ std::vector<LibraryPerformanceTest::ParamType> GetParams()

for(auto const& gpu : gpus)
{
rv.push_back(std::make_tuple(gpu, "KernelsLite.yaml", false, false));
rv.push_back(std::make_tuple(gpu, "KernelsLiteMixed.yaml", false, true));
rv.push_back(std::make_tuple(gpu, "KernelsLiteNavi.yaml", true, false));
rv.push_back(std::make_tuple(gpu, "KernelsTileLite.yaml", false, false));
rv.push_back(std::make_tuple(gpu, "rocBLAS_Full.yaml", false, true));
rv.push_back(std::make_tuple(gpu, "KernelsLite", false, false));
rv.push_back(std::make_tuple(gpu, "KernelsLiteMixed", false, true));
rv.push_back(std::make_tuple(gpu, "KernelsLiteNavi", true, false));
rv.push_back(std::make_tuple(gpu, "KernelsTileLite", false, false));
rv.push_back(std::make_tuple(gpu, "rocBLAS_Full", false, true));
}

rv.push_back(std::make_tuple(
AMDGPU(AMDGPU::Processor::gfx908, 64, "Arcturus"), "rocBLAS_Full.yaml", false, true));
AMDGPU(AMDGPU::Processor::gfx908, 64, "Arcturus"), "rocBLAS_Full", false, true));
rv.push_back(std::make_tuple(
AMDGPU(AMDGPU::Processor::gfx1010, 40, "Navi"), "KernelsLiteNavi.yaml", true, true));
AMDGPU(AMDGPU::Processor::gfx1010, 40, "Navi"), "KernelsLiteNavi", true, true));

return rv;
}
Expand Down
4 changes: 3 additions & 1 deletion HostLibraryTests/testlib/include/TestData.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ struct TestData : public Tensile::LazySingleton<TestData>

boost::filesystem::path dataDir() const;

boost::filesystem::path file(std::string const& filename) const;
static const std::string defaultExtension;
boost::filesystem::path file(std::string const& filename,
std::string const& extension = defaultExtension) const;

std::vector<boost::filesystem::path> glob(std::string const& pattern) const;

Expand Down
11 changes: 9 additions & 2 deletions HostLibraryTests/testlib/source/TestData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@

#include <Tensile/Utils.hpp>

#ifdef TENSILE_YAML
const std::string TestData::defaultExtension = "yaml";
#else
const std::string TestData::defaultExtension = "dat";
#endif

TestData::operator bool() const
{
return boost::filesystem::is_directory(dataDir());
Expand All @@ -64,9 +70,10 @@ boost::filesystem::path TestData::dataDir() const
return m_dataDir;
}

boost::filesystem::path TestData::file(std::string const& filename) const
boost::filesystem::path TestData::file(std::string const& filename,
std::string const& extension) const
{
return dataDir() / filename;
return dataDir() / (filename + "." + extension);
}

std::vector<boost::filesystem::path> TestData::glob(std::string const& pattern) const
Expand Down
9 changes: 5 additions & 4 deletions Tensile/BenchmarkProblems.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

from . import ClientExecutable
from . import SolutionLibrary
from . import YAMLIO
from . import LibraryIO
from . import Utils
from .BenchmarkStructs import BenchmarkProcess
from .ClientWriter import runClient, writeClientParameters, writeClientConfig
Expand Down Expand Up @@ -362,7 +362,7 @@ def benchmarkProblemType( problemTypeConfig, problemSizeGroupConfig, \
############################################################################
# Write Solutions YAML
############################################################################
YAMLIO.writeSolutions(solutionsFileName, benchmarkStep.problemSizes, \
LibraryIO.writeSolutions(solutionsFileName, benchmarkStep.problemSizes, \
solutions )

# End Iteration
Expand Down Expand Up @@ -520,11 +520,12 @@ def writeBenchmarkFiles(stepBaseDir, solutions, problemSizes, stepName, filesToC
globalParameters["WorkingPath"], globalParameters["CxxCompiler"], [problemType], solutions, kernels, kernelsBetaOnly, \
solutionWriter, kernelWriterSource, kernelWriterAssembly, errorTolerant=True )

newLibraryFilename = "TensileLibrary.yaml" if globalParameters["YAML"] else "TensileLibrary.dat"
newLibraryDir = ensurePath(os.path.join(globalParameters["WorkingPath"], 'library'))
newLibraryFile = os.path.join(newLibraryDir, "TensileLibrary.yaml")
newLibraryFile = os.path.join(newLibraryDir, newLibraryFilename)
newLibrary = SolutionLibrary.MasterSolutionLibrary.BenchmarkingLibrary(solutions)
newLibrary.applyNaming(kernelMinNaming)
YAMLIO.write(newLibraryFile, Utils.state(newLibrary))
LibraryIO.configWriter(globalParameters["YAML"]).write(newLibraryFile, Utils.state(newLibrary))

codeObjectFiles = [os.path.relpath(f, globalParameters["WorkingPath"]) for f in codeObjectFiles]

Expand Down
7 changes: 4 additions & 3 deletions Tensile/ClientWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from .Common import globalParameters, HR, pushWorkingPath, popWorkingPath, print1, CHeader, printWarning, listToInitializer, ClientExecutionLock
from . import ClientExecutable
from . import Common
from . import YAMLIO
from . import LibraryIO

import os
import subprocess
Expand Down Expand Up @@ -98,7 +98,7 @@ def main( config ):
for logicFileName in logicFiles:
(scheduleName, deviceNames, problemType, solutionsForType, \
indexOrder, exactLogic, rangeLogic, newLibrary, architectureName) \
= YAMLIO.readLibraryLogicForSchedule(logicFileName)
= LibraryIO.readLibraryLogicForSchedule(logicFileName)
if problemType["DataType"].isHalf():
enableHalf = True
functions.append((scheduleName, problemType))
Expand Down Expand Up @@ -559,7 +559,8 @@ def param(key, value):
f.write("{}={}\n".format(key, value))

sourceDir = os.path.join(stepBaseDir, "source")
libraryFile = os.path.join(sourceDir, "library", "TensileLibrary.yaml")
libraryFilename = "TensileLibrary.yaml" if globalParameters["YAML"] else "TensileLibrary.dat"
libraryFile = os.path.join(sourceDir, "library", libraryFilename)
param("library-file", libraryFile)

currentGFXName = Common.gfxName(globalParameters["CurrentISA"])
Expand Down
1 change: 1 addition & 0 deletions Tensile/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
########################################
globalParameters["CMakeBuildType"] = "Release" # whether benchmark clients and library client should be release or debug
globalParameters["PrintSolutionRejectionReason"] = False # when a solution is marked as invalid, print why
globalParameters["YAML"] = False # whether to use the YAML backend (as opposed to the MessagePack backend)

# how to initialize tensor data
# serial-in-u will use a sequence that increments in the K dimension
Expand Down
4 changes: 2 additions & 2 deletions Tensile/Component.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
################################################################################
# Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
# Copyright 2020 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -72,7 +72,7 @@ def PartialMatch(pattern, obj):
for key, value in pattern.items():
if key not in obj:
return False

objValue = obj[key]
if isinstance(value, collections.abc.Mapping) and \
isinstance(objValue, collections.abc.Mapping):
Expand Down
2 changes: 1 addition & 1 deletion Tensile/Components/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

def use(): pass

__all__ = ["MAC_F16"]
__all__ = ["MAC_F16"]
4 changes: 2 additions & 2 deletions Tensile/KernelWriterAssembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -6027,7 +6027,7 @@ def endSummation(self, kernel):
# lastVgprForReads ^ ^ ^
# startVgprReuse ^ ^
# lastValuC ^
# if valuC does not include all of lastVgprForReads, we can reuse the
# if valuC does not include all of lastVgprForReads, we can reuse the
# non-overlapped part of lastVgprForReads
# |<-------------- valuC -------------->|
# |xxxxxxxxxxxxxxxxxxxxx|xxxxxxxxxxxxxxx|oooooo|xx|
Expand All @@ -6036,7 +6036,7 @@ def endSummation(self, kernel):
# startVgprReuse ^
vbegin = self.numVgprValuC
vsize = max(0, self.lastVgprForReads-self.numVgprValuC)
else:
else:
vbegin = self.startVgprValuA
vsize = self.lastVgprForReads - self.startVgprValuA

Expand Down
Loading

0 comments on commit ab39002

Please sign in to comment.