You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I am trying GemmGrouped in cutlass. with cutlass::arch::OpClassTensorOp, the following codes can be compiled successfully. but with cutlass::arch::OpClassSimt, there is compiling error.
The reason why I try OpClassSimt is that I need do some grouped Gemm on sm70, with double precision. Currently, I cannot compile with OpClassSimt+sm80. Thanks.
//using MMAOp = cutlass::arch::OpClassSimt; //compiling error with this
using MMAOp = cutlass::arch::OpClassTensorOp; //compile successfully
using ElemType = double;
using ElementA = ElemType ;
using ElementB = ElemType ;
using ElementOutput = ElemType ;
using ElementAccumulator = double;
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
using LayoutC = cutlass::layout::ColumnMajor;
using Gemm_8 = typename cutlass::gemm::kernel::DefaultGemmGrouped<
ElementA, LayoutA, cutlass::ComplexTransform::kNone, 1, ElementB, LayoutB, cutlass::ComplexTransform::kNone, 1, ElementOutput, LayoutC,
ElementAccumulator, MMAOp, cutlass::arch::Sm80,
cutlass::gemm::GemmShape<32, 32, 16>, cutlass::gemm::GemmShape<16, 16, 16>, cutlass::gemm::GemmShape<8, 8, 4>,
cutlass::epilogue::thread::LinearCombination<
ElementOutput, 64 / cutlass::sizeof_bits<ElementOutput>::value,
ElementAccumulator, ElementAccumulator>,
cutlass::gemm::threadblock::GemmBatchedIdentityThreadblockSwizzle,
4>::GemmKernel;
compiling error for cutlass::arch::OpClassSimt
cutlass-main/include/cutlass/gemm/kernel/default_gemm_grouped.h(214): error: incomplete type is not allowed
using DefaultGemmKernel = typename kernel::DefaultGemm<
^
detected during instantiation of class "cutlass::gemm::kernel::DefaultGemmGrouped<ElementA, LayoutA, cutlass::ComplexTransform::kNone, kAlignmentA, ElementB, LayoutB, cutlass::ComplexTransform::kNone, kAlignmentB, ElementC, LayoutC, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, GroupScheduleMode_, Operator, SharedMemoryClear, PermuteDLayout, std::enable_if<<expression>, void>::type> [with ElementA=gs::ElementA, LayoutA=gs::LayoutA, kAlignmentA=1, ElementB=gs::ElementB, LayoutB=gs::LayoutB, kAlignmentB=1, ElementC=gs::ElementOutput, LayoutC=gs::LayoutC, ElementAccumulator=gs::ElementAccumulator, OperatorClass=gs::MMAOp, ArchTag=cutlass::arch::Sm80, ThreadblockShape=cutlass::gemm::GemmShape<32, 32, 16>, WarpShape=cutlass::gemm::GemmShape<16, 16, 16>, InstructionShape=cutlass::gemm::GemmShape<8, 8, 4>, EpilogueOutputOp=cutlass::epilogue::thread::LinearCombination<gs::ElementOutput, 1, gs::ElementAccumulator, gs::ElementAccumulator, cutlass::epilogue::thread::ScaleType::Default, cutlass::FloatRoundStyle::round_to_nearest, gs::ElementOutput>, ThreadblockSwizzle=cutlass::gemm::threadblock::GemmBatchedIdentityThreadblockSwizzle, Stages=4, GroupScheduleMode_=cutlass::gemm::kernel::GroupScheduleMode::kDeviceOnly, Operator=cutlass::arch::OpMultiplyAdd, SharedMemoryClear=cutlass::gemm::SharedMemoryClearOption::kNone, PermuteDLayout=cutlass::layout::NoPermute]" at line 45 of xx.cu
cutlass/gemm/kernel/default_gemm_grouped.h(243): error: a class or namespace qualified name is required
typename DefaultGemmKernel::Mma,
^
detected during instantiation of class "cutlass::gemm::kernel::DefaultGemmGrouped<ElementA, LayoutA, cutlass::ComplexTransform::kNone, kAlignmentA, ElementB, LayoutB, cutlass::ComplexTransform::kNone, kAlignmentB, ElementC, LayoutC, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, GroupScheduleMode_, Operator, SharedMemoryClear, PermuteDLayout, std::enable_if<<expression>, void>::type> [with ElementA=gs::ElementA, LayoutA=gs::LayoutA, kAlignmentA=1, ElementB=gs::ElementB, LayoutB=gs::LayoutB, kAlignmentB=1, ElementC=gs::ElementOutput, LayoutC=gs::LayoutC, ElementAccumulator=gs::ElementAccumulator, OperatorClass=gs::MMAOp, ArchTag=cutlass::arch::Sm80, ThreadblockShape=cutlass::gemm::GemmShape<32, 32, 16>, WarpShape=cutlass::gemm::GemmShape<16, 16, 16>, InstructionShape=cutlass::gemm::GemmShape<8, 8, 4>, EpilogueOutputOp=cutlass::epilogue::thread::LinearCombination<gs::ElementOutput, 1, gs::ElementAccumulator, gs::ElementAccumulator, cutlass::epilogue::thread::ScaleType::Default, cutlass::FloatRoundStyle::round_to_nearest, gs::ElementOutput>, ThreadblockSwizzle=cutlass::gemm::threadblock::GemmBatchedIdentityThreadblockSwizzle, Stages=4, GroupScheduleMode_=cutlass::gemm::kernel::GroupScheduleMode::kDeviceOnly, Operator=cutlass::arch::OpMultiplyAdd, SharedMemoryClear=cutlass::gemm::SharedMemoryClearOption::kNone, PermuteDLayout=cutlass::layout::NoPermute]" at line 45 of xx.cu
cutlass/gemm/kernel/default_gemm_grouped.h(243): error: too few arguments for class template "cutlass::gemm::kernel::GemmGrouped"
typename DefaultGemmKernel::Mma,
^
detected during instantiation of class "cutlass::gemm::kernel::DefaultGemmGrouped<ElementA, LayoutA, cutlass::ComplexTransform::kNone, kAlignmentA, ElementB, LayoutB, cutlass::ComplexTransform::kNone, kAlignmentB, ElementC, LayoutC, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, GroupScheduleMode_, Operator, SharedMemoryClear, PermuteDLayout, std::enable_if<<expression>, void>::type> [with ElementA=gs::ElementA, LayoutA=gs::LayoutA, kAlignmentA=1, ElementB=gs::ElementB, LayoutB=gs::LayoutB, kAlignmentB=1, ElementC=gs::ElementOutput, LayoutC=gs::LayoutC, ElementAccumulator=gs::ElementAccumulator, OperatorClass=gs::MMAOp, ArchTag=cutlass::arch::Sm80, ThreadblockShape=cutlass::gemm::GemmShape<32, 32, 16>, WarpShape=cutlass::gemm::GemmShape<16, 16, 16>, InstructionShape=cutlass::gemm::GemmShape<8, 8, 4>, EpilogueOutputOp=cutlass::epilogue::thread::LinearCombination<gs::ElementOutput, 1, gs::ElementAccumulator, gs::ElementAccumulator, cutlass::epilogue::thread::ScaleType::Default, cutlass::FloatRoundStyle::round_to_nearest, gs::ElementOutput>, ThreadblockSwizzle=cutlass::gemm::threadblock::GemmBatchedIdentityThreadblockSwizzle, Stages=4, GroupScheduleMode_=cutlass::gemm::kernel::GroupScheduleMode::kDeviceOnly, Operator=cutlass::arch::OpMultiplyAdd, SharedMemoryClear=cutlass::gemm::SharedMemoryClearOption::kNone, PermuteDLayout=cutlass::layout::NoPermute]" at line 45 of xx.cu
cutlass-main/include/cutlass/gemm/kernel/default_gemm_grouped.h(243): error: expected a ";"
typename DefaultGemmKernel::Mma,
^
detected during instantiation of class "cutlass::gemm::kernel::DefaultGemmGrouped<ElementA, LayoutA, cutlass::ComplexTransform::kNone, kAlignmentA, ElementB, LayoutB, cutlass::ComplexTransform::kNone, kAlignmentB, ElementC, LayoutC, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, GroupScheduleMode_, Operator, SharedMemoryClear, PermuteDLayout, std::enable_if<<expression>, void>::type> [with ElementA=gs::ElementA, LayoutA=gs::LayoutA, kAlignmentA=1, ElementB=gs::ElementB, LayoutB=gs::LayoutB, kAlignmentB=1, ElementC=gs::ElementOutput, LayoutC=gs::LayoutC, ElementAccumulator=gs::ElementAccumulator, OperatorClass=gs::MMAOp, ArchTag=cutlass::arch::Sm80, ThreadblockShape=cutlass::gemm::GemmShape<32, 32, 16>, WarpShape=cutlass::gemm::GemmShape<16, 16, 16>, InstructionShape=cutlass::gemm::GemmShape<8, 8, 4>, EpilogueOutputOp=cutlass::epilogue::thread::LinearCombination<gs::ElementOutput, 1, gs::ElementAccumulator, gs::ElementAccumulator, cutlass::epilogue::thread::ScaleType::Default, cutlass::FloatRoundStyle::round_to_nearest, gs::ElementOutput>, ThreadblockSwizzle=cutlass::gemm::threadblock::GemmBatchedIdentityThreadblockSwizzle, Stages=4, GroupScheduleMode_=cutlass::gemm::kernel::GroupScheduleMode::kDeviceOnly, Operator=cutlass::arch::OpMultiplyAdd, SharedMemoryClear=cutlass::gemm::SharedMemoryClearOption::kNone, PermuteDLayout=cutlass::layout::NoPermute]" at line 45 of xx.cu
The text was updated successfully, but these errors were encountered:
I am trying GemmGrouped in cutlass. with
cutlass::arch::OpClassTensorOp
, the following codes can be compiled successfully. but withcutlass::arch::OpClassSimt
, there is compiling error.The reason why I try
OpClassSimt
is that I need do some grouped Gemm on sm70, with double precision. Currently, I cannot compile withOpClassSimt
+sm80. Thanks.compiling error for
cutlass::arch::OpClassSimt
The text was updated successfully, but these errors were encountered: