AMDGPU/NewPM: Start filling out addIRPasses (llvm#102884)
This is not complete, but gets AtomicExpand running. I was able
to get further than I expected; we're quite close to having all
the IR codegen passes ported.
arsenm authored Aug 20, 2024
1 parent afeef4d commit 33e18b2
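
For context on where this hook sits: in the new pass manager's codegen path, the CodeGenPassBuilder is a CRTP-style base whose pipeline driver calls hooks such as addIRPasses on the derived, target-specific builder. The sketch below is a toy illustration of that override pattern only; the names (ToyCodeGenPassBuilder, ToyAMDGPUBuilder, the string "pass names") are stand-ins and not LLVM's actual types or signatures.

```cpp
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Stand-in for LLVM's AddIRPass helper: records pass names in pipeline order.
struct AddIRPass {
  std::vector<std::string> &Pipeline;
  void operator()(std::string PassName) {
    Pipeline.push_back(std::move(PassName));
  }
};

// CRTP base: the generic driver decides *when* the IR passes run; the derived,
// target-specific builder decides *which* passes get added.
template <typename DerivedT> struct ToyCodeGenPassBuilder {
  void buildPipeline(std::vector<std::string> &Pipeline) const {
    AddIRPass addPass{Pipeline};
    static_cast<const DerivedT *>(this)->addIRPasses(addPass);
    // ... codegen-prepare, pre-ISel, instruction selection, MIR passes ...
  }
  void addIRPasses(AddIRPass &addPass) const { addPass("generic-ir-passes"); }
};

// Target override, mirroring the shape of AMDGPUCodeGenPassBuilder::addIRPasses:
// add target-specific IR passes, then delegate to the base for the generic ones.
struct ToyAMDGPUBuilder : ToyCodeGenPassBuilder<ToyAMDGPUBuilder> {
  void addIRPasses(AddIRPass &addPass) const {
    addPass("amdgpu-printf-runtime-binding");
    addPass("atomic-expand"); // the pass this commit gets running
    ToyCodeGenPassBuilder::addIRPasses(addPass);
  }
};

int main() {
  std::vector<std::string> Pipeline;
  ToyAMDGPUBuilder().buildPipeline(Pipeline);
  for (const std::string &P : Pipeline)
    std::cout << P << '\n';
  // Prints: amdgpu-printf-runtime-binding, atomic-expand, generic-ir-passes
}
```

In the real builder, AddIRPass accepts actual pass instances (e.g. AtomicExpandPass()) rather than strings, and the Base::addIRPasses(addPass) call in the diff below plays the role of the qualified base call in this sketch.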
Showing 2 changed files with 103 additions and 0 deletions.
100 changes: 100 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -65,10 +65,16 @@
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -1769,6 +1775,70 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
              ShadowStackGCLoweringPass>();
}

void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
  // TODO: Missing AMDGPURemoveIncompatibleFunctions

  addPass(AMDGPUPrintfRuntimeBindingPass());
  if (LowerCtorDtor)
    addPass(AMDGPUCtorDtorLoweringPass());

  if (isPassEnabled(EnableImageIntrinsicOptimizer))
    addPass(AMDGPUImageIntrinsicOptimizerPass(TM));

  // This can be disabled by passing ::Disable here or on the command line
  // with --expand-variadics-override=disable.
  addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));

  addPass(AMDGPUAlwaysInlinePass());
  addPass(AlwaysInlinerPass());

  // TODO: Missing OpenCLEnqueuedBlockLowering

  // Runs before PromoteAlloca so the latter can account for function uses
  if (EnableLowerModuleLDS)
    addPass(AMDGPULowerModuleLDSPass(TM));

  if (TM.getOptLevel() > CodeGenOptLevel::None)
    addPass(InferAddressSpacesPass());

  // Run atomic optimizer before Atomic Expand
  if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
      (AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
    addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy));

  addPass(AtomicExpandPass());

  if (TM.getOptLevel() > CodeGenOptLevel::None) {
    addPass(AMDGPUPromoteAllocaPass(TM));
    if (isPassEnabled(EnableScalarIRPasses))
      addStraightLineScalarOptimizationPasses(addPass);

    // TODO: Handle EnableAMDGPUAliasAnalysis

    // TODO: May want to move later or split into an early and late one.
    addPass(AMDGPUCodeGenPreparePass(TM));

    // TODO: LICM
  }

  Base::addIRPasses(addPass);

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  //   %0 = add %a, %b
  //   %1 = add %b, %a
  //
  // and
  //
  //   %0 = shl nsw %a, 2
  //   %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (isPassEnabled(EnableScalarIRPasses))
    addEarlyCSEOrGVNPass(addPass);
}

void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
  // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
  // deleted soon.
@@ -1875,3 +1945,33 @@ bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
    return false;
  return Opt;
}

void AMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass(AddIRPass &addPass) const {
  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive)
    addPass(GVNPass());
  else
    addPass(EarlyCSEPass());
}

void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
    AddIRPass &addPass) const {
  if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
    addPass(LoopDataPrefetchPass());

  addPass(SeparateConstOffsetFromGEPPass());

  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(StraightLineStrengthReducePass());

  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass(addPass);

  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(NaryReassociatePass());

  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(EarlyCSEPass());
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -172,6 +172,7 @@ class AMDGPUCodeGenPassBuilder
                           const CGPassBuilderOption &Opts,
                           PassInstrumentationCallbacks *PIC);

  void addIRPasses(AddIRPass &) const;
  void addCodeGenPrepare(AddIRPass &) const;
  void addPreISel(AddIRPass &addPass) const;
  void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
@@ -182,6 +183,8 @@
  /// given that a pass shall work at an optimization \p Level minimum.
  bool isPassEnabled(const cl::opt<bool> &Opt,
                     CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
  void addEarlyCSEOrGVNPass(AddIRPass &) const;
  void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
};

} // end namespace llvm
