//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the NVPTX target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {

class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  const NVPTXSubtarget *getST() const { return ST; }
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

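  // Threads within a warp can take data-dependent branches independently, so
  // every NVPTX function must be treated as potentially divergent; hence the
  // unconditional `true` below.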
  bool hasBranchDivergence(const Function *F = nullptr) const override {
    return true;
  }

  bool isSourceOfDivergence(const Value *V) const override;

  unsigned getFlatAddressSpace() const override {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }
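  // The generic address space serves as NVPTX's "flat" space: pointers to
  // global, shared, and local memory can be addrspacecast into it and
  // dereferenced through generic loads and stores.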

  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
           AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
  }

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Alignment >= ChainSizeInBytes;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }
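  // For example, a 16-byte chain of four i32 loads is combined into a single
  // vector load only when the chain is 16-byte aligned; with align(8) the
  // LoadStoreVectorizer must fall back to narrower accesses.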

  // NVPTX's virtual register file is unbounded, but the actual machine's is
  // not. We conservatively return 1 here, which is just enough to enable the
  // vectorizers but disables heuristics based on the number of registers.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(unsigned ClassID) const override { return 1; }

  // Only <2 x half> should be vectorized, so always return 32 for the vector
  // register size.
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
    return TypeSize::getFixed(32);
  }
  unsigned getMinVectorRegisterBitWidth() const override { return 32; }
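  // e.g. a <2 x half> value packs into one 32-bit register and is operated on
  // with PTX f16x2 instructions, so no wider vector register is reported.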

  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    // Turn off the ExpandReductions pass for NVPTX, which doesn't have
    // advanced swizzling operations. Our backend/SelectionDAG can expand
    // these reductions with fewer movs.
    return false;
  }

  // We don't want to prevent inlining because of target-cpu and -features
  // attributes that were added to newer versions of LLVM/Clang: there are
  // no incompatible functions in PTX; ptxas will throw errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return true;
  }

  // Increase the inlining cost threshold by a factor of 11, reflecting that
  // calls are particularly expensive in NVPTX.
  unsigned getInliningThresholdMultiplier() const override { return 11; }

  InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost getScalarizationOverhead(
      VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const override {
    if (!InTy->getElementCount().isFixed())
      return InstructionCost::getInvalid();

    auto VT = getTLI()->getValueType(DL, InTy);
    auto NumElements = InTy->getElementCount().getFixedValue();
    InstructionCost Cost = 0;
    if (Insert && !VL.empty()) {
      bool AllConstant = all_of(seq(NumElements), [&](int Idx) {
        return !DemandedElts[Idx] || isa<Constant>(VL[Idx]);
      });
      if (AllConstant) {
        Cost += TTI::TCC_Free;
        Insert = false;
      }
    }
    if (Insert && NVPTX::isPackedVectorTy(VT) && VT.is32BitVector()) {
      // Can be built in a single 32-bit mov (64-bit regs are emulated in SASS
      // with 2x 32-bit regs).
      Cost += 1;
      Insert = false;
    }
    if (Insert && VT == MVT::v4i8) {
      Cost += 3; // 3 x PRMT
      for (auto Idx : seq(NumElements))
        if (DemandedElts[Idx])
          Cost += 1; // zext operand to i32
      Insert = false;
    }
    return Cost + BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert,
                                                  Extract, CostKind,
                                                  ForPoisonSrc, VL);
  }
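  // Worked example of the model above: building a <4 x i8> with all four
  // lanes demanded from non-constant scalars costs 3 PRMTs + 4 zexts = 7,
  // while a packed 32-bit type such as <2 x half> costs a single mov (1);
  // any remaining insert/extract overhead is delegated to the base
  // implementation.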

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }
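  // In PTX these lower to ld.volatile/st.volatile, which are defined for the
  // global and shared state spaces (and for generic addresses resolving to
  // them), but not for local or param memory.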

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override;

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override;
  unsigned getAssumedAddrSpace(const Value *V) const override;

  void collectKernelLaunchBounds(
      const Function &F,
      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
};

} // end namespace llvm

#endif