//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the NVPTX target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {

class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  const NVPTXSubtarget *getST() const { return ST; }
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

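  // Threads within a warp can take data-dependent branches independently, so
  // every NVPTX function must be treated as potentially divergent; hence the
  // unconditional `true` below.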
  bool hasBranchDivergence(const Function *F = nullptr) const override {
    return true;
  }

  bool isSourceOfDivergence(const Value *V) const override;

  unsigned getFlatAddressSpace() const override {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }
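  // The generic address space serves as NVPTX's "flat" space: pointers to
  // global, shared, and local memory can be addrspacecast into it and
  // dereferenced through generic loads and stores.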

  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
           AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
  }

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Alignment >= ChainSizeInBytes;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }
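  // For example, a 16-byte chain of four i32 loads is combined into a single
  // vector load only when the chain is 16-byte aligned; with align(8) the
  // LoadStoreVectorizer must fall back to narrower accesses.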

  // NVPTX's virtual register file is unbounded, but the actual machine's is
  // not. We conservatively return 1 here, which is just enough to enable the
  // vectorizers but disables heuristics based on the number of registers.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(unsigned ClassID) const override { return 1; }

  // Only <2 x half> should be vectorized, so always return 32 for the vector
  // register size.
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
    return TypeSize::getFixed(32);
  }
  unsigned getMinVectorRegisterBitWidth() const override { return 32; }
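  // e.g. a <2 x half> value packs into one 32-bit register and is operated on
  // with PTX f16x2 instructions, so no wider vector register is reported.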

  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    // Turn off the ExpandReductions pass for NVPTX, which doesn't have
    // advanced swizzling operations. Our backend/SelectionDAG can expand
    // these reductions with fewer movs.
    return false;
  }

  // We don't want to prevent inlining because of target-cpu and -features
  // attributes that were added to newer versions of LLVM/Clang: there are
  // no incompatible functions in PTX; ptxas will throw errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return true;
  }

  // Increase the inlining cost threshold by a factor of 11, reflecting that
  // calls are particularly expensive in NVPTX.
  unsigned getInliningThresholdMultiplier() const override { return 11; }

  InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost getScalarizationOverhead(
      VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const override {
    if (!InTy->getElementCount().isFixed())
      return InstructionCost::getInvalid();

    auto VT = getTLI()->getValueType(DL, InTy);
    auto NumElements = InTy->getElementCount().getFixedValue();
    InstructionCost Cost = 0;
    if (Insert && !VL.empty()) {
      bool AllConstant = all_of(seq(NumElements), [&](int Idx) {
        return !DemandedElts[Idx] || isa<Constant>(VL[Idx]);
      });
      if (AllConstant) {
        Cost += TTI::TCC_Free;
        Insert = false;
      }
    }
    if (Insert && NVPTX::isPackedVectorTy(VT) && VT.is32BitVector()) {
      // Can be built in a single 32-bit mov (64-bit regs are emulated in SASS
      // with 2x 32-bit regs).
      Cost += 1;
      Insert = false;
    }
    if (Insert && VT == MVT::v4i8) {
      Cost += 3; // 3 x PRMT
      for (auto Idx : seq(NumElements))
        if (DemandedElts[Idx])
          Cost += 1; // zext operand to i32
      Insert = false;
    }
    return Cost + BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert,
                                                  Extract, CostKind,
                                                  ForPoisonSrc, VL);
  }
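  // Worked example of the model above: building a <4 x i8> with all four
  // lanes demanded from non-constant scalars costs 3 PRMTs + 4 zexts = 7,
  // while a packed 32-bit type such as <2 x half> costs a single mov (1);
  // any remaining insert/extract overhead is delegated to the base
  // implementation.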

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }
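  // In PTX these lower to ld.volatile/st.volatile, which are defined for the
  // global and shared state spaces (and for generic addresses resolving to
  // them), but not for local or param memory.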

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override;

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override;
  unsigned getAssumedAddrSpace(const Value *V) const override;

  void collectKernelLaunchBounds(
      const Function &F,
      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
};

} // end namespace llvm

#endif