LLVM 22.0.0git
llvm::AMDGPUSubtarget Class Reference abstract

#include "Target/AMDGPU/AMDGPUSubtarget.h"

Inheritance diagram for llvm::AMDGPUSubtarget:
[legend]

Public Types

enum  Generation {
  INVALID = 0 , R600 = 1 , R700 = 2 , EVERGREEN = 3 ,
  NORTHERN_ISLANDS = 4 , SOUTHERN_ISLANDS = 5 , SEA_ISLANDS = 6 , VOLCANIC_ISLANDS = 7 ,
  GFX9 = 8 , GFX10 = 9 , GFX11 = 10 , GFX12 = 11
}

Public Member Functions

 AMDGPUSubtarget (Triple TT)
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize (CallingConv::ID CC) const
std::pair< unsigned, unsigned > getFlatWorkGroupSizes (const Function &F) const
std::optional< unsigned > getReqdWorkGroupSize (const Function &F, unsigned Dim) const
bool hasWavefrontsEvenlySplittingXDim (const Function &F, bool REquiresUniformYZ=false) const
std::pair< unsigned, unsigned > getWavesPerEU (const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU (const Function &F, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
 Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned, unsigned > getWavesPerEU (std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes, const Function &F) const
 Overload which uses the specified values for the flat workgroup sizes and LDS space rather than querying the function itself.
std::pair< unsigned, unsigned > getEffectiveWavesPerEU (std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
 Returns the target minimum/maximum number of waves per EU.
unsigned getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
 Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, const Function &F) const
 Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F and each workgroup running the function requires LDSBytes bytes of LDS space.
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
 Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes (const MachineFunction &MF) const
 Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is MF.
bool isAmdHsaOS () const
bool isAmdPalOS () const
bool isMesa3DOS () const
bool isMesaKernel (const Function &F) const
bool isAmdHsaOrMesa (const Function &F) const
bool isGCN () const
bool isGCN3Encoding () const
bool has16BitInsts () const
bool hasTrue16BitInsts () const
 Return true if the subtarget supports True16 instructions.
bool useRealTrue16Insts () const
 Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
bool hasD16Writes32BitVgpr () const
bool hasBF16TransInsts () const
bool hasBF16ConversionInsts () const
bool hasBF16PackedInsts () const
bool hasMadMixInsts () const
bool hasFP8ConversionScaleInsts () const
bool hasBF8ConversionScaleInsts () const
bool hasFP4ConversionScaleInsts () const
bool hasFP6BF6ConversionScaleInsts () const
bool hasF16BF16ToFP6BF6ConversionScaleInsts () const
bool hasCvtPkF16F32Inst () const
bool hasF32ToF16BF16ConversionSRInsts () const
bool hasMadMacF32Insts () const
bool hasDsSrc2Insts () const
bool hasSDWA () const
bool hasVOP3PInsts () const
bool hasMulI24 () const
bool hasMulU24 () const
bool hasSMulHi () const
bool hasInv2PiInlineImm () const
bool hasFminFmaxLegacy () const
bool hasTrigReducedRange () const
bool hasFastFMAF32 () const
bool isPromoteAllocaEnabled () const
unsigned getWavefrontSize () const
unsigned getWavefrontSizeLog2 () const
unsigned getLocalMemorySize () const
 Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
unsigned getAddressableLocalMemorySize () const
 Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getEUsPerCU () const
 Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.
Align getAlignmentForImplicitArgPtr () const
unsigned getExplicitKernelArgOffset () const
 Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
virtual unsigned getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const =0
virtual unsigned getMinFlatWorkGroupSize () const =0
virtual unsigned getMaxFlatWorkGroupSize () const =0
virtual unsigned getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const =0
virtual unsigned getMinWavesPerEU () const =0
unsigned getMaxWavesPerEU () const
unsigned getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const
 Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
SmallVector< unsigned > getMaxNumWorkGroups (const Function &F) const
 Return the number of work groups for the function.
bool isSingleLaneExecution (const Function &Kernel) const
 Return true if only a single workitem can be active in a wave.
bool makeLIDRangeMetadata (Instruction *I) const
 Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned getImplicitArgNumBytes (const Function &F) const
uint64_t getExplicitKernArgSize (const Function &F, Align &MaxAlign) const
unsigned getKernArgSegmentSize (const Function &F, Align &MaxAlign) const
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour () const
virtual ~AMDGPUSubtarget ()=default

Static Public Member Functions

static const AMDGPUSubtarget & get (const MachineFunction &MF)
static const AMDGPUSubtarget & get (const TargetMachine &TM, const Function &F)

Protected Attributes

bool GCN3Encoding = false
bool Has16BitInsts = false
bool HasTrue16BitInsts = false
bool HasFP8ConversionScaleInsts = false
bool HasBF8ConversionScaleInsts = false
bool HasFP4ConversionScaleInsts = false
bool HasFP6BF6ConversionScaleInsts = false
bool HasF16BF16ToFP6BF6ConversionScaleInsts = false
bool HasCvtPkF16F32Inst = false
bool HasF32ToF16BF16ConversionSRInsts = false
bool EnableRealTrue16Insts = false
bool EnableD16Writes32BitVgpr = false
bool HasBF16TransInsts = false
bool HasBF16ConversionInsts = false
bool HasBF16PackedInsts = false
bool HasMadMixInsts = false
bool HasMadMacF32Insts = false
bool HasDsSrc2Insts = false
bool HasSDWA = false
bool HasVOP3PInsts = false
bool HasMulI24 = true
bool HasMulU24 = true
bool HasSMulHi = false
bool HasInv2PiInlineImm = false
bool HasFminFmaxLegacy = true
bool EnablePromoteAlloca = false
bool HasTrigReducedRange = false
bool FastFMAF32 = false
unsigned EUsPerCU = 4
unsigned MaxWavesPerEU = 10
unsigned LocalMemorySize = 0
unsigned AddressableLocalMemorySize = 0
char WavefrontSizeLog2 = 0

Detailed Description

Definition at line 30 of file AMDGPUSubtarget.h.

Member Enumeration Documentation

◆ Generation

Enumerator
INVALID 
R600 
R700 
EVERGREEN 
NORTHERN_ISLANDS 
SOUTHERN_ISLANDS 
SEA_ISLANDS 
VOLCANIC_ISLANDS 
GFX9 
GFX10 
GFX11 
GFX12 

Definition at line 32 of file AMDGPUSubtarget.h.

Constructor & Destructor Documentation

◆ AMDGPUSubtarget()

AMDGPUSubtarget::AMDGPUSubtarget ( Triple TT)

◆ ~AMDGPUSubtarget()

virtual llvm::AMDGPUSubtarget::~AMDGPUSubtarget ( )
virtual default

Member Function Documentation

◆ get() [1/2]

◆ get() [2/2]

const AMDGPUSubtarget & AMDGPUSubtarget::get ( const TargetMachine & TM,
const Function & F )
static

Definition at line 429 of file AMDGPUSubtarget.cpp.

References AMDGPUSubtarget(), and F.

◆ getAddressableLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getAddressableLocalMemorySize ( ) const
inline

Return the maximum number of bytes of LDS that can be allocated to a single workgroup.

For GFX10-GFX12 in WGP mode this is limited to 64k even though the WGP has 128k in total.

Definition at line 330 of file AMDGPUSubtarget.h.

References AddressableLocalMemorySize.

◆ getAlignmentForImplicitArgPtr()

Align llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr ( ) const
inline

Definition at line 339 of file AMDGPUSubtarget.h.

References isAmdHsaOS().

Referenced by getKernArgSegmentSize().

◆ getAMDGPUDwarfFlavour()

AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour ( ) const
Returns
Corresponding DWARF register number mapping flavour for the WavefrontSize.

Definition at line 418 of file AMDGPUSubtarget.cpp.

References getWavefrontSize(), llvm::Wave32, and llvm::Wave64.

◆ getDefaultFlatWorkGroupSize()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getDefaultFlatWorkGroupSize ( CallingConv::ID CC) const

◆ getEffectiveWavesPerEU()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getEffectiveWavesPerEU ( std::pair< unsigned, unsigned > RequestedWavesPerEU,
std::pair< unsigned, unsigned > FlatWorkGroupSizes,
unsigned LDSBytes ) const

Returns the target minimum/maximum number of waves per EU.

This is based on the minimum/maximum number of RequestedWavesPerEU and further limited by the maximum achievable occupancy derived from the range of FlatWorkGroupSizes and number of LDSBytes per workgroup.

Definition at line 186 of file AMDGPUSubtarget.cpp.

References llvm::Default, getMaxWavesPerEU(), getOccupancyWithWorkGroupSizes(), and getWavesPerEUForWorkGroup().

Referenced by getWavesPerEU().

◆ getEUsPerCU()

unsigned llvm::AMDGPUSubtarget::getEUsPerCU ( ) const
inline

Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.

This takes WGP mode vs. CU mode into account.

Definition at line 337 of file AMDGPUSubtarget.h.

References EUsPerCU.

Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithWorkGroupSizes().

◆ getExplicitKernArgSize()

uint64_t AMDGPUSubtarget::getExplicitKernArgSize ( const Function & F,
Align & MaxAlign ) const

◆ getExplicitKernelArgOffset()

unsigned llvm::AMDGPUSubtarget::getExplicitKernelArgOffset ( ) const
inline

Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.

Definition at line 345 of file AMDGPUSubtarget.h.

References llvm::Triple::AMDHSA, llvm::Triple::AMDPAL, llvm_unreachable, llvm::Triple::Mesa3D, and llvm::Triple::UnknownOS.

Referenced by getKernArgSegmentSize(), and llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel().

◆ getFlatWorkGroupSizes()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getFlatWorkGroupSizes ( const Function & F) const
Returns
Subtarget's default pair of minimum/maximum flat work group sizes for function F, or minimum/maximum flat work group sizes explicitly requested using "amdgpu-flat-work-group-size" attribute attached to function F.
Subtarget's default values if explicitly requested values cannot be converted to integer, or violate subtarget's specifications.

Definition at line 163 of file AMDGPUSubtarget.cpp.

References llvm::Default, F, getDefaultFlatWorkGroupSize(), llvm::AMDGPU::getIntegerPairAttribute(), getMaxFlatWorkGroupSize(), and getMinFlatWorkGroupSize().

Referenced by getMaxLocalMemSizeWithWaveCount(), getMaxWorkitemID(), getOccupancyWithWorkGroupSizes(), getWavesPerEU(), and makeLIDRangeMetadata().

◆ getImplicitArgNumBytes()

unsigned AMDGPUSubtarget::getImplicitArgNumBytes ( const Function & F) const
Returns
Number of bytes of arguments that are passed to a shader or kernel in addition to the explicit ones declared for the function.

Definition at line 352 of file AMDGPUSubtarget.cpp.

References llvm::AMDGPU::AMDHSA_COV5, assert(), F, llvm::AMDGPU::getAMDHSACodeObjectVersion(), llvm::AMDGPU::isKernel(), and isMesaKernel().

Referenced by getKernArgSegmentSize().

◆ getKernArgSegmentSize()

◆ getLocalMemorySize()

unsigned llvm::AMDGPUSubtarget::getLocalMemorySize ( ) const
inline

Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.

For GFX10-GFX12 in WGP mode this is 128k even though each workgroup is limited to 64k.

Definition at line 322 of file AMDGPUSubtarget.h.

References LocalMemorySize.

Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithWorkGroupSizes().

◆ getMaxFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize ( ) const
pure virtual
Returns
Maximum flat work group size supported by the subtarget.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getDefaultFlatWorkGroupSize(), and getFlatWorkGroupSizes().

◆ getMaxLocalMemSizeWithWaveCount()

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount ( unsigned WaveCount,
const Function & F ) const

Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

Definition at line 49 of file AMDGPUSubtarget.cpp.

References F, getEUsPerCU(), getFlatWorkGroupSizes(), getLocalMemorySize(), and getWavefrontSize().

◆ getMaxNumWorkGroups()

SmallVector< unsigned > AMDGPUSubtarget::getMaxNumWorkGroups ( const Function & F) const

Return the number of work groups for the function.

Definition at line 438 of file AMDGPUSubtarget.cpp.

References F, and llvm::AMDGPU::getIntegerVecAttribute().

◆ getMaxWavesPerEU()

unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( ) const
inline
Returns
Maximum number of waves per execution unit supported by the subtarget without any kind of limitation.

Definition at line 382 of file AMDGPUSubtarget.h.

References MaxWavesPerEU.

Referenced by getEffectiveWavesPerEU(), getOccupancyWithWorkGroupSizes(), and getWavesPerEU().

◆ getMaxWorkGroupsPerCU()

virtual unsigned llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU ( unsigned FlatWorkGroupSize) const
pure virtual
Returns
Maximum number of work groups per compute unit supported by the subtarget and limited by given FlatWorkGroupSize.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getOccupancyWithWorkGroupSizes().

◆ getMaxWorkitemID()

unsigned AMDGPUSubtarget::getMaxWorkitemID ( const Function & Kernel,
unsigned Dimension ) const

Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.

Definition at line 267 of file AMDGPUSubtarget.cpp.

References getFlatWorkGroupSizes(), and getReqdWorkGroupSize().

Referenced by isSingleLaneExecution().

◆ getMinFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize ( ) const
pure virtual
Returns
Minimum flat work group size supported by the subtarget.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getFlatWorkGroupSizes().

◆ getMinWavesPerEU()

virtual unsigned llvm::AMDGPUSubtarget::getMinWavesPerEU ( ) const
pure virtual
Returns
Minimum number of waves per execution unit supported by the subtarget.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

◆ getOccupancyWithWorkGroupSizes() [1/3]

std::pair< unsigned, unsigned > AMDGPUSubtarget::getOccupancyWithWorkGroupSizes ( const MachineFunction & MF) const

Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is MF.

This notably depends on the range of allowed flat group sizes for the function, the amount of per-workgroup LDS space required by the function, and hardware characteristics.

Definition at line 142 of file AMDGPUSubtarget.cpp.

References llvm::MachineFunction::getFunction(), llvm::MachineFunction::getInfo(), and getOccupancyWithWorkGroupSizes().

◆ getOccupancyWithWorkGroupSizes() [2/3]

std::pair< unsigned, unsigned > llvm::AMDGPUSubtarget::getOccupancyWithWorkGroupSizes ( uint32_t LDSBytes,
const Function & F ) const
inline

Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F and each workgroup running the function requires LDSBytes bytes of LDS space.

This notably depends on the range of allowed flat group sizes for the function and hardware characteristics.

Definition at line 170 of file AMDGPUSubtarget.h.

References F, getFlatWorkGroupSizes(), and getOccupancyWithWorkGroupSizes().

Referenced by llvm::GCNSubtarget::computeOccupancy(), getEffectiveWavesPerEU(), getOccupancyWithWorkGroupSizes(), and getOccupancyWithWorkGroupSizes().

◆ getOccupancyWithWorkGroupSizes() [3/3]

std::pair< unsigned, unsigned > AMDGPUSubtarget::getOccupancyWithWorkGroupSizes ( uint32_t LDSBytes,
std::pair< unsigned, unsigned > FlatWorkGroupSizes ) const

Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.

FlatWorkGroupSizes should correspond to the function's value for getFlatWorkGroupSizes.

Definition at line 62 of file AMDGPUSubtarget.cpp.

References llvm::divideCeil(), getEUsPerCU(), getLocalMemorySize(), getMaxWavesPerEU(), getMaxWorkGroupsPerCU(), getWavefrontSize(), and std::swap().

◆ getReqdWorkGroupSize()

std::optional< unsigned > AMDGPUSubtarget::getReqdWorkGroupSize ( const Function & F,
unsigned Dim ) const
Returns
The required size of workgroups that will be used to execute F in the Dim dimension, if it is known (from !reqd_work_group_size metadata). Otherwise, returns std::nullopt.

Definition at line 237 of file AMDGPUSubtarget.cpp.

References llvm::mdconst::extract(), and llvm::GlobalObject::getMetadata().

Referenced by getMaxWorkitemID(), and makeLIDRangeMetadata().

◆ getWavefrontSize()

◆ getWavefrontSizeLog2()

unsigned llvm::AMDGPUSubtarget::getWavefrontSizeLog2 ( ) const
inline

◆ getWavesPerEU() [1/3]

std::pair< unsigned, unsigned > AMDGPUSubtarget::getWavesPerEU ( const Function & F) const
Returns
Subtarget's default pair of minimum/maximum number of waves per execution unit for function F, or minimum/maximum number of waves per execution unit explicitly requested using "amdgpu-waves-per-eu" attribute attached to function F.
Subtarget's default values if explicitly requested values cannot be converted to integer, violate subtarget's specifications, or are not compatible with minimum/maximum number of waves limited by flat work group size, register usage, and/or lds usage.

Definition at line 213 of file AMDGPUSubtarget.cpp.

References F, getFlatWorkGroupSizes(), llvm::AMDGPU::getIntegerPairAttribute(), and getWavesPerEU().

Referenced by llvm::GCNSubtarget::getMaxNumSGPRs(), llvm::GCNSubtarget::getMaxNumVGPRs(), and getWavesPerEU().

◆ getWavesPerEU() [2/3]

std::pair< unsigned, unsigned > llvm::AMDGPUSubtarget::getWavesPerEU ( const Function & F,
std::pair< unsigned, unsigned > FlatWorkGroupSizes ) const

Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.

FlatWorkGroupSizes should correspond to the function's value for getFlatWorkGroupSizes.

References F.

◆ getWavesPerEU() [3/3]

std::pair< unsigned, unsigned > AMDGPUSubtarget::getWavesPerEU ( std::pair< unsigned, unsigned > FlatWorkGroupSizes,
unsigned LDSBytes,
const Function & F ) const

Overload which uses the specified values for the flat workgroup sizes and LDS space rather than querying the function itself.

FlatWorkGroupSizes should correspond to the function's value for getFlatWorkGroupSizes and LDSBytes to the per-workgroup LDS allocation.

Definition at line 225 of file AMDGPUSubtarget.cpp.

References llvm::Default, F, getEffectiveWavesPerEU(), llvm::AMDGPU::getIntegerPairAttribute(), and getMaxWavesPerEU().

◆ getWavesPerEUForWorkGroup()

virtual unsigned llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup ( unsigned FlatWorkGroupSize) const
pure virtual
Returns
Number of waves per execution unit required to support the given FlatWorkGroupSize.

Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.

Referenced by getEffectiveWavesPerEU().

◆ has16BitInsts()

bool llvm::AMDGPUSubtarget::has16BitInsts ( ) const
inline

Definition at line 213 of file AMDGPUSubtarget.h.

References Has16BitInsts.

◆ hasBF16ConversionInsts()

bool llvm::AMDGPUSubtarget::hasBF16ConversionInsts ( ) const
inline

Definition at line 232 of file AMDGPUSubtarget.h.

References HasBF16ConversionInsts.

◆ hasBF16PackedInsts()

bool llvm::AMDGPUSubtarget::hasBF16PackedInsts ( ) const
inline

Definition at line 236 of file AMDGPUSubtarget.h.

References HasBF16PackedInsts.

◆ hasBF16TransInsts()

bool llvm::AMDGPUSubtarget::hasBF16TransInsts ( ) const
inline

Definition at line 230 of file AMDGPUSubtarget.h.

References HasBF16TransInsts.

◆ hasBF8ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasBF8ConversionScaleInsts ( ) const
inline

Definition at line 244 of file AMDGPUSubtarget.h.

References HasBF8ConversionScaleInsts.

◆ hasCvtPkF16F32Inst()

bool llvm::AMDGPUSubtarget::hasCvtPkF16F32Inst ( ) const
inline

Definition at line 256 of file AMDGPUSubtarget.h.

References HasCvtPkF16F32Inst.

◆ hasD16Writes32BitVgpr()

bool AMDGPUSubtarget::hasD16Writes32BitVgpr ( ) const

Definition at line 41 of file AMDGPUSubtarget.cpp.

References EnableD16Writes32BitVgpr.

◆ hasDsSrc2Insts()

bool llvm::AMDGPUSubtarget::hasDsSrc2Insts ( ) const
inline

Definition at line 266 of file AMDGPUSubtarget.h.

References HasDsSrc2Insts.

◆ hasF16BF16ToFP6BF6ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasF16BF16ToFP6BF6ConversionScaleInsts ( ) const
inline

Definition at line 252 of file AMDGPUSubtarget.h.

References HasF16BF16ToFP6BF6ConversionScaleInsts.

◆ hasF32ToF16BF16ConversionSRInsts()

bool llvm::AMDGPUSubtarget::hasF32ToF16BF16ConversionSRInsts ( ) const
inline

Definition at line 258 of file AMDGPUSubtarget.h.

References HasF32ToF16BF16ConversionSRInsts.

◆ hasFastFMAF32()

bool llvm::AMDGPUSubtarget::hasFastFMAF32 ( ) const
inline

Definition at line 302 of file AMDGPUSubtarget.h.

References FastFMAF32.

◆ hasFminFmaxLegacy()

bool llvm::AMDGPUSubtarget::hasFminFmaxLegacy ( ) const
inline

Definition at line 294 of file AMDGPUSubtarget.h.

References HasFminFmaxLegacy.

◆ hasFP4ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasFP4ConversionScaleInsts ( ) const
inline

Definition at line 246 of file AMDGPUSubtarget.h.

References HasFP4ConversionScaleInsts.

◆ hasFP6BF6ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasFP6BF6ConversionScaleInsts ( ) const
inline

Definition at line 248 of file AMDGPUSubtarget.h.

References HasFP6BF6ConversionScaleInsts.

◆ hasFP8ConversionScaleInsts()

bool llvm::AMDGPUSubtarget::hasFP8ConversionScaleInsts ( ) const
inline

Definition at line 242 of file AMDGPUSubtarget.h.

References HasFP8ConversionScaleInsts.

◆ hasInv2PiInlineImm()

bool llvm::AMDGPUSubtarget::hasInv2PiInlineImm ( ) const
inline

Definition at line 290 of file AMDGPUSubtarget.h.

References HasInv2PiInlineImm.

◆ hasMadMacF32Insts()

bool llvm::AMDGPUSubtarget::hasMadMacF32Insts ( ) const
inline

Definition at line 262 of file AMDGPUSubtarget.h.

References HasMadMacF32Insts, and isGCN().

◆ hasMadMixInsts()

bool llvm::AMDGPUSubtarget::hasMadMixInsts ( ) const
inline

Definition at line 238 of file AMDGPUSubtarget.h.

References HasMadMixInsts.

◆ hasMulI24()

bool llvm::AMDGPUSubtarget::hasMulI24 ( ) const
inline

Definition at line 278 of file AMDGPUSubtarget.h.

References HasMulI24.

◆ hasMulU24()

bool llvm::AMDGPUSubtarget::hasMulU24 ( ) const
inline

Definition at line 282 of file AMDGPUSubtarget.h.

References HasMulU24.

◆ hasSDWA()

bool llvm::AMDGPUSubtarget::hasSDWA ( ) const
inline

Definition at line 270 of file AMDGPUSubtarget.h.

References HasSDWA.

◆ hasSMulHi()

bool llvm::AMDGPUSubtarget::hasSMulHi ( ) const
inline

Definition at line 286 of file AMDGPUSubtarget.h.

References HasSMulHi.

◆ hasTrigReducedRange()

bool llvm::AMDGPUSubtarget::hasTrigReducedRange ( ) const
inline

Definition at line 298 of file AMDGPUSubtarget.h.

References HasTrigReducedRange.

◆ hasTrue16BitInsts()

bool llvm::AMDGPUSubtarget::hasTrue16BitInsts ( ) const
inline

Return true if the subtarget supports True16 instructions.

Definition at line 218 of file AMDGPUSubtarget.h.

References HasTrue16BitInsts.

Referenced by useRealTrue16Insts().

◆ hasVOP3PInsts()

bool llvm::AMDGPUSubtarget::hasVOP3PInsts ( ) const
inline

Definition at line 274 of file AMDGPUSubtarget.h.

References HasVOP3PInsts.

◆ hasWavefrontsEvenlySplittingXDim()

bool AMDGPUSubtarget::hasWavefrontsEvenlySplittingXDim ( const Function & F,
bool REquiresUniformYZ = false ) const
Returns
true if F will execute in a manner that leaves the X dimensions of the workitem ID evenly tiling wavefronts - that is, if X / wavefrontsize is uniform. This is true if either the Y and Z block dimensions are known to always be 1 or if the X dimension will always be a power of 2. If RequiresUniformYZ is true, it also ensures that the Y and Z workitem IDs will be uniform (so, while a (32, 2, 1) launch with wavesize64 would ordinarily pass this test, it won't with RequiresUniformYZ).

This information is currently only gathered from the !reqd_work_group_size metadata on F, but this may be improved in the future.

Definition at line 245 of file AMDGPUSubtarget.cpp.

References llvm::mdconst::extract(), F, getWavefrontSize(), and llvm::isPowerOf2_32().

◆ isAmdHsaOrMesa()

bool llvm::AMDGPUSubtarget::isAmdHsaOrMesa ( const Function & F) const
inline

Definition at line 203 of file AMDGPUSubtarget.h.

References F, isAmdHsaOS(), and isMesaKernel().

◆ isAmdHsaOS()

◆ isAmdPalOS()

bool llvm::AMDGPUSubtarget::isAmdPalOS ( ) const
inline

Definition at line 193 of file AMDGPUSubtarget.h.

References llvm::Triple::AMDPAL.

Referenced by llvm::AMDGPUAsmPrinter::runOnMachineFunction().

◆ isGCN()

bool llvm::AMDGPUSubtarget::isGCN ( ) const
inline

Definition at line 207 of file AMDGPUSubtarget.h.

Referenced by hasMadMacF32Insts().

◆ isGCN3Encoding()

bool llvm::AMDGPUSubtarget::isGCN3Encoding ( ) const
inline

Definition at line 209 of file AMDGPUSubtarget.h.

References GCN3Encoding.

◆ isMesa3DOS()

bool llvm::AMDGPUSubtarget::isMesa3DOS ( ) const
inline

Definition at line 197 of file AMDGPUSubtarget.h.

References llvm::Triple::Mesa3D.

Referenced by llvm::GCNSubtarget::isMesaGfxShader(), and isMesaKernel().

◆ isMesaKernel()

bool AMDGPUSubtarget::isMesaKernel ( const Function & F) const

Definition at line 263 of file AMDGPUSubtarget.cpp.

References F, isMesa3DOS(), and llvm::AMDGPU::isShader().

Referenced by getImplicitArgNumBytes(), and isAmdHsaOrMesa().

◆ isPromoteAllocaEnabled()

bool llvm::AMDGPUSubtarget::isPromoteAllocaEnabled ( ) const
inline

Definition at line 306 of file AMDGPUSubtarget.h.

References EnablePromoteAlloca.

◆ isSingleLaneExecution()

bool AMDGPUSubtarget::isSingleLaneExecution ( const Function & Kernel) const

Return true if only a single workitem can be active in a wave.

Definition at line 275 of file AMDGPUSubtarget.cpp.

References getMaxWorkitemID(), and I.

◆ makeLIDRangeMetadata()

bool AMDGPUSubtarget::makeLIDRangeMetadata ( Instruction * I) const

Creates value range metadata on a workitemid.* intrinsic call or load.

Definition at line 284 of file AMDGPUSubtarget.cpp.

References llvm::MDBuilder::createRange(), llvm::dyn_cast(), F, getFlatWorkGroupSizes(), getReqdWorkGroupSize(), I, llvm::Lower, Range, and llvm::Upper.

◆ useRealTrue16Insts()

bool AMDGPUSubtarget::useRealTrue16Insts ( ) const

Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.

Fake True16 instructions are identical to non-fake ones except that they take 32-bit registers as operands and always use their low halves.

Definition at line 37 of file AMDGPUSubtarget.cpp.

References EnableRealTrue16Insts, and hasTrue16BitInsts().

Member Data Documentation

◆ AddressableLocalMemorySize

unsigned llvm::AMDGPUSubtarget::AddressableLocalMemorySize = 0
protected

◆ EnableD16Writes32BitVgpr

bool llvm::AMDGPUSubtarget::EnableD16Writes32BitVgpr = false
protected

Definition at line 62 of file AMDGPUSubtarget.h.

Referenced by hasD16Writes32BitVgpr().

◆ EnablePromoteAlloca

bool llvm::AMDGPUSubtarget::EnablePromoteAlloca = false
protected

Definition at line 76 of file AMDGPUSubtarget.h.

Referenced by isPromoteAllocaEnabled().

◆ EnableRealTrue16Insts

bool llvm::AMDGPUSubtarget::EnableRealTrue16Insts = false
protected

Definition at line 61 of file AMDGPUSubtarget.h.

Referenced by useRealTrue16Insts().

◆ EUsPerCU

unsigned llvm::AMDGPUSubtarget::EUsPerCU = 4
protected

Definition at line 79 of file AMDGPUSubtarget.h.

Referenced by llvm::GCNSubtarget::GCNSubtarget(), and getEUsPerCU().

◆ FastFMAF32

bool llvm::AMDGPUSubtarget::FastFMAF32 = false
protected

Definition at line 78 of file AMDGPUSubtarget.h.

Referenced by hasFastFMAF32().

◆ GCN3Encoding

bool llvm::AMDGPUSubtarget::GCN3Encoding = false
protected

Definition at line 51 of file AMDGPUSubtarget.h.

Referenced by isGCN3Encoding().

◆ Has16BitInsts

bool llvm::AMDGPUSubtarget::Has16BitInsts = false
protected

Definition at line 52 of file AMDGPUSubtarget.h.

Referenced by has16BitInsts().

◆ HasBF16ConversionInsts

bool llvm::AMDGPUSubtarget::HasBF16ConversionInsts = false
protected

Definition at line 64 of file AMDGPUSubtarget.h.

Referenced by hasBF16ConversionInsts().

◆ HasBF16PackedInsts

bool llvm::AMDGPUSubtarget::HasBF16PackedInsts = false
protected

Definition at line 65 of file AMDGPUSubtarget.h.

Referenced by hasBF16PackedInsts().

◆ HasBF16TransInsts

bool llvm::AMDGPUSubtarget::HasBF16TransInsts = false
protected

Definition at line 63 of file AMDGPUSubtarget.h.

Referenced by hasBF16TransInsts().

◆ HasBF8ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasBF8ConversionScaleInsts = false
protected

Definition at line 55 of file AMDGPUSubtarget.h.

Referenced by hasBF8ConversionScaleInsts().

◆ HasCvtPkF16F32Inst

bool llvm::AMDGPUSubtarget::HasCvtPkF16F32Inst = false
protected

Definition at line 59 of file AMDGPUSubtarget.h.

Referenced by hasCvtPkF16F32Inst().

◆ HasDsSrc2Insts

bool llvm::AMDGPUSubtarget::HasDsSrc2Insts = false
protected

Definition at line 68 of file AMDGPUSubtarget.h.

Referenced by hasDsSrc2Insts().

◆ HasF16BF16ToFP6BF6ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasF16BF16ToFP6BF6ConversionScaleInsts = false
protected

Definition at line 58 of file AMDGPUSubtarget.h.

Referenced by hasF16BF16ToFP6BF6ConversionScaleInsts().

◆ HasF32ToF16BF16ConversionSRInsts

bool llvm::AMDGPUSubtarget::HasF32ToF16BF16ConversionSRInsts = false
protected

Definition at line 60 of file AMDGPUSubtarget.h.

Referenced by hasF32ToF16BF16ConversionSRInsts().

◆ HasFminFmaxLegacy

bool llvm::AMDGPUSubtarget::HasFminFmaxLegacy = true
protected

◆ HasFP4ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasFP4ConversionScaleInsts = false
protected

Definition at line 56 of file AMDGPUSubtarget.h.

Referenced by hasFP4ConversionScaleInsts().

◆ HasFP6BF6ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasFP6BF6ConversionScaleInsts = false
protected

Definition at line 57 of file AMDGPUSubtarget.h.

Referenced by hasFP6BF6ConversionScaleInsts().

◆ HasFP8ConversionScaleInsts

bool llvm::AMDGPUSubtarget::HasFP8ConversionScaleInsts = false
protected

Definition at line 54 of file AMDGPUSubtarget.h.

Referenced by hasFP8ConversionScaleInsts().

◆ HasInv2PiInlineImm

bool llvm::AMDGPUSubtarget::HasInv2PiInlineImm = false
protected

Definition at line 74 of file AMDGPUSubtarget.h.

Referenced by hasInv2PiInlineImm().

◆ HasMadMacF32Insts

bool llvm::AMDGPUSubtarget::HasMadMacF32Insts = false
protected

Definition at line 67 of file AMDGPUSubtarget.h.

Referenced by hasMadMacF32Insts().

◆ HasMadMixInsts

bool llvm::AMDGPUSubtarget::HasMadMixInsts = false
protected

Definition at line 66 of file AMDGPUSubtarget.h.

Referenced by hasMadMixInsts().

◆ HasMulI24

bool llvm::AMDGPUSubtarget::HasMulI24 = true
protected

◆ HasMulU24

bool llvm::AMDGPUSubtarget::HasMulU24 = true
protected

◆ HasSDWA

bool llvm::AMDGPUSubtarget::HasSDWA = false
protected

Definition at line 69 of file AMDGPUSubtarget.h.

Referenced by hasSDWA().

◆ HasSMulHi

bool llvm::AMDGPUSubtarget::HasSMulHi = false
protected

◆ HasTrigReducedRange

bool llvm::AMDGPUSubtarget::HasTrigReducedRange = false
protected

Definition at line 77 of file AMDGPUSubtarget.h.

Referenced by hasTrigReducedRange().

◆ HasTrue16BitInsts

bool llvm::AMDGPUSubtarget::HasTrue16BitInsts = false
protected

Definition at line 53 of file AMDGPUSubtarget.h.

Referenced by hasTrue16BitInsts().

◆ HasVOP3PInsts

bool llvm::AMDGPUSubtarget::HasVOP3PInsts = false
protected

Definition at line 70 of file AMDGPUSubtarget.h.

Referenced by hasVOP3PInsts().

◆ LocalMemorySize

unsigned llvm::AMDGPUSubtarget::LocalMemorySize = 0
protected

◆ MaxWavesPerEU

unsigned llvm::AMDGPUSubtarget::MaxWavesPerEU = 10
protected

Definition at line 80 of file AMDGPUSubtarget.h.

Referenced by llvm::GCNSubtarget::GCNSubtarget(), and getMaxWavesPerEU().

◆ WavefrontSizeLog2

char llvm::AMDGPUSubtarget::WavefrontSizeLog2 = 0
protected

The documentation for this class was generated from the following files: