#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
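// Note: "AMDGPUGenGlobalISel.inc" is TableGen-generated and is included
// several times in this file, each time under a different GET_GLOBALISEL_*
// guard macro so that only the matching fragment of the generated selector
// (implementation, predicate initializers, temporaries) is expanded there.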
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
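// isVCC: returns true if the given virtual register holds a wave-wide
// condition bit, i.e. it lives (or will live) in the VCC register bank rather
// than holding an ordinary scalar or per-lane value.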
bool AMDGPUInstructionSelector::isVCC(Register Reg,
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
      dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
    const LLT Ty = MRI.getType(Reg);
    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
  return RB->getID() == AMDGPU::VCCRegBankID;
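// constrainCopyLikeIntrin: rewrites a copy-like intrinsic (e.g. WQM or
// STRICT_WWM) to the given target opcode and constrains both operands to a
// common register class, failing if the source and destination classes differ.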
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)
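// selectCOPY: selects a generic COPY. Copies producing a VCC-bank boolean need
// extra care: SCC sources and constants are materialized with a scalar move,
// while 1-bit values from other banks are masked and compared against zero to
// form the wave-wide condition.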
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      Register MaskedReg = MRI->createVirtualRegister(SrcRC);
        const int64_t NoMods = 0;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
      unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        And.setOperandDead(3);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
    if (!MRI->getRegClassOrNull(SrcReg))
      MRI->setRegClass(SrcReg, SrcRC);
  if (MO.getReg().isPhysical())
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
      STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
      .addReg(I.getOperand(1).getReg())
  Register DstReg = I.getOperand(0).getReg();
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  std::optional<ValueAndVReg> Arg =
  const int64_t Value = Arg->Value.getZExtValue();
  unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
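// selectReadAnyLane: lowers the read-any-lane pseudo to V_READFIRSTLANE_B32,
// copying a value from an arbitrary active lane of a VGPR into an SGPR.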
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
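// selectPHI: constrains the destination of a G_PHI to a register class derived
// from its register bank and type, then rewrites it to a target PHI.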
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
      dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
    const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();
    const LLT SrcTy = MRI->getType(SrcReg);
  I.setDesc(TII.get(TargetOpcode::PHI));
                                                     unsigned SubIdx) const {
    Register DstReg = MRI->createVirtualRegister(&SubRC);
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);
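// The three returns below appear to belong to a small helper that maps a
// generic bitwise opcode (G_AND/G_OR/G_XOR) to the scalar ALU instruction for
// the current lane-mask width (64-bit for wave64, 32-bit for wave32).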
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
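// selectG_ADD_SUB: selects 32-bit and 64-bit G_ADD/G_SUB. SALU results use
// S_ADD/S_SUB (with S_ADDC for the high half of 64-bit adds); VALU results use
// V_ADD/V_SUB, falling back to the carry-out variants where required, and
// 64-bit values are split into sub0/sub1 halves recombined via REG_SEQUENCE.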
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2))
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
        .add(I.getOperand(1))
        .add(I.getOperand(2))
  assert(!Sub && "illegal sub should not reach here");
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
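// selectG_UADDO_USUBO_UADDE_USUBE: selects unsigned add/sub with carry-out and
// optional carry-in. A VCC-bank carry uses the VALU carry instructions; an SCC
// carry uses S_ADD/S_SUB (S_ADDC/S_SUBB when a carry-in is consumed).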
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;
  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
                       .add(I.getOperand(2))
                       .add(I.getOperand(3));
  if (MRI->use_nodbg_empty(Dst1Reg)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
                                 AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
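// selectG_EXTRACT: selects a G_EXTRACT of a 32-bit-aligned slice by copying
// from the corresponding sub-register of the source; offsets that are not a
// multiple of 32 (or results wider than 128 bits) are rejected.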
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                        *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
  MI.eraseFromParent();
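// selectG_BUILD_VECTOR: selects G_BUILD_VECTOR / G_BUILD_VECTOR_TRUNC. Vectors
// of 32-bit (or wider) elements become REG_SEQUENCEs via selectG_MERGE_VALUES;
// v2s16 builds are packed from constants or shifts, using V_LSHL_OR_B32 or the
// S_PACK_* instructions depending on the destination register bank.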
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
      MI.eraseFromParent();
    MI.eraseFromParent();
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));
868bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(
MachineInstr &
I)
const {
874 if ((!RC && !
MRI->getRegBankOrNull(MO.
getReg())) ||
876 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
883bool AMDGPUInstructionSelector::selectG_INSERT(
MachineInstr &
I)
const {
886 Register DstReg =
I.getOperand(0).getReg();
887 Register Src0Reg =
I.getOperand(1).getReg();
888 Register Src1Reg =
I.getOperand(2).getReg();
889 LLT Src1Ty =
MRI->getType(Src1Reg);
891 unsigned DstSize =
MRI->getType(DstReg).getSizeInBits();
894 int64_t
Offset =
I.getOperand(3).getImm();
897 if (
Offset % 32 != 0 || InsSize % 32 != 0)
905 if (
SubReg == AMDGPU::NoSubRegister)
923 Src0RC = TRI.getSubClassWithSubReg(Src0RC,
SubReg);
924 if (!Src0RC || !Src1RC)
933 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
942bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(
MachineInstr &
MI)
const {
945 Register OffsetReg =
MI.getOperand(2).getReg();
946 Register WidthReg =
MI.getOperand(3).getReg();
949 "scalar BFX instructions are expanded in regbankselect");
950 assert(
MRI->getType(
MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
951 "64-bit vector BFX instructions are expanded in regbankselect");
956 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SBFX;
957 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
962 MI.eraseFromParent();
966bool AMDGPUInstructionSelector::selectInterpP1F16(
MachineInstr &
MI)
const {
985 Register InterpMov =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
991 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
1007 MI.eraseFromParent();
1016bool AMDGPUInstructionSelector::selectWritelane(
MachineInstr &
MI)
const {
1025 Register LaneSelect =
MI.getOperand(3).getReg();
1028 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
1030 std::optional<ValueAndVReg> ConstSelect =
1036 MIB.
addImm(ConstSelect->Value.getSExtValue() &
1039 std::optional<ValueAndVReg> ConstVal =
1046 MIB.
addImm(ConstVal->Value.getSExtValue());
1056 BuildMI(*
MBB, *MIB,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1064 MI.eraseFromParent();
1070bool AMDGPUInstructionSelector::selectDivScale(
MachineInstr &
MI)
const {
1074 LLT Ty =
MRI->getType(Dst0);
1077 Opc = AMDGPU::V_DIV_SCALE_F32_e64;
1079 Opc = AMDGPU::V_DIV_SCALE_F64_e64;
1090 unsigned ChooseDenom =
MI.getOperand(5).getImm();
1092 Register Src0 = ChooseDenom != 0 ? Numer : Denom;
1105 MI.eraseFromParent();
1109bool AMDGPUInstructionSelector::selectG_INTRINSIC(
MachineInstr &
I)
const {
1110 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(
I).getIntrinsicID();
1111 switch (IntrinsicID) {
1112 case Intrinsic::amdgcn_if_break: {
1117 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
1118 .
add(
I.getOperand(0))
1119 .
add(
I.getOperand(2))
1120 .
add(
I.getOperand(3));
1122 Register DstReg =
I.getOperand(0).getReg();
1123 Register Src0Reg =
I.getOperand(2).getReg();
1124 Register Src1Reg =
I.getOperand(3).getReg();
1126 I.eraseFromParent();
1128 for (
Register Reg : { DstReg, Src0Reg, Src1Reg })
1133 case Intrinsic::amdgcn_interp_p1_f16:
1134 return selectInterpP1F16(
I);
1135 case Intrinsic::amdgcn_wqm:
1136 return constrainCopyLikeIntrin(
I, AMDGPU::WQM);
1137 case Intrinsic::amdgcn_softwqm:
1138 return constrainCopyLikeIntrin(
I, AMDGPU::SOFT_WQM);
1139 case Intrinsic::amdgcn_strict_wwm:
1140 case Intrinsic::amdgcn_wwm:
1141 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WWM);
1142 case Intrinsic::amdgcn_strict_wqm:
1143 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WQM);
1144 case Intrinsic::amdgcn_writelane:
1145 return selectWritelane(
I);
1146 case Intrinsic::amdgcn_div_scale:
1147 return selectDivScale(
I);
1148 case Intrinsic::amdgcn_icmp:
1149 case Intrinsic::amdgcn_fcmp:
1152 return selectIntrinsicCmp(
I);
1153 case Intrinsic::amdgcn_ballot:
1154 return selectBallot(
I);
1155 case Intrinsic::amdgcn_reloc_constant:
1156 return selectRelocConstant(
I);
1157 case Intrinsic::amdgcn_groupstaticsize:
1158 return selectGroupStaticSize(
I);
1159 case Intrinsic::returnaddress:
1160 return selectReturnAddress(
I);
1161 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
1162 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
1163 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
1164 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
1165 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
1166 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1167 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1168 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1169 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1170 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1171 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1172 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1173 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1174 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
1175 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
1176 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
1177 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
1178 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
1179 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
1180 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
1181 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
1182 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
1183 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
1184 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
1185 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
1186 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
1187 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
1188 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
1189 return selectSMFMACIntrin(
I);
1190 case Intrinsic::amdgcn_permlane16_swap:
1191 case Intrinsic::amdgcn_permlane32_swap:
1192 return selectPermlaneSwapIntrin(
I, IntrinsicID);
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1223 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1224 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1225 AMDGPU::V_CMP_NE_U64_e64);
1227 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1228 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1229 AMDGPU::V_CMP_EQ_U64_e64);
1231 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1232 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1233 AMDGPU::V_CMP_GT_I64_e64);
1235 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1236 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1237 AMDGPU::V_CMP_GE_I64_e64);
1239 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1240 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1241 AMDGPU::V_CMP_LT_I64_e64);
1243 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1244 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1245 AMDGPU::V_CMP_LE_I64_e64);
1247 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1248 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1249 AMDGPU::V_CMP_GT_U64_e64);
1251 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1252 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1253 AMDGPU::V_CMP_GE_U64_e64);
1255 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1256 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1257 AMDGPU::V_CMP_LT_U64_e64);
1259 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1260 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1261 AMDGPU::V_CMP_LE_U64_e64);
1264 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1265 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1266 AMDGPU::V_CMP_EQ_F64_e64);
1268 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1269 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1270 AMDGPU::V_CMP_GT_F64_e64);
1272 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1273 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1274 AMDGPU::V_CMP_GE_F64_e64);
1276 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1277 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1278 AMDGPU::V_CMP_LT_F64_e64);
1280 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1281 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1282 AMDGPU::V_CMP_LE_F64_e64);
1284 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1285 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1286 AMDGPU::V_CMP_NEQ_F64_e64);
1288 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1289 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1290 AMDGPU::V_CMP_O_F64_e64);
1292 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1293 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1294 AMDGPU::V_CMP_U_F64_e64);
1296 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1297 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1298 AMDGPU::V_CMP_NLG_F64_e64);
1300 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1301 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1302 AMDGPU::V_CMP_NLE_F64_e64);
1304 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1305 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1306 AMDGPU::V_CMP_NLT_F64_e64);
1308 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1309 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1310 AMDGPU::V_CMP_NGE_F64_e64);
1312 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1313 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1314 AMDGPU::V_CMP_NGT_F64_e64);
1316 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1317 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1318 AMDGPU::V_CMP_NEQ_F64_e64);
1320 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1321 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1322 AMDGPU::V_CMP_TRU_F64_e64);
1324 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1325 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1326 AMDGPU::V_CMP_F_F64_e64);
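// getS_CMPOpcode: maps a compare predicate and operand size to the scalar
// S_CMP_* opcode used when the compare result is produced in SCC rather than
// in a VCC lane mask.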
1331 unsigned Size)
const {
1338 return AMDGPU::S_CMP_LG_U64;
1340 return AMDGPU::S_CMP_EQ_U64;
1349 return AMDGPU::S_CMP_LG_U32;
1351 return AMDGPU::S_CMP_EQ_U32;
1353 return AMDGPU::S_CMP_GT_I32;
1355 return AMDGPU::S_CMP_GE_I32;
1357 return AMDGPU::S_CMP_LT_I32;
1359 return AMDGPU::S_CMP_LE_I32;
1361 return AMDGPU::S_CMP_GT_U32;
1363 return AMDGPU::S_CMP_GE_U32;
1365 return AMDGPU::S_CMP_LT_U32;
1367 return AMDGPU::S_CMP_LE_U32;
1369 return AMDGPU::S_CMP_EQ_F32;
1371 return AMDGPU::S_CMP_GT_F32;
1373 return AMDGPU::S_CMP_GE_F32;
1375 return AMDGPU::S_CMP_LT_F32;
1377 return AMDGPU::S_CMP_LE_F32;
1379 return AMDGPU::S_CMP_LG_F32;
1381 return AMDGPU::S_CMP_O_F32;
1383 return AMDGPU::S_CMP_U_F32;
1385 return AMDGPU::S_CMP_NLG_F32;
1387 return AMDGPU::S_CMP_NLE_F32;
1389 return AMDGPU::S_CMP_NLT_F32;
1391 return AMDGPU::S_CMP_NGE_F32;
1393 return AMDGPU::S_CMP_NGT_F32;
1395 return AMDGPU::S_CMP_NEQ_F32;
1407 return AMDGPU::S_CMP_EQ_F16;
1409 return AMDGPU::S_CMP_GT_F16;
1411 return AMDGPU::S_CMP_GE_F16;
1413 return AMDGPU::S_CMP_LT_F16;
1415 return AMDGPU::S_CMP_LE_F16;
1417 return AMDGPU::S_CMP_LG_F16;
1419 return AMDGPU::S_CMP_O_F16;
1421 return AMDGPU::S_CMP_U_F16;
1423 return AMDGPU::S_CMP_NLG_F16;
1425 return AMDGPU::S_CMP_NLE_F16;
1427 return AMDGPU::S_CMP_NLT_F16;
1429 return AMDGPU::S_CMP_NGE_F16;
1431 return AMDGPU::S_CMP_NGT_F16;
1433 return AMDGPU::S_CMP_NEQ_F16;
1442bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(
MachineInstr &
I)
const {
1447 Register SrcReg =
I.getOperand(2).getReg();
1452 Register CCReg =
I.getOperand(0).getReg();
1453 if (!isVCC(CCReg, *MRI)) {
1454 int Opcode = getS_CMPOpcode(Pred,
Size);
1458 .
add(
I.getOperand(2))
1459 .
add(
I.getOperand(3));
1460 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CCReg)
1465 I.eraseFromParent();
1469 if (
I.getOpcode() == AMDGPU::G_FCMP)
1477 I.getOperand(0).getReg())
1478 .
add(
I.getOperand(2))
1479 .
add(
I.getOperand(3));
1483 I.eraseFromParent();
1487bool AMDGPUInstructionSelector::selectIntrinsicCmp(
MachineInstr &
I)
const {
1488 Register Dst =
I.getOperand(0).getReg();
1489 if (isVCC(Dst, *MRI))
1492 LLT DstTy =
MRI->getType(Dst);
1498 Register SrcReg =
I.getOperand(2).getReg();
1507 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
1508 I.eraseFromParent();
1519 auto [Src0, Src0Mods] = selectVOP3ModsImpl(
LHS.getReg());
1520 auto [Src1, Src1Mods] = selectVOP3ModsImpl(
RHS.getReg());
1522 copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &
I,
true);
1524 copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &
I,
true);
1525 SelectedMI =
BuildMI(*BB, &
I,
DL, TII.get(Opcode), Dst);
1527 SelectedMI.
addImm(Src0Mods);
1528 SelectedMI.
addReg(Src0Reg);
1530 SelectedMI.
addImm(Src1Mods);
1531 SelectedMI.
addReg(Src1Reg);
1541 I.eraseFromParent();
1552 if (
MI->getParent() !=
MBB)
1556 if (
MI->getOpcode() == AMDGPU::COPY) {
1557 auto DstRB =
MRI.getRegBankOrNull(
MI->getOperand(0).getReg());
1558 auto SrcRB =
MRI.getRegBankOrNull(
MI->getOperand(1).getReg());
1559 if (DstRB && SrcRB && DstRB->
getID() == AMDGPU::VCCRegBankID &&
1560 SrcRB->getID() == AMDGPU::SGPRRegBankID)
1565 if (isa<GAnyCmp>(
MI))
1577bool AMDGPUInstructionSelector::selectBallot(
MachineInstr &
I)
const {
1580 Register DstReg =
I.getOperand(0).getReg();
1581 Register SrcReg =
I.getOperand(2).getReg();
1582 const unsigned BallotSize =
MRI->getType(DstReg).getSizeInBits();
1587 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
1590 std::optional<ValueAndVReg> Arg =
1595 if (BallotSize != WaveSize) {
1600 const int64_t
Value = Arg->
Value.getZExtValue();
1603 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
1620 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
1631 if (BallotSize != WaveSize) {
1632 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1634 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1641 I.eraseFromParent();
1645bool AMDGPUInstructionSelector::selectRelocConstant(
MachineInstr &
I)
const {
1646 Register DstReg =
I.getOperand(0).getReg();
1652 const bool IsVALU = DstBank->
getID() == AMDGPU::VGPRRegBankID;
1657 auto *RelocSymbol = cast<GlobalVariable>(
1662 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1665 I.eraseFromParent();
1669bool AMDGPUInstructionSelector::selectGroupStaticSize(
MachineInstr &
I)
const {
1672 Register DstReg =
I.getOperand(0).getReg();
1674 unsigned Mov = DstRB->
getID() == AMDGPU::SGPRRegBankID ?
1675 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1692 I.eraseFromParent();
1696bool AMDGPUInstructionSelector::selectReturnAddress(
MachineInstr &
I)
const {
1703 unsigned Depth =
I.getOperand(2).getImm();
1716 I.eraseFromParent();
1727 AMDGPU::SReg_64RegClass,
DL);
1730 I.eraseFromParent();
1734bool AMDGPUInstructionSelector::selectEndCfIntrinsic(
MachineInstr &
MI)
const {
1738 BuildMI(*BB, &
MI,
MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
1739 .
add(
MI.getOperand(1));
1742 MI.eraseFromParent();
1744 if (!
MRI->getRegClassOrNull(Reg))
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;
  if (WaveDone && !WaveRelease)
  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
  Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
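// selectDSGWSIntrinsic: selects the ds_gws_* intrinsics. The offset operand
// must be uniform and is placed in M0, so non-SGPR offsets are folded through
// an existing V_READFIRSTLANE_B32 where possible and constant parts are folded
// into the instruction's immediate offset field.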
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);
  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    std::tie(BaseOffset, ImmOffset) =
  if (Readfirstlane) {
                                 AMDGPU::SReg_32RegClass, *MRI))
  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
1923bool AMDGPUInstructionSelector::selectDSAppendConsume(
MachineInstr &
MI,
1924 bool IsAppend)
const {
1925 Register PtrBase =
MI.getOperand(2).getReg();
1926 LLT PtrTy =
MRI->getType(PtrBase);
1930 std::tie(PtrBase,
Offset) = selectDS1Addr1OffsetImpl(
MI.getOperand(2));
1933 if (!isDSOffsetLegal(PtrBase,
Offset)) {
1934 PtrBase =
MI.getOperand(2).getReg();
1940 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1951 MI.eraseFromParent();
1955bool AMDGPUInstructionSelector::selectInitWholeWave(
MachineInstr &
MI)
const {
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
  if (IntrinsicID == Intrinsic::amdgcn_s_barrier ||
      IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) {
    MI.eraseFromParent();
  if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
    MI.eraseFromParent();
  TFE = (TexFailCtrl & 0x1) ? true : false;
  LWE = (TexFailCtrl & 0x2) ? true : false;
  return TexFailCtrl == 0;
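// selectImageIntrinsic: selects an image load/store/sample/atomic intrinsic to
// a concrete MIMG instruction, choosing the encoding (pre-GFX10, GFX10 NSA,
// GFX11+, ...) and computing the dmask, the number of VData and VAddr dwords,
// the D16/A16/G16 flags, and the texture-fail controls.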
2009bool AMDGPUInstructionSelector::selectImageIntrinsic(
2018 unsigned IntrOpcode =
Intr->BaseOpcode;
2023 const unsigned ArgOffset =
MI.getNumExplicitDefs() + 1;
2027 int NumVDataDwords = -1;
2028 bool IsD16 =
MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
2029 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
2035 Unorm =
MI.getOperand(ArgOffset +
Intr->UnormIndex).getImm() != 0;
2039 bool IsTexFail =
false;
2041 TFE, LWE, IsTexFail))
2044 const int Flags =
MI.getOperand(ArgOffset +
Intr->NumArgs).getImm();
2045 const bool IsA16 = (
Flags & 1) != 0;
2046 const bool IsG16 = (
Flags & 2) != 0;
2049 if (IsA16 && !STI.
hasG16() && !IsG16)
2053 unsigned DMaskLanes = 0;
2055 if (BaseOpcode->
Atomic) {
2056 VDataOut =
MI.getOperand(0).getReg();
2057 VDataIn =
MI.getOperand(2).getReg();
2058 LLT Ty =
MRI->getType(VDataIn);
2061 const bool Is64Bit = BaseOpcode->
AtomicX2 ?
2066 assert(
MI.getOperand(3).getReg() == AMDGPU::NoRegister);
2068 DMask = Is64Bit ? 0xf : 0x3;
2069 NumVDataDwords = Is64Bit ? 4 : 2;
2071 DMask = Is64Bit ? 0x3 : 0x1;
2072 NumVDataDwords = Is64Bit ? 2 : 1;
2075 DMask =
MI.getOperand(ArgOffset +
Intr->DMaskIndex).getImm();
2078 if (BaseOpcode->
Store) {
2079 VDataIn =
MI.getOperand(1).getReg();
2080 VDataTy =
MRI->getType(VDataIn);
2085 VDataOut =
MI.getOperand(0).getReg();
2086 VDataTy =
MRI->getType(VDataOut);
2087 NumVDataDwords = DMaskLanes;
2090 NumVDataDwords = (DMaskLanes + 1) / 2;
2095 if (Subtarget->
hasG16() && IsG16) {
2099 IntrOpcode = G16MappingInfo->
G16;
2103 assert((!IsTexFail || DMaskLanes >= 1) &&
"should have legalized this");
2105 unsigned CPol =
MI.getOperand(ArgOffset +
Intr->CachePolicyIndex).getImm();
2112 int NumVAddrRegs = 0;
2113 int NumVAddrDwords = 0;
2114 for (
unsigned I =
Intr->VAddrStart; I < Intr->VAddrEnd;
I++) {
2117 if (!AddrOp.
isReg())
2125 NumVAddrDwords += (
MRI->getType(
Addr).getSizeInBits() + 31) / 32;
2132 NumVAddrRegs != 1 &&
2134 : NumVAddrDwords == NumVAddrRegs);
2135 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2146 NumVDataDwords, NumVAddrDwords);
2147 }
else if (IsGFX11Plus) {
2149 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2150 : AMDGPU::MIMGEncGfx11Default,
2151 NumVDataDwords, NumVAddrDwords);
2152 }
else if (IsGFX10Plus) {
2154 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2155 : AMDGPU::MIMGEncGfx10Default,
2156 NumVDataDwords, NumVAddrDwords);
2160 NumVDataDwords, NumVAddrDwords);
2164 <<
"requested image instruction is not supported on this GPU\n");
2171 NumVDataDwords, NumVAddrDwords);
2174 NumVDataDwords, NumVAddrDwords);
2184 const bool Is64 =
MRI->getType(VDataOut).getSizeInBits() == 64;
2187 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2188 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2191 if (!
MRI->use_empty(VDataOut)) {
2204 for (
int I = 0;
I != NumVAddrRegs; ++
I) {
2206 if (
SrcOp.isReg()) {
2212 MIB.
addReg(
MI.getOperand(ArgOffset +
Intr->RsrcIndex).getReg());
2214 MIB.
addReg(
MI.getOperand(ArgOffset +
Intr->SampIndex).getReg());
2225 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2227 MIB.
addImm(IsA16 ? -1 : 0);
2241 MIB.
addImm(IsD16 ? -1 : 0);
2243 MI.eraseFromParent();
2251bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2262 unsigned Offset =
MI.getOperand(6).getImm();
2264 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
2272 MI.eraseFromParent();
2276bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2278 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(
I).getIntrinsicID();
2279 switch (IntrinsicID) {
2280 case Intrinsic::amdgcn_end_cf:
2281 return selectEndCfIntrinsic(
I);
2282 case Intrinsic::amdgcn_ds_ordered_add:
2283 case Intrinsic::amdgcn_ds_ordered_swap:
2284 return selectDSOrderedIntrinsic(
I, IntrinsicID);
2285 case Intrinsic::amdgcn_ds_gws_init:
2286 case Intrinsic::amdgcn_ds_gws_barrier:
2287 case Intrinsic::amdgcn_ds_gws_sema_v:
2288 case Intrinsic::amdgcn_ds_gws_sema_br:
2289 case Intrinsic::amdgcn_ds_gws_sema_p:
2290 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2291 return selectDSGWSIntrinsic(
I, IntrinsicID);
2292 case Intrinsic::amdgcn_ds_append:
2293 return selectDSAppendConsume(
I,
true);
2294 case Intrinsic::amdgcn_ds_consume:
2295 return selectDSAppendConsume(
I,
false);
2296 case Intrinsic::amdgcn_init_whole_wave:
2297 return selectInitWholeWave(
I);
2298 case Intrinsic::amdgcn_s_barrier:
2299 case Intrinsic::amdgcn_s_barrier_signal:
2300 case Intrinsic::amdgcn_s_barrier_wait:
2301 return selectSBarrier(
I);
2302 case Intrinsic::amdgcn_raw_buffer_load_lds:
2303 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2304 case Intrinsic::amdgcn_struct_buffer_load_lds:
2305 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2306 return selectBufferLoadLds(
I);
2307 case Intrinsic::amdgcn_global_load_lds:
2308 return selectGlobalLoadLds(
I);
2309 case Intrinsic::amdgcn_exp_compr:
2313 F,
"intrinsic not supported on subtarget",
I.getDebugLoc(),
DS_Error);
2314 F.getContext().diagnose(NoFpRet);
2318 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2319 return selectDSBvhStackIntrinsic(
I);
2320 case Intrinsic::amdgcn_s_barrier_init:
2321 case Intrinsic::amdgcn_s_barrier_signal_var:
2322 return selectNamedBarrierInit(
I, IntrinsicID);
2323 case Intrinsic::amdgcn_s_barrier_join:
2324 case Intrinsic::amdgcn_s_get_named_barrier_state:
2325 return selectNamedBarrierInst(
I, IntrinsicID);
2326 case Intrinsic::amdgcn_s_get_barrier_state:
2327 return selectSGetBarrierState(
I, IntrinsicID);
2328 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2329 return selectSBarrierSignalIsfirst(
I, IntrinsicID);
2334bool AMDGPUInstructionSelector::selectG_SELECT(
MachineInstr &
I)
const {
2341 Register DstReg =
I.getOperand(0).getReg();
2346 if (!isVCC(CCReg, *MRI)) {
2347 unsigned SelectOpcode =
Size == 64 ? AMDGPU::S_CSELECT_B64 :
2348 AMDGPU::S_CSELECT_B32;
2355 if (!
MRI->getRegClassOrNull(CCReg))
2358 .
add(
I.getOperand(2))
2359 .
add(
I.getOperand(3));
2364 I.eraseFromParent();
2373 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
2375 .
add(
I.getOperand(3))
2377 .
add(
I.getOperand(2))
2378 .
add(
I.getOperand(1));
2381 I.eraseFromParent();
2385bool AMDGPUInstructionSelector::selectG_TRUNC(
MachineInstr &
I)
const {
2386 Register DstReg =
I.getOperand(0).getReg();
2387 Register SrcReg =
I.getOperand(1).getReg();
2388 const LLT DstTy =
MRI->getType(DstReg);
2389 const LLT SrcTy =
MRI->getType(SrcReg);
2404 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
2413 if (!SrcRC || !DstRC)
2422 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2427 .
addReg(SrcReg, 0, AMDGPU::lo16);
2428 I.eraseFromParent();
2436 Register LoReg =
MRI->createVirtualRegister(DstRC);
2437 Register HiReg =
MRI->createVirtualRegister(DstRC);
2439 .
addReg(SrcReg, 0, AMDGPU::sub0);
2441 .
addReg(SrcReg, 0, AMDGPU::sub1);
2443 if (IsVALU && STI.
hasSDWA()) {
2447 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
2457 Register TmpReg0 =
MRI->createVirtualRegister(DstRC);
2458 Register TmpReg1 =
MRI->createVirtualRegister(DstRC);
2459 Register ImmReg =
MRI->createVirtualRegister(DstRC);
2461 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
2471 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2472 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
2473 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
2485 And.setOperandDead(3);
2486 Or.setOperandDead(3);
2490 I.eraseFromParent();
2498 unsigned SubRegIdx =
2500 if (SubRegIdx == AMDGPU::NoSubRegister)
2506 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
2510 if (SrcWithSubRC != SrcRC) {
2515 I.getOperand(1).setSubReg(SubRegIdx);
2518 I.setDesc(TII.get(TargetOpcode::COPY));
2524 Mask = maskTrailingOnes<unsigned>(
Size);
2525 int SignedMask =
static_cast<int>(Mask);
2526 return SignedMask >= -16 && SignedMask <= 64;
2530const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2534 if (
auto *RB = dyn_cast<const RegisterBank *>(RegClassOrBank))
2538 if (
auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
2543bool AMDGPUInstructionSelector::selectG_SZA_EXT(
MachineInstr &
I)
const {
2544 bool InReg =
I.getOpcode() == AMDGPU::G_SEXT_INREG;
2545 bool Signed =
I.getOpcode() == AMDGPU::G_SEXT || InReg;
2548 const Register DstReg =
I.getOperand(0).getReg();
2549 const Register SrcReg =
I.getOperand(1).getReg();
2551 const LLT DstTy =
MRI->getType(DstReg);
2552 const LLT SrcTy =
MRI->getType(SrcReg);
2553 const unsigned SrcSize =
I.getOpcode() == AMDGPU::G_SEXT_INREG ?
2560 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2563 if (
I.getOpcode() == AMDGPU::G_ANYEXT) {
2565 return selectCOPY(
I);
2568 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2571 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2573 Register UndefReg =
MRI->createVirtualRegister(SrcRC);
2574 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2580 I.eraseFromParent();
2586 if (SrcBank->
getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
2596 I.eraseFromParent();
2600 const unsigned BFE =
Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2606 I.eraseFromParent();
2610 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
2612 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
2616 if (
Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
2617 const unsigned SextOpc = SrcSize == 8 ?
2618 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
2621 I.eraseFromParent();
2627 if (DstSize > 32 && SrcSize == 32) {
2628 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2629 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2644 I.eraseFromParent();
2649 const unsigned BFE64 =
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
2650 const unsigned BFE32 =
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2653 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
2655 Register ExtReg =
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
2656 Register UndefReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2657 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2659 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2670 I.eraseFromParent();
2686 I.eraseFromParent();
2721 if (Shuffle->
getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2728 assert(Mask.size() == 2);
2730 if (Mask[0] == 1 && Mask[1] <= 1) {
2738bool AMDGPUInstructionSelector::selectG_FPEXT(
MachineInstr &
I)
const {
2742 Register Dst =
I.getOperand(0).getReg();
2744 if (DstRB->
getID() != AMDGPU::SGPRRegBankID)
2747 Register Src =
I.getOperand(1).getReg();
2753 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2755 I.eraseFromParent();
2763bool AMDGPUInstructionSelector::selectG_FNEG(
MachineInstr &
MI)
const {
2777 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2792 Register LoReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2793 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2794 Register ConstReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2795 Register OpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2797 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2798 .
addReg(Src, 0, AMDGPU::sub0);
2799 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2800 .
addReg(Src, 0, AMDGPU::sub1);
2801 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2805 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
2810 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2815 MI.eraseFromParent();
2820bool AMDGPUInstructionSelector::selectG_FABS(
MachineInstr &
MI)
const {
2823 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2830 Register LoReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2831 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2832 Register ConstReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2833 Register OpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2839 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2840 .
addReg(Src, 0, AMDGPU::sub0);
2841 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2842 .
addReg(Src, 0, AMDGPU::sub1);
2843 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2848 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_AND_B32), OpReg)
2852 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2858 MI.eraseFromParent();
2863 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
2866void AMDGPUInstructionSelector::getAddrModeInfo(
const MachineInstr &Load,
2869 unsigned OpNo =
Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
2871 MRI.getUniqueVRegDef(
Load.getOperand(OpNo).getReg());
2875 if (PtrMI->
getOpcode() != TargetOpcode::G_PTR_ADD)
2880 for (
unsigned i = 1; i != 3; ++i) {
2887 assert(GEPInfo.Imm == 0);
2892 if (OpBank->
getID() == AMDGPU::SGPRRegBankID)
2893 GEPInfo.SgprParts.push_back(GEPOp.
getReg());
2895 GEPInfo.VgprParts.push_back(GEPOp.
getReg());
2899 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
2902bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
2903 return RBI.
getRegBank(Reg, *MRI, TRI)->
getID() == AMDGPU::SGPRRegBankID;
2906bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
2907 if (!
MI.hasOneMemOperand())
2917 if (!
Ptr || isa<UndefValue>(
Ptr) || isa<Argument>(
Ptr) ||
2918 isa<Constant>(
Ptr) || isa<GlobalValue>(
Ptr))
2924 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
2926 AMDGPU::SGPRRegBankID;
2929 return I &&
I->getMetadata(
"amdgpu.uniform");
2933 for (
const GEPInfo &GEPInfo : AddrInfo) {
2934 if (!GEPInfo.VgprParts.empty())
2940void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
2941 const LLT PtrTy =
MRI->getType(
I.getOperand(1).getReg());
2948 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2953bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
2960 if (Reg.isPhysical())
2964 const unsigned Opcode =
MI.getOpcode();
2966 if (Opcode == AMDGPU::COPY)
2969 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
2970 Opcode == AMDGPU::G_XOR)
2974 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI))
2975 return GI->is(Intrinsic::amdgcn_class);
2977 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
2980bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
2995 if (!isVCC(CondReg, *MRI)) {
2999 CondPhysReg = AMDGPU::SCC;
3000 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3001 ConstrainRC = &AMDGPU::SReg_32RegClass;
3009 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3010 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3013 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3020 CondPhysReg =
TRI.getVCC();
3021 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3022 ConstrainRC =
TRI.getBoolRC();
3025 if (!
MRI->getRegClassOrNull(CondReg))
3026 MRI->setRegClass(CondReg, ConstrainRC);
3028 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3031 .
addMBB(
I.getOperand(1).getMBB());
3033 I.eraseFromParent();
3037bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3039 Register DstReg =
I.getOperand(0).getReg();
3041 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3042 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3047 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3050bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3051 Register DstReg =
I.getOperand(0).getReg();
3052 Register SrcReg =
I.getOperand(1).getReg();
3053 Register MaskReg =
I.getOperand(2).getReg();
3054 LLT Ty =
MRI->getType(DstReg);
3055 LLT MaskTy =
MRI->getType(MaskReg);
3062 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3072 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3073 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3076 !CanCopyLow32 && !CanCopyHi32) {
3077 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3081 I.eraseFromParent();
3085 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3087 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3092 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3101 "ptrmask should have been narrowed during legalize");
3103 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3109 I.eraseFromParent();
3113 Register HiReg =
MRI->createVirtualRegister(&RegRC);
3114 Register LoReg =
MRI->createVirtualRegister(&RegRC);
3117 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3118 .
addReg(SrcReg, 0, AMDGPU::sub0);
3119 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3120 .
addReg(SrcReg, 0, AMDGPU::sub1);
3129 Register MaskLo =
MRI->createVirtualRegister(&RegRC);
3130 MaskedLo =
MRI->createVirtualRegister(&RegRC);
3132 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3133 .
addReg(MaskReg, 0, AMDGPU::sub0);
3134 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3143 Register MaskHi =
MRI->createVirtualRegister(&RegRC);
3144 MaskedHi =
MRI->createVirtualRegister(&RegRC);
3146 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3147 .
addReg(MaskReg, 0, AMDGPU::sub1);
3148 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3153 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3158 I.eraseFromParent();
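// The static helper below appears to compute the sub-register index for an
// indirect vector access: it splits the index into a base register plus a
// constant offset and returns the matching sub-register, guarding against
// out-of-bounds constant offsets.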
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
3189bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3195 LLT DstTy =
MRI->getType(DstReg);
3196 LLT SrcTy =
MRI->getType(SrcReg);
3204 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3208 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3210 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3211 if (!SrcRC || !DstRC)
3226 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3230 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3233 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3237 MI.eraseFromParent();
3245 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3247 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3250 MI.eraseFromParent();
3261 MI.eraseFromParent();
3266bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3273 LLT VecTy =
MRI->getType(DstReg);
3274 LLT ValTy =
MRI->getType(ValReg);
3286 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3290 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3292 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3300 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3304 std::tie(IdxReg,
SubReg) =
3307 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3314 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3318 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3323 MI.eraseFromParent();
3335 MI.eraseFromParent();
3339bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3342 unsigned Size =
MI.getOperand(3).getImm();
3345 const bool HasVIndex =
MI.getNumOperands() == 9;
3349 VIndex =
MI.getOperand(4).getReg();
3353 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3354 std::optional<ValueAndVReg> MaybeVOffset =
3356 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3362 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3363 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3364 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3365 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3368 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3369 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3370 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3371 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3374 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3375 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3376 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3377 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3383 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3384 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3385 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3386 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3392 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3393 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3394 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3395 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3402 .
add(
MI.getOperand(2));
3406 if (HasVIndex && HasVOffset) {
3407 Register IdxReg =
MRI->createVirtualRegister(
TRI.getVGPR64Class());
3408 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3415 }
else if (HasVIndex) {
3417 }
else if (HasVOffset) {
3421 MIB.
add(
MI.getOperand(1));
3422 MIB.
add(
MI.getOperand(5 + OpOffset));
3423 MIB.
add(
MI.getOperand(6 + OpOffset));
3425 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3435 LoadPtrI.
Offset =
MI.getOperand(6 + OpOffset).getImm();
3437 StorePtrI.
V =
nullptr;
3451 MI.eraseFromParent();
3463 if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3466 assert(Def->getNumOperands() == 3 &&
3469 return Def->getOperand(1).getReg();
3475bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3477 unsigned Size =
MI.getOperand(3).getImm();
3483 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3486 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3489 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3494 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3499 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3506 .
add(
MI.getOperand(2));
3512 if (!isSGPR(
Addr)) {
3514 if (isSGPR(AddrDef->Reg)) {
3515 Addr = AddrDef->Reg;
3516 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3519 if (isSGPR(SAddr)) {
3520 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3532 VOffset =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3544 MIB.
add(
MI.getOperand(4))
3545 .
add(
MI.getOperand(5));
3549 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3559 sizeof(int32_t),
Align(4));
3563 MI.eraseFromParent();
3567bool AMDGPUInstructionSelector::selectBVHIntrinsic(
MachineInstr &
MI)
const{
3568 MI.setDesc(TII.get(
MI.getOperand(1).getImm()));
3569 MI.removeOperand(1);
3570 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
3576bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3579 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3580 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3582 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3583 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3585 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3586 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3588 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3589 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3591 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3592 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3594 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3595 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3597 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3598 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3600 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3601 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3603 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3604 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3606 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3607 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3609 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3610 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3612 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3613 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3615 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3616 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3618 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3619 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3621 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3622 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3624 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3625 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3627 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3628 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3630 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3631 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3633 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3634 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3636 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3637 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3639 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3640 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3642 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3643 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3645 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3646 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3648 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3649 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3651 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3652 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3654 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3655 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3657 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3658 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3660 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3661 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3667 auto VDst_In =
MI.getOperand(4);
3669 MI.setDesc(TII.get(Opc));
3670 MI.removeOperand(4);
3671 MI.removeOperand(1);
3672 MI.addOperand(VDst_In);
3673 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
3677bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3679 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3682 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3686 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3687 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3688 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3690 MI.removeOperand(2);
3691 MI.setDesc(TII.get(Opcode));
3700bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
3704 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3709 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
3720 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3724 MI.eraseFromParent();
  unsigned NumOpcodes = 0;
  const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
    for (unsigned I = 0; I < Src.size(); ++I) {
    if (Src.size() == 3) {
    for (unsigned I = 0; I < Src.size(); ++I) {
      if (Src[I] == LHS) {
    Bits = SrcBits[Src.size()];
  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
  if (!getOperandBits(LHS, LHSBits) ||
      !getOperandBits(RHS, RHSBits)) {
    return std::make_pair(0, 0);
    NumOpcodes += Op.first;
    LHSBits = Op.second;
    NumOpcodes += Op.first;
    RHSBits = Op.second;
    return std::make_pair(0, 0);
  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
    TTbl = LHSBits & RHSBits;
  case TargetOpcode::G_OR:
    TTbl = LHSBits | RHSBits;
  case TargetOpcode::G_XOR:
    TTbl = LHSBits ^ RHSBits;
  return std::make_pair(NumOpcodes + 1, TTbl);
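// selectBITOP3: folds a tree of G_AND/G_OR/G_XOR (analyzed by the helper
// above, which returns the opcode count and an 8-bit truth table over the
// canonical source masks 0xf0/0xcc/0xaa) into a single V_BITOP3_B32/B16
// instruction when doing so saves at least one ALU operation.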
3845bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
3851 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3857 unsigned NumOpcodes;
3859 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
3863 if (NumOpcodes < 2 || Src.empty())
3867 if (NumOpcodes == 2 && IsB32) {
3875 }
else if (NumOpcodes < 4) {
3882 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
3887 for (unsigned I = 0; I < Src.size(); ++I) {
3889 if (RB->getID() != AMDGPU::SGPRRegBankID)
3895 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3906 while (Src.size() < 3)
3907 Src.push_back(Src[0]);
3924 MI.eraseFromParent();
3929bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
3942 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3952 MI.eraseFromParent();
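// Added commentary (not from the original source): what follows is the main
// InstructionSelector entry point.  Instructions that are no longer generic
// (not pre-ISel opcodes) are handled as plain copies here, while each generic
// G_* opcode is dispatched to the matching select* helper; opcodes with no
// special case presumably fall through to the TableGen-generated selectImpl
// patterns (that default path is elided in this listing).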
3958 if (!I.isPreISelOpcode()) {
3960 return selectCOPY(I);
3964 switch (I.getOpcode()) {
3965 case TargetOpcode::G_AND:
3966 case TargetOpcode::G_OR:
3967 case TargetOpcode::G_XOR:
3968 if (selectBITOP3(I))
3972 return selectG_AND_OR_XOR(I);
3973 case TargetOpcode::G_ADD:
3974 case TargetOpcode::G_SUB:
3975 case TargetOpcode::G_PTR_ADD:
3978 return selectG_ADD_SUB(I);
3979 case TargetOpcode::G_UADDO:
3980 case TargetOpcode::G_USUBO:
3981 case TargetOpcode::G_UADDE:
3982 case TargetOpcode::G_USUBE:
3983 return selectG_UADDO_USUBO_UADDE_USUBE(I);
3984 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3985 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3986 return selectG_AMDGPU_MAD_64_32(I);
3987 case TargetOpcode::G_INTTOPTR:
3988 case TargetOpcode::G_BITCAST:
3989 case TargetOpcode::G_PTRTOINT:
3990 case TargetOpcode::G_FREEZE:
3991 return selectCOPY(I);
3992 case TargetOpcode::G_FNEG:
3995 return selectG_FNEG(I);
3996 case TargetOpcode::G_FABS:
3999 return selectG_FABS(I);
4000 case TargetOpcode::G_EXTRACT:
4001 return selectG_EXTRACT(I);
4002 case TargetOpcode::G_MERGE_VALUES:
4003 case TargetOpcode::G_CONCAT_VECTORS:
4004 return selectG_MERGE_VALUES(I);
4005 case TargetOpcode::G_UNMERGE_VALUES:
4006 return selectG_UNMERGE_VALUES(I);
4007 case TargetOpcode::G_BUILD_VECTOR:
4008 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4009 return selectG_BUILD_VECTOR(I);
4010 case TargetOpcode::G_IMPLICIT_DEF:
4011 return selectG_IMPLICIT_DEF(I);
4012 case TargetOpcode::G_INSERT:
4013 return selectG_INSERT(I);
4014 case TargetOpcode::G_INTRINSIC:
4015 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4016 return selectG_INTRINSIC(I);
4017 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4018 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4019 return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
4020 case TargetOpcode::G_ICMP:
4021 case TargetOpcode::G_FCMP:
4022 if (selectG_ICMP_or_FCMP(I))
4025 case TargetOpcode::G_LOAD:
4026 case TargetOpcode::G_ZEXTLOAD:
4027 case TargetOpcode::G_SEXTLOAD:
4028 case TargetOpcode::G_STORE:
4029 case TargetOpcode::G_ATOMIC_CMPXCHG:
4030 case TargetOpcode::G_ATOMICRMW_XCHG:
4031 case TargetOpcode::G_ATOMICRMW_ADD:
4032 case TargetOpcode::G_ATOMICRMW_SUB:
4033 case TargetOpcode::G_ATOMICRMW_AND:
4034 case TargetOpcode::G_ATOMICRMW_OR:
4035 case TargetOpcode::G_ATOMICRMW_XOR:
4036 case TargetOpcode::G_ATOMICRMW_MIN:
4037 case TargetOpcode::G_ATOMICRMW_MAX:
4038 case TargetOpcode::G_ATOMICRMW_UMIN:
4039 case TargetOpcode::G_ATOMICRMW_UMAX:
4040 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4041 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4042 case TargetOpcode::G_ATOMICRMW_FADD:
4043 case TargetOpcode::G_ATOMICRMW_FMIN:
4044 case TargetOpcode::G_ATOMICRMW_FMAX:
4045 return selectG_LOAD_STORE_ATOMICRMW(I);
4046 case TargetOpcode::G_SELECT:
4047 return selectG_SELECT(I);
4048 case TargetOpcode::G_TRUNC:
4049 return selectG_TRUNC(I);
4050 case TargetOpcode::G_SEXT:
4051 case TargetOpcode::G_ZEXT:
4052 case TargetOpcode::G_ANYEXT:
4053 case TargetOpcode::G_SEXT_INREG:
4060 return selectG_SZA_EXT(I);
4061 case TargetOpcode::G_FPEXT:
4062 if (selectG_FPEXT(I))
4065 case TargetOpcode::G_BRCOND:
4066 return selectG_BRCOND(I);
4067 case TargetOpcode::G_GLOBAL_VALUE:
4068 return selectG_GLOBAL_VALUE(I);
4069 case TargetOpcode::G_PTRMASK:
4070 return selectG_PTRMASK(I);
4071 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4072 return selectG_EXTRACT_VECTOR_ELT(I);
4073 case TargetOpcode::G_INSERT_VECTOR_ELT:
4074 return selectG_INSERT_VECTOR_ELT(I);
4075 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4076 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4077 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4078 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4079 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4082 assert(Intr && "not an image intrinsic with image pseudo");
4083 return selectImageIntrinsic(I, Intr);
4085 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
4086 return selectBVHIntrinsic(I);
4087 case AMDGPU::G_SBFX:
4088 case AMDGPU::G_UBFX:
4089 return selectG_SBFX_UBFX(I);
4090 case AMDGPU::G_SI_CALL:
4091 I.setDesc(TII.get(AMDGPU::SI_CALL));
4093 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4094 return selectWaveAddress(I);
4095 case AMDGPU::G_STACKRESTORE:
4096 return selectStackRestore(I);
4098 return selectPHI(I);
4099 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4100 return selectCOPY_SCC_VCC(I);
4101 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4102 return selectCOPY_VCC_SCC(I);
4103 case AMDGPU::G_AMDGPU_READANYLANE:
4104 return selectReadAnyLane(I);
4105 case TargetOpcode::G_CONSTANT:
4106 case TargetOpcode::G_FCONSTANT:
4114AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
4121std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4122 Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
4126 if (MI->getOpcode() == AMDGPU::G_FNEG) {
4127 Src = MI->getOperand(1).getReg();
4130 } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4135 if (LHS && LHS->isZero()) {
4137 Src = MI->getOperand(2).getReg();
4141 if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
4142 Src = MI->getOperand(1).getReg();
4149 return std::pair(Src, Mods);
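// Added commentary (not from the original source): selectVOP3ModsImpl strips
// fneg / canonicalizing (0 - x) / fabs wrappers off the source and records
// them as NEG / ABS source-modifier bits, so e.g. a use of fneg(fabs(x)) can
// be selected as the plain register x with both modifier bits set on the VOP3
// operand instead of emitting separate instructions.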
4152Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4154 bool ForceVGPR) const {
4155 if ((Mods != 0 || ForceVGPR) &&
4163 TII.get(AMDGPU::COPY), VGPRSrc)
4175AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
4182AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
4185 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
4189 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4198AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
4201 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
4207 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4216AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
4225AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
4228 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
4232 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4239AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4243 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), false);
4248 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4255AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
4258 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), true,
4264 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4271AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
4274 if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
4281std::pair<Register, unsigned>
4282AMDGPUInstructionSelector::selectVOP3PModsImpl(
4287 if (MI->getOpcode() == AMDGPU::G_FNEG &&
4292 Src = MI->getOperand(1).getReg();
4293 MI = MRI.getVRegDef(Src);
4304 return std::pair(Src, Mods);
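// Added commentary (not from the original source): as I read it, the packed
// (VOP3P) path above cannot use the scalar per-operand NEG/ABS bits; an fneg
// of a whole packed value is instead folded into the neg / neg_hi halves of
// the packed source-modifier word, which is why only G_FNEG is peeled off
// here before the source is looked through again.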
4308AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
4314 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);
4323AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
4329 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);
4338AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
4343 "expected i1 value");
4353AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
4356 "expected i1 value");
4370 switch (Elts.size()) {
4372 DstRegClass = &AMDGPU::VReg_256RegClass;
4375 DstRegClass = &AMDGPU::VReg_128RegClass;
4378 DstRegClass = &AMDGPU::VReg_64RegClass;
4385 auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
4386 .addDef(MRI.createVirtualRegister(DstRegClass));
4387 for (unsigned i = 0; i < Elts.size(); ++i) {
4398 if (ModOpcode == TargetOpcode::G_FNEG) {
4402 for (auto El : Elts) {
4408 if (Elts.size() != NegAbsElts.size()) {
4417 assert(ModOpcode == TargetOpcode::G_FABS);
4425AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
4430 if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
4431 assert(BV->getNumSources() > 0);
4434 unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
4437 for (unsigned i = 0; i < BV->getNumSources(); ++i) {
4438 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
4445 if (BV->getNumSources() == EltsF32.size()) {
4456AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
4462 for (unsigned i = 0; i < CV->getNumSources(); ++i) {
4470 if (CV->getNumSources() == EltsV2F16.size()) {
4482AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
4488 assert(CV->getNumSources() > 0);
4491 unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
4495 for (unsigned i = 0; i < CV->getNumSources(); ++i) {
4496 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
4503 if (CV->getNumSources() == EltsV2F16.size()) {
4515AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
4516 std::optional<FPValueAndVReg> FPValReg;
4520 MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
4540AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
4546 std::optional<ValueAndVReg> ShiftAmt;
4548 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4549 ShiftAmt->Value.getZExtValue() % 8 == 0) {
4550 Key = ShiftAmt->Value.getZExtValue() / 8;
4561AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
4568 std::optional<ValueAndVReg> ShiftAmt;
4570 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4571 ShiftAmt->Value.getZExtValue() == 16) {
4583AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
4586 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
4596AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
4599 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
4607 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
4614AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
4617 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
4625 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
4631bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
4641 getAddrModeInfo(*MI, *MRI, AddrInfo);
4643 if (AddrInfo.empty())
4646 const GEPInfo &GEPI = AddrInfo[0];
4647 std::optional<int64_t> EncodedImm;
4652 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
4653 AddrInfo.size() > 1) {
4654 const GEPInfo &GEPI2 = AddrInfo[1];
4655 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
4658 Base = GEPI2.SgprParts[0];
4659 *SOffset = OffsetReg;
4669 if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
4681 if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
4682 Base = GEPI.SgprParts[0];
4688 if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
4694 Base = GEPI.SgprParts[0];
4695 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4696 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
4701 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
4703 Base = GEPI.SgprParts[0];
4704 *SOffset = OffsetReg;
4713AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
4716 if (!selectSmrdOffset(Root, Base, nullptr, &Offset))
4717 return std::nullopt;
4724AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
4726 getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
4728 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
4729 return std::nullopt;
4731 const GEPInfo &GEPInfo = AddrInfo[0];
4732 Register PtrReg = GEPInfo.SgprParts[0];
4733 std::optional<int64_t> EncodedImm =
4736 return std::nullopt;
4745AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
4747 if (!selectSmrdOffset(Root, Base, &SOffset, nullptr))
4748 return std::nullopt;
4755AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
4758 if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
4759 return std::nullopt;
4766std::pair<Register, int>
4767AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
4777 int64_t ConstOffset;
4778 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
4782 !isFlatScratchBaseLegal(Root.getReg())))
4785 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
4789 return std::pair(PtrBase, ConstOffset);
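// Added commentary (not from the original source): the pointer is split into
// (base, constant offset), and the constant part is only kept when
// TII.isLegalFLATOffset accepts it as the immediate-offset field for this
// address space (flat, global, or scratch); otherwise the selector falls back
// to the original address with a zero offset.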
4793AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
4803AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
4813AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
4824AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
4827 int64_t ConstOffset;
4828 int64_t ImmOffset = 0;
4832 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4834 if (ConstOffset != 0) {
4838 ImmOffset = ConstOffset;
4841 if (isSGPR(PtrBaseDef->Reg)) {
4842 if (ConstOffset > 0) {
4848 int64_t SplitImmOffset, RemainderOffset;
4852 if (isUInt<32>(RemainderOffset)) {
4856 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4858 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
4860 .addImm(RemainderOffset);
4877 unsigned NumLiterals =
4881 return std::nullopt;
4888 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4893 if (isSGPR(SAddr)) {
4894 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
4914 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
4915 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
4916 return std::nullopt;
4922 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4924 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
4935AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
4938 int64_t ConstOffset;
4939 int64_t ImmOffset = 0;
4943 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4945 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
4949 ImmOffset = ConstOffset;
4953 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
4954 int FI = AddrDef->MI->getOperand(1).getIndex();
4963 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4964 Register LHS = AddrDef->MI->getOperand(1).getReg();
4965 Register RHS = AddrDef->MI->getOperand(2).getReg();
4969 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
4970 isSGPR(RHSDef->Reg)) {
4971 int FI = LHSDef->MI->getOperand(1).getIndex();
4975 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4977 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
4985 return std::nullopt;
4994bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
5005 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
5007 return (VMax & 3) + (SMax & 3) >= 4;
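// Added commentary (not from the original source): this is a conservative
// workaround check -- using the known-bits maxima of the VGPR and SGPR
// addends, the SVS scratch form is rejected whenever the low two bits of the
// two operands could sum past 3 (i.e. carry into bit 2), which appears to be
// the addressing pattern the subtarget's FlatScratchSVSSwizzleBug affects.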
5011AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
5014 int64_t ConstOffset;
5015 int64_t ImmOffset = 0;
5019 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
5022 if (ConstOffset != 0 &&
5025 ImmOffset = ConstOffset;
5029 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
5030 return std::nullopt;
5032 Register RHS = AddrDef->MI->getOperand(2).getReg();
5034 return std::nullopt;
5036 Register LHS = AddrDef->MI->getOperand(1).getReg();
5039 if (OrigAddr != Addr) {
5040 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
5041 return std::nullopt;
5043 if (!isFlatScratchBaseLegalSV(OrigAddr))
5044 return std::nullopt;
5047 if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
5048 return std::nullopt;
5050 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5051 int FI = LHSDef->MI->getOperand(1).getIndex();
5060 return std::nullopt;
5070AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
5079 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5084 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5108 std::optional<int> FI;
5113 int64_t ConstOffset;
5114 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
5115 if (ConstOffset != 0) {
5120 if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
5126 } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5149bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
5162bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
5164 unsigned Size) const {
5165 if (Offset0 % Size != 0 || Offset1 % Size != 0)
5167 if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
5180 return Addr->getOpcode() == TargetOpcode::G_OR ||
5181 (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
5188bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
5202 if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
5203 std::optional<ValueAndVReg> RhsValReg =
5209 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
5210 RhsValReg->Value.getSExtValue() > -0x40000000)
5219bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
5237bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
5246 std::optional<DefinitionAndSourceRegister> BaseDef =
5248 std::optional<ValueAndVReg> RHSOffset =
5258 (RHSOffset->Value.getSExtValue() < 0 &&
5259 RHSOffset->Value.getSExtValue() > -0x40000000)))
5262 Register LHS = BaseDef->MI->getOperand(1).getReg();
5263 Register RHS = BaseDef->MI->getOperand(2).getReg();
5267bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
5268 unsigned ShAmtBits) const {
5269 assert(MI.getOpcode() == TargetOpcode::G_AND);
5271 std::optional<APInt> RHS =
5276 if (RHS->countr_one() >= ShAmtBits)
5280 return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
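// Added commentary (not from the original source): a G_AND that only masks a
// shift amount is redundant because the hardware shifts read just
// log2(bitwidth) bits of the amount.  Example for a 32-bit shift
// (ShAmtBits == 5): "and %amt, 31" has five trailing ones, so the AND can be
// dropped and %amt used directly; the known-zeros fallback accepts wider
// masks once bits already known to be zero are folded in.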
5284AMDGPUInstructionSelector::selectMUBUFScratchOffset(
5289 std::optional<DefinitionAndSourceRegister> Def =
5291 assert(Def && "this shouldn't be an optional result");
5346std::pair<Register, unsigned>
5347AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
5349 int64_t ConstAddr = 0;
5353 std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
5357 if (isDSOffsetLegal(PtrBase, Offset)) {
5359 return std::pair(PtrBase, Offset);
5361 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
5370 return std::pair(Root.getReg(), 0);
5374AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
5377 std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
5385AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
5386 return selectDSReadWrite2(Root, 4);
5390AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
5391 return selectDSReadWrite2(Root, 8);
5395AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
5396 unsigned Size) const {
5399 std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
5407std::pair<Register, unsigned>
5408AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
5409 unsigned Size) const {
5411 int64_t ConstAddr = 0;
5415 std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
5419 int64_t OffsetValue0 = Offset;
5421 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
5423 return std::pair(PtrBase, OffsetValue0 / Size);
5425 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
5433 return std::pair(Root.getReg(), 0);
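// Added commentary (not from the original source): the two-register DS forms
// (ds_read2 / ds_write2 style) encode offset0/offset1 as 8-bit values in
// units of the element size, which is why isDSOffset2Legal divides by Size
// and the matched constant is returned as OffsetValue0 / Size.  For example,
// with Size == 4, byte offsets 0 and 4 become offset0 = 0 and offset1 = 1.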
5440std::pair<Register, int64_t>
5441AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
5444 if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
5448 std::optional<ValueAndVReg> MaybeOffset =
5464 Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5465 Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5466 Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5467 Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
5469 B.buildInstr(AMDGPU::S_MOV_B32)
5472 B.buildInstr(AMDGPU::S_MOV_B32)
5479 B.buildInstr(AMDGPU::REG_SEQUENCE)
5482 .addImm(AMDGPU::sub0)
5484 .addImm(AMDGPU::sub1);
5488 RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5489 B.buildInstr(AMDGPU::S_MOV_B64)
5494 B.buildInstr(AMDGPU::REG_SEQUENCE)
5497 .addImm(AMDGPU::sub0_sub1)
5499 .addImm(AMDGPU::sub2_sub3);
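// Added commentary (not from the original source): this helper assembles the
// 128-bit buffer resource descriptor used by the MUBUF paths below.  The two
// S_MOV_B32-materialized format words (FormatLo/FormatHi) are glued into a
// 64-bit high half, and the 64-bit base pointer (or an S_MOV_B64 constant
// when there is no base) provides sub0_sub1; a final REG_SEQUENCE exposes the
// result as a single SGPR_128 value.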
5506 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
5515 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
5522AMDGPUInstructionSelector::MUBUFAddressData
5523AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
5524 MUBUFAddressData Data;
5530 std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
5531 if (isUInt<32>(Offset)) {
5538 Data.N2 = InputAdd->getOperand(1).getReg();
5539 Data.N3 = InputAdd->getOperand(2).getReg();
5554bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
5561 return N0Bank->getID() == AMDGPU::VGPRRegBankID;
5567void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
5573 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5574 B.buildInstr(AMDGPU::S_MOV_B32)
5580bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
5588 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
5589 if (!shouldUseAddr64(AddrData))
5595 Offset = AddrData.Offset;
5601 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
5603 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
5616 } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
5627 splitIllegalMUBUFOffset(B, SOffset, Offset);
5631bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
5639 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
5640 if (shouldUseAddr64(AddrData))
5646 Offset = AddrData.Offset;
5652 splitIllegalMUBUFOffset(B, SOffset, Offset);
5657AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
5663 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
5679 MIB.addReg(AMDGPU::SGPR_NULL);
5693AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
5698 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
5709 MIB.addReg(AMDGPU::SGPR_NULL);
5721AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
5726 SOffset = AMDGPU::SGPR_NULL;
5732static std::optional<uint64_t>
5736 if (!OffsetVal || !isInt<32>(*OffsetVal))
5737 return std::nullopt;
5738 return Lo_32(*OffsetVal);
5742AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
5743 std::optional<uint64_t> OffsetVal =
5748 std::optional<int64_t> EncodedImm =
5757AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
5764 std::optional<int64_t> EncodedImm =
5773AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
5781 return std::nullopt;
5783 std::optional<int64_t> EncodedOffset =
5786 return std::nullopt;
5793std::pair<Register, unsigned>
5794AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
5795 bool &Matched) const {
5800 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
5810 const auto CheckAbsNeg = [&]() {
5815 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
5846AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
5851 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5862AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
5866 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5874bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
5878 Register CCReg = I.getOperand(0).getReg();
5880 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
5881 .addImm(I.getOperand(2).getImm());
5885 I.eraseFromParent();
5890bool AMDGPUInstructionSelector::selectSGetBarrierState(
5895 std::optional<int64_t> BarValImm =
5899 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
5904 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
5905 : AMDGPU::S_GET_BARRIER_STATE_M0;
5908 auto DstReg = I.getOperand(0).getReg();
5910 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
5917 I.eraseFromParent();
5922 if (HasInlineConst) {
5926 case Intrinsic::amdgcn_s_barrier_join:
5927 return AMDGPU::S_BARRIER_JOIN_IMM;
5928 case Intrinsic::amdgcn_s_get_named_barrier_state:
5929 return AMDGPU::S_GET_BARRIER_STATE_IMM;
5935 case Intrinsic::amdgcn_s_barrier_join:
5936 return AMDGPU::S_BARRIER_JOIN_M0;
5937 case Intrinsic::amdgcn_s_get_named_barrier_state:
5938 return AMDGPU::S_GET_BARRIER_STATE_M0;
5943bool AMDGPUInstructionSelector::selectNamedBarrierInit(
5951 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5957 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5964 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5970 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5971 constexpr unsigned ShAmt = 16;
5977 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5987 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
5988 ? AMDGPU::S_BARRIER_INIT_M0
5989 : AMDGPU::S_BARRIER_SIGNAL_M0;
5993 I.eraseFromParent();
5997bool AMDGPUInstructionSelector::selectNamedBarrierInst(
6001 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
6004 std::optional<int64_t> BarValImm =
6009 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6015 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6021 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6030 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
6031 auto DstReg = I.getOperand(0).getReg();
6033 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6040 auto BarId = ((*BarValImm) >> 4) & 0x3F;
6044 I.eraseFromParent();
6051 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6052 "Expected G_CONSTANT");
6053 MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
6059 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6060 "Expected G_CONSTANT");
6061 MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
6068 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
6069 MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
6075 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6076 "Expected G_CONSTANT");
6077 MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
6096 MIB.addImm(MI.getOperand(OpIdx).getImm() != 0);
6102 assert(OpIdx >= 0 && "expected to match an immediate operand");
6106void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
6108 assert(OpIdx >= 0 && "expected to match an immediate operand");
6113void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
6115 assert(OpIdx >= 0 && "expected to match an immediate operand");
6116 MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x2)
6121void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
6123 assert(OpIdx >= 0 && "expected to match an immediate operand");
6128void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
6130 assert(OpIdx >= 0 && "expected to match an immediate operand");
6131 MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x1)
6136void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
6138 assert(OpIdx >= 0 && "expected to match an immediate operand");
6143void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
6145 assert(OpIdx >= 0 && "expected to match an immediate operand");
6150void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
6152 assert(OpIdx >= 0 && "expected to match an immediate operand");
6157void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
6159 assert(OpIdx >= 0 && "expected to match an immediate operand");
6167 assert(OpIdx >= 0 && "expected to match an immediate operand");
6168 MIB.addImm(MI.getOperand(OpIdx).getImm() &
6176 assert(OpIdx >= 0 && "expected to match an immediate operand");
6177 const bool Swizzle = MI.getOperand(OpIdx).getImm() &
6183void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
6185 assert(OpIdx >= 0 && "expected to match an immediate operand");
6186 const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
6201 const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
6203 assert(ExpVal != INT_MIN);
6214 MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
6218void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
6220 unsigned Val = MI.getOperand(OpIdx).getImm();
6229bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
6233bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {