LLVM 20.0.0git
RISCVVLOptimizer.cpp
Go to the documentation of this file.
1//===-------------- RISCVVLOptimizer.cpp - VL Optimizer -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass reduces the VL where possible at the MI level, before VSETVLI
10// instructions are inserted.
11//
12// The purpose of this optimization is to make the VL argument, for instructions
13// that have a VL argument, as small as possible. This is implemented by
14// visiting each instruction in reverse order and checking that if it has a VL
15// argument, whether the VL can be reduced.
16//
17//===---------------------------------------------------------------------===//
18
19#include "RISCV.h"
20#include "RISCVSubtarget.h"
21#include "llvm/ADT/SetVector.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-vl-optimizer"
29#define PASS_NAME "RISC-V VL Optimizer"
30
31namespace {
32
33class RISCVVLOptimizer : public MachineFunctionPass {
35 const MachineDominatorTree *MDT;
36
37public:
38 static char ID;
39
40 RISCVVLOptimizer() : MachineFunctionPass(ID) {}
41
42 bool runOnMachineFunction(MachineFunction &MF) override;
43
44 void getAnalysisUsage(AnalysisUsage &AU) const override {
45 AU.setPreservesCFG();
48 }
49
50 StringRef getPassName() const override { return PASS_NAME; }
51
52private:
53 std::optional<MachineOperand> getMinimumVLForUser(MachineOperand &UserOp);
54 /// Returns the largest common VL MachineOperand that may be used to optimize
55 /// MI. Returns std::nullopt if it failed to find a suitable VL.
56 std::optional<MachineOperand> checkUsers(MachineInstr &MI);
57 bool tryReduceVL(MachineInstr &MI);
58 bool isCandidate(const MachineInstr &MI) const;
59};
60
61} // end anonymous namespace
62
63char RISCVVLOptimizer::ID = 0;
64INITIALIZE_PASS_BEGIN(RISCVVLOptimizer, DEBUG_TYPE, PASS_NAME, false, false)
67
69 return new RISCVVLOptimizer();
70}
71
72/// Return true if R is a physical or virtual vector register, false otherwise.
74 if (R.isPhysical())
75 return RISCV::VRRegClass.contains(R);
76 const TargetRegisterClass *RC = MRI->getRegClass(R);
77 return RISCVRI::isVRegClass(RC->TSFlags);
78}
79
80/// Represents the EMUL and EEW of a MachineOperand.
82 // Represent as 1,2,4,8, ... and fractional indicator. This is because
83 // EMUL can take on values that don't map to RISCVII::VLMUL values exactly.
84 // For example, a mask operand can have an EMUL less than MF8.
85 std::optional<std::pair<unsigned, bool>> EMUL;
86
87 unsigned Log2EEW;
88
90 : EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {}
91
92 OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
93 : EMUL(EMUL), Log2EEW(Log2EEW) {}
94
96
97 OperandInfo() = delete;
98
99 static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
100 return A.Log2EEW == B.Log2EEW && A.EMUL->first == B.EMUL->first &&
101 A.EMUL->second == B.EMUL->second;
102 }
103
104 static bool EEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
105 return A.Log2EEW == B.Log2EEW;
106 }
107
108 void print(raw_ostream &OS) const {
109 if (EMUL) {
110 OS << "EMUL: m";
111 if (EMUL->second)
112 OS << "f";
113 OS << EMUL->first;
114 } else
115 OS << "EMUL: unknown\n";
116 OS << ", EEW: " << (1 << Log2EEW);
117 }
118};
119
122 OI.print(OS);
123 return OS;
124}
125
128 const std::optional<OperandInfo> &OI) {
129 if (OI)
130 OI->print(OS);
131 else
132 OS << "nullopt";
133 return OS;
134}
135
136namespace llvm {
137namespace RISCVVType {
138/// Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and
139/// SEW are from the TSFlags of MI.
140static std::pair<unsigned, bool>
142 RISCVII::VLMUL MIVLMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
143 auto [MILMUL, MILMULIsFractional] = RISCVVType::decodeVLMUL(MIVLMUL);
144 unsigned MILog2SEW =
145 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
146
147 // Mask instructions will have 0 as the SEW operand. But the LMUL of these
148 // instructions is calculated as if the SEW operand was 3 (e8).
149 if (MILog2SEW == 0)
150 MILog2SEW = 3;
151
152 unsigned MISEW = 1 << MILog2SEW;
153
154 unsigned EEW = 1 << Log2EEW;
155 // Calculate (EEW/SEW)*LMUL preserving fractions less than 1. Use GCD
156 // to put fraction in simplest form.
157 unsigned Num = EEW, Denom = MISEW;
158 int GCD = MILMULIsFractional ? std::gcd(Num, Denom * MILMUL)
159 : std::gcd(Num * MILMUL, Denom);
160 Num = MILMULIsFractional ? Num / GCD : Num * MILMUL / GCD;
161 Denom = MILMULIsFractional ? Denom * MILMUL / GCD : Denom / GCD;
162 return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
163}
164} // end namespace RISCVVType
165} // end namespace llvm
166
167/// Dest has EEW=SEW. Source EEW=SEW/Factor (i.e. F2 => EEW/2).
168/// SEW comes from TSFlags of MI.
169static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
170 const MachineInstr &MI,
171 const MachineOperand &MO) {
172 unsigned MILog2SEW =
173 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
174
175 if (MO.getOperandNo() == 0)
176 return MILog2SEW;
177
178 unsigned MISEW = 1 << MILog2SEW;
179 unsigned EEW = MISEW / Factor;
180 unsigned Log2EEW = Log2_32(EEW);
181
182 return Log2EEW;
183}
184
185/// Check whether MO is a mask operand of MI.
186static bool isMaskOperand(const MachineInstr &MI, const MachineOperand &MO,
187 const MachineRegisterInfo *MRI) {
188
189 if (!MO.isReg() || !isVectorRegClass(MO.getReg(), MRI))
190 return false;
191
192 const MCInstrDesc &Desc = MI.getDesc();
193 return Desc.operands()[MO.getOperandNo()].RegClass == RISCV::VMV0RegClassID;
194}
195
196static std::optional<unsigned>
198 const MachineInstr &MI = *MO.getParent();
200 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
201 assert(RVV && "Could not find MI in PseudoTable");
202
203 // MI has a SEW associated with it. The RVV specification defines
204 // the EEW of each operand and definition in relation to MI.SEW.
205 unsigned MILog2SEW =
206 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
207
208 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc());
209 const bool IsTied = RISCVII::isTiedPseudo(MI.getDesc().TSFlags);
210
211 bool IsMODef = MO.getOperandNo() == 0;
212
213 // All mask operands have EEW=1
214 if (isMaskOperand(MI, MO, MRI))
215 return 0;
216
217 // switch against BaseInstr to reduce number of cases that need to be
218 // considered.
219 switch (RVV->BaseInstr) {
220
221 // 6. Configuration-Setting Instructions
222 // Configuration setting instructions do not read or write vector registers
223 case RISCV::VSETIVLI:
224 case RISCV::VSETVL:
225 case RISCV::VSETVLI:
226 llvm_unreachable("Configuration setting instructions do not read or write "
227 "vector registers");
228
229 // Vector Loads and Stores
230 // Vector Unit-Stride Instructions
231 // Vector Strided Instructions
232 /// Dest EEW encoded in the instruction
233 case RISCV::VLM_V:
234 case RISCV::VSM_V:
235 return 0;
236 case RISCV::VLE8_V:
237 case RISCV::VSE8_V:
238 case RISCV::VLSE8_V:
239 case RISCV::VSSE8_V:
240 return 3;
241 case RISCV::VLE16_V:
242 case RISCV::VSE16_V:
243 case RISCV::VLSE16_V:
244 case RISCV::VSSE16_V:
245 return 4;
246 case RISCV::VLE32_V:
247 case RISCV::VSE32_V:
248 case RISCV::VLSE32_V:
249 case RISCV::VSSE32_V:
250 return 5;
251 case RISCV::VLE64_V:
252 case RISCV::VSE64_V:
253 case RISCV::VLSE64_V:
254 case RISCV::VSSE64_V:
255 return 6;
256
257 // Vector Indexed Instructions
258 // vs(o|u)xei<eew>.v
259 // Dest/Data (operand 0) EEW=SEW. Source EEW=<eew>.
260 case RISCV::VLUXEI8_V:
261 case RISCV::VLOXEI8_V:
262 case RISCV::VSUXEI8_V:
263 case RISCV::VSOXEI8_V: {
264 if (MO.getOperandNo() == 0)
265 return MILog2SEW;
266 return 3;
267 }
268 case RISCV::VLUXEI16_V:
269 case RISCV::VLOXEI16_V:
270 case RISCV::VSUXEI16_V:
271 case RISCV::VSOXEI16_V: {
272 if (MO.getOperandNo() == 0)
273 return MILog2SEW;
274 return 4;
275 }
276 case RISCV::VLUXEI32_V:
277 case RISCV::VLOXEI32_V:
278 case RISCV::VSUXEI32_V:
279 case RISCV::VSOXEI32_V: {
280 if (MO.getOperandNo() == 0)
281 return MILog2SEW;
282 return 5;
283 }
284 case RISCV::VLUXEI64_V:
285 case RISCV::VLOXEI64_V:
286 case RISCV::VSUXEI64_V:
287 case RISCV::VSOXEI64_V: {
288 if (MO.getOperandNo() == 0)
289 return MILog2SEW;
290 return 6;
291 }
292
293 // Vector Integer Arithmetic Instructions
294 // Vector Single-Width Integer Add and Subtract
295 case RISCV::VADD_VI:
296 case RISCV::VADD_VV:
297 case RISCV::VADD_VX:
298 case RISCV::VSUB_VV:
299 case RISCV::VSUB_VX:
300 case RISCV::VRSUB_VI:
301 case RISCV::VRSUB_VX:
302 // Vector Bitwise Logical Instructions
303 // Vector Single-Width Shift Instructions
304 // EEW=SEW.
305 case RISCV::VAND_VI:
306 case RISCV::VAND_VV:
307 case RISCV::VAND_VX:
308 case RISCV::VOR_VI:
309 case RISCV::VOR_VV:
310 case RISCV::VOR_VX:
311 case RISCV::VXOR_VI:
312 case RISCV::VXOR_VV:
313 case RISCV::VXOR_VX:
314 case RISCV::VSLL_VI:
315 case RISCV::VSLL_VV:
316 case RISCV::VSLL_VX:
317 case RISCV::VSRL_VI:
318 case RISCV::VSRL_VV:
319 case RISCV::VSRL_VX:
320 case RISCV::VSRA_VI:
321 case RISCV::VSRA_VV:
322 case RISCV::VSRA_VX:
323 // Vector Integer Min/Max Instructions
324 // EEW=SEW.
325 case RISCV::VMINU_VV:
326 case RISCV::VMINU_VX:
327 case RISCV::VMIN_VV:
328 case RISCV::VMIN_VX:
329 case RISCV::VMAXU_VV:
330 case RISCV::VMAXU_VX:
331 case RISCV::VMAX_VV:
332 case RISCV::VMAX_VX:
333 // Vector Single-Width Integer Multiply Instructions
334 // Source and Dest EEW=SEW.
335 case RISCV::VMUL_VV:
336 case RISCV::VMUL_VX:
337 case RISCV::VMULH_VV:
338 case RISCV::VMULH_VX:
339 case RISCV::VMULHU_VV:
340 case RISCV::VMULHU_VX:
341 case RISCV::VMULHSU_VV:
342 case RISCV::VMULHSU_VX:
343 // Vector Integer Divide Instructions
344 // EEW=SEW.
345 case RISCV::VDIVU_VV:
346 case RISCV::VDIVU_VX:
347 case RISCV::VDIV_VV:
348 case RISCV::VDIV_VX:
349 case RISCV::VREMU_VV:
350 case RISCV::VREMU_VX:
351 case RISCV::VREM_VV:
352 case RISCV::VREM_VX:
353 // Vector Single-Width Integer Multiply-Add Instructions
354 // EEW=SEW.
355 case RISCV::VMACC_VV:
356 case RISCV::VMACC_VX:
357 case RISCV::VNMSAC_VV:
358 case RISCV::VNMSAC_VX:
359 case RISCV::VMADD_VV:
360 case RISCV::VMADD_VX:
361 case RISCV::VNMSUB_VV:
362 case RISCV::VNMSUB_VX:
363 // Vector Integer Merge Instructions
364 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
365 // EEW=SEW, except the mask operand has EEW=1. Mask operand is handled
366 // before this switch.
367 case RISCV::VMERGE_VIM:
368 case RISCV::VMERGE_VVM:
369 case RISCV::VMERGE_VXM:
370 case RISCV::VADC_VIM:
371 case RISCV::VADC_VVM:
372 case RISCV::VADC_VXM:
373 case RISCV::VSBC_VVM:
374 case RISCV::VSBC_VXM:
375 // Vector Integer Move Instructions
376 // Vector Fixed-Point Arithmetic Instructions
377 // Vector Single-Width Saturating Add and Subtract
378 // Vector Single-Width Averaging Add and Subtract
379 // EEW=SEW.
380 case RISCV::VMV_V_I:
381 case RISCV::VMV_V_V:
382 case RISCV::VMV_V_X:
383 case RISCV::VSADDU_VI:
384 case RISCV::VSADDU_VV:
385 case RISCV::VSADDU_VX:
386 case RISCV::VSADD_VI:
387 case RISCV::VSADD_VV:
388 case RISCV::VSADD_VX:
389 case RISCV::VSSUBU_VV:
390 case RISCV::VSSUBU_VX:
391 case RISCV::VSSUB_VV:
392 case RISCV::VSSUB_VX:
393 case RISCV::VAADDU_VV:
394 case RISCV::VAADDU_VX:
395 case RISCV::VAADD_VV:
396 case RISCV::VAADD_VX:
397 case RISCV::VASUBU_VV:
398 case RISCV::VASUBU_VX:
399 case RISCV::VASUB_VV:
400 case RISCV::VASUB_VX:
401 // Vector Single-Width Fractional Multiply with Rounding and Saturation
402 // EEW=SEW. The instruction produces 2*SEW product internally but
403 // saturates to fit into SEW bits.
404 case RISCV::VSMUL_VV:
405 case RISCV::VSMUL_VX:
406 // Vector Single-Width Scaling Shift Instructions
407 // EEW=SEW.
408 case RISCV::VSSRL_VI:
409 case RISCV::VSSRL_VV:
410 case RISCV::VSSRL_VX:
411 case RISCV::VSSRA_VI:
412 case RISCV::VSSRA_VV:
413 case RISCV::VSSRA_VX:
414 // Vector Permutation Instructions
415 // Integer Scalar Move Instructions
416 // Floating-Point Scalar Move Instructions
417 // EEW=SEW.
418 case RISCV::VMV_X_S:
419 case RISCV::VMV_S_X:
420 case RISCV::VFMV_F_S:
421 case RISCV::VFMV_S_F:
422 // Vector Slide Instructions
423 // EEW=SEW.
424 case RISCV::VSLIDEUP_VI:
425 case RISCV::VSLIDEUP_VX:
426 case RISCV::VSLIDEDOWN_VI:
427 case RISCV::VSLIDEDOWN_VX:
428 case RISCV::VSLIDE1UP_VX:
429 case RISCV::VFSLIDE1UP_VF:
430 case RISCV::VSLIDE1DOWN_VX:
431 case RISCV::VFSLIDE1DOWN_VF:
432 // Vector Register Gather Instructions
433 // EEW=SEW. For mask operand, EEW=1.
434 case RISCV::VRGATHER_VI:
435 case RISCV::VRGATHER_VV:
436 case RISCV::VRGATHER_VX:
437 // Vector Compress Instruction
438 // EEW=SEW.
439 case RISCV::VCOMPRESS_VM:
440 // Vector Element Index Instruction
441 case RISCV::VID_V:
442 // Vector Single-Width Floating-Point Add/Subtract Instructions
443 case RISCV::VFADD_VF:
444 case RISCV::VFADD_VV:
445 case RISCV::VFSUB_VF:
446 case RISCV::VFSUB_VV:
447 case RISCV::VFRSUB_VF:
448 // Vector Single-Width Floating-Point Multiply/Divide Instructions
449 case RISCV::VFMUL_VF:
450 case RISCV::VFMUL_VV:
451 case RISCV::VFDIV_VF:
452 case RISCV::VFDIV_VV:
453 case RISCV::VFRDIV_VF:
454 // Vector Floating-Point Square-Root Instruction
455 case RISCV::VFSQRT_V:
456 // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
457 case RISCV::VFRSQRT7_V:
458 // Vector Floating-Point Reciprocal Estimate Instruction
459 case RISCV::VFREC7_V:
460 // Vector Floating-Point MIN/MAX Instructions
461 case RISCV::VFMIN_VF:
462 case RISCV::VFMIN_VV:
463 case RISCV::VFMAX_VF:
464 case RISCV::VFMAX_VV:
465 // Vector Floating-Point Sign-Injection Instructions
466 case RISCV::VFSGNJ_VF:
467 case RISCV::VFSGNJ_VV:
468 case RISCV::VFSGNJN_VV:
469 case RISCV::VFSGNJN_VF:
470 case RISCV::VFSGNJX_VF:
471 case RISCV::VFSGNJX_VV:
472 // Vector Floating-Point Classify Instruction
473 case RISCV::VFCLASS_V:
474 // Vector Floating-Point Move Instruction
475 case RISCV::VFMV_V_F:
476 // Single-Width Floating-Point/Integer Type-Convert Instructions
477 case RISCV::VFCVT_XU_F_V:
478 case RISCV::VFCVT_X_F_V:
479 case RISCV::VFCVT_RTZ_XU_F_V:
480 case RISCV::VFCVT_RTZ_X_F_V:
481 case RISCV::VFCVT_F_XU_V:
482 case RISCV::VFCVT_F_X_V:
483 // Vector Floating-Point Merge Instruction
484 case RISCV::VFMERGE_VFM:
485 // Vector count population in mask vcpop.m
486 // vfirst find-first-set mask bit
487 case RISCV::VCPOP_M:
488 case RISCV::VFIRST_M:
489 return MILog2SEW;
490
491 // Vector Widening Integer Add/Subtract
492 // Def uses EEW=2*SEW . Operands use EEW=SEW.
493 case RISCV::VWADDU_VV:
494 case RISCV::VWADDU_VX:
495 case RISCV::VWSUBU_VV:
496 case RISCV::VWSUBU_VX:
497 case RISCV::VWADD_VV:
498 case RISCV::VWADD_VX:
499 case RISCV::VWSUB_VV:
500 case RISCV::VWSUB_VX:
501 case RISCV::VWSLL_VI:
502 // Vector Widening Integer Multiply Instructions
503 // Destination EEW=2*SEW. Source EEW=SEW.
504 case RISCV::VWMUL_VV:
505 case RISCV::VWMUL_VX:
506 case RISCV::VWMULSU_VV:
507 case RISCV::VWMULSU_VX:
508 case RISCV::VWMULU_VV:
509 case RISCV::VWMULU_VX:
510 // Vector Widening Integer Multiply-Add Instructions
511 // Destination EEW=2*SEW. Source EEW=SEW.
512 // A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
513 // is then added to the 2*SEW-bit Dest. These instructions never have a
514 // passthru operand.
515 case RISCV::VWMACCU_VV:
516 case RISCV::VWMACCU_VX:
517 case RISCV::VWMACC_VV:
518 case RISCV::VWMACC_VX:
519 case RISCV::VWMACCSU_VV:
520 case RISCV::VWMACCSU_VX:
521 case RISCV::VWMACCUS_VX:
522 // Vector Widening Floating-Point Fused Multiply-Add Instructions
523 case RISCV::VFWMACC_VF:
524 case RISCV::VFWMACC_VV:
525 case RISCV::VFWNMACC_VF:
526 case RISCV::VFWNMACC_VV:
527 case RISCV::VFWMSAC_VF:
528 case RISCV::VFWMSAC_VV:
529 case RISCV::VFWNMSAC_VF:
530 case RISCV::VFWNMSAC_VV:
531 // Vector Widening Floating-Point Add/Subtract Instructions
532 // Dest EEW=2*SEW. Source EEW=SEW.
533 case RISCV::VFWADD_VV:
534 case RISCV::VFWADD_VF:
535 case RISCV::VFWSUB_VV:
536 case RISCV::VFWSUB_VF:
537 // Vector Widening Floating-Point Multiply
538 case RISCV::VFWMUL_VF:
539 case RISCV::VFWMUL_VV:
540 // Widening Floating-Point/Integer Type-Convert Instructions
541 case RISCV::VFWCVT_XU_F_V:
542 case RISCV::VFWCVT_X_F_V:
543 case RISCV::VFWCVT_RTZ_XU_F_V:
544 case RISCV::VFWCVT_RTZ_X_F_V:
545 case RISCV::VFWCVT_F_XU_V:
546 case RISCV::VFWCVT_F_X_V:
547 case RISCV::VFWCVT_F_F_V:
548 case RISCV::VFWCVTBF16_F_F_V:
549 return IsMODef ? MILog2SEW + 1 : MILog2SEW;
550
551 // Def and Op1 uses EEW=2*SEW. Op2 uses EEW=SEW.
552 case RISCV::VWADDU_WV:
553 case RISCV::VWADDU_WX:
554 case RISCV::VWSUBU_WV:
555 case RISCV::VWSUBU_WX:
556 case RISCV::VWADD_WV:
557 case RISCV::VWADD_WX:
558 case RISCV::VWSUB_WV:
559 case RISCV::VWSUB_WX:
560 // Vector Widening Floating-Point Add/Subtract Instructions
561 case RISCV::VFWADD_WF:
562 case RISCV::VFWADD_WV:
563 case RISCV::VFWSUB_WF:
564 case RISCV::VFWSUB_WV: {
565 bool IsOp1 = (HasPassthru && !IsTied) ? MO.getOperandNo() == 2
566 : MO.getOperandNo() == 1;
567 bool TwoTimes = IsMODef || IsOp1;
568 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
569 }
570
571 // Vector Integer Extension
572 case RISCV::VZEXT_VF2:
573 case RISCV::VSEXT_VF2:
574 return getIntegerExtensionOperandEEW(2, MI, MO);
575 case RISCV::VZEXT_VF4:
576 case RISCV::VSEXT_VF4:
577 return getIntegerExtensionOperandEEW(4, MI, MO);
578 case RISCV::VZEXT_VF8:
579 case RISCV::VSEXT_VF8:
580 return getIntegerExtensionOperandEEW(8, MI, MO);
581
582 // Vector Narrowing Integer Right Shift Instructions
583 // Destination EEW=SEW, Op 1 has EEW=2*SEW. Op2 has EEW=SEW
584 case RISCV::VNSRL_WX:
585 case RISCV::VNSRL_WI:
586 case RISCV::VNSRL_WV:
587 case RISCV::VNSRA_WI:
588 case RISCV::VNSRA_WV:
589 case RISCV::VNSRA_WX:
590 // Vector Narrowing Fixed-Point Clip Instructions
591 // Destination and Op2 EEW=SEW. Op1 EEW=2*SEW.
592 case RISCV::VNCLIPU_WI:
593 case RISCV::VNCLIPU_WV:
594 case RISCV::VNCLIPU_WX:
595 case RISCV::VNCLIP_WI:
596 case RISCV::VNCLIP_WV:
597 case RISCV::VNCLIP_WX:
598 // Narrowing Floating-Point/Integer Type-Convert Instructions
599 case RISCV::VFNCVT_XU_F_W:
600 case RISCV::VFNCVT_X_F_W:
601 case RISCV::VFNCVT_RTZ_XU_F_W:
602 case RISCV::VFNCVT_RTZ_X_F_W:
603 case RISCV::VFNCVT_F_XU_W:
604 case RISCV::VFNCVT_F_X_W:
605 case RISCV::VFNCVT_F_F_W:
606 case RISCV::VFNCVT_ROD_F_F_W:
607 case RISCV::VFNCVTBF16_F_F_W: {
608 assert(!IsTied);
609 bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
610 bool TwoTimes = IsOp1;
611 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
612 }
613
614 // Vector Mask Instructions
615 // Vector Mask-Register Logical Instructions
616 // vmsbf.m set-before-first mask bit
617 // vmsif.m set-including-first mask bit
618 // vmsof.m set-only-first mask bit
619 // EEW=1
620 // We handle the cases when operand is a v0 mask operand above the switch,
621 // but these instructions may use non-v0 mask operands and need to be handled
622 // specifically.
623 case RISCV::VMAND_MM:
624 case RISCV::VMNAND_MM:
625 case RISCV::VMANDN_MM:
626 case RISCV::VMXOR_MM:
627 case RISCV::VMOR_MM:
628 case RISCV::VMNOR_MM:
629 case RISCV::VMORN_MM:
630 case RISCV::VMXNOR_MM:
631 case RISCV::VMSBF_M:
632 case RISCV::VMSIF_M:
633 case RISCV::VMSOF_M: {
634 return MILog2SEW;
635 }
636
637 // Vector Iota Instruction
638 // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
639 // before this switch.
640 case RISCV::VIOTA_M: {
641 if (IsMODef || MO.getOperandNo() == 1)
642 return MILog2SEW;
643 return 0;
644 }
645
646 // Vector Integer Compare Instructions
647 // Dest EEW=1. Source EEW=SEW.
648 case RISCV::VMSEQ_VI:
649 case RISCV::VMSEQ_VV:
650 case RISCV::VMSEQ_VX:
651 case RISCV::VMSNE_VI:
652 case RISCV::VMSNE_VV:
653 case RISCV::VMSNE_VX:
654 case RISCV::VMSLTU_VV:
655 case RISCV::VMSLTU_VX:
656 case RISCV::VMSLT_VV:
657 case RISCV::VMSLT_VX:
658 case RISCV::VMSLEU_VV:
659 case RISCV::VMSLEU_VI:
660 case RISCV::VMSLEU_VX:
661 case RISCV::VMSLE_VV:
662 case RISCV::VMSLE_VI:
663 case RISCV::VMSLE_VX:
664 case RISCV::VMSGTU_VI:
665 case RISCV::VMSGTU_VX:
666 case RISCV::VMSGT_VI:
667 case RISCV::VMSGT_VX:
668 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
669 // Dest EEW=1. Source EEW=SEW. Mask source operand handled above this switch.
670 case RISCV::VMADC_VIM:
671 case RISCV::VMADC_VVM:
672 case RISCV::VMADC_VXM:
673 case RISCV::VMSBC_VVM:
674 case RISCV::VMSBC_VXM:
675 // Dest EEW=1. Source EEW=SEW.
676 case RISCV::VMADC_VV:
677 case RISCV::VMADC_VI:
678 case RISCV::VMADC_VX:
679 case RISCV::VMSBC_VV:
680 case RISCV::VMSBC_VX:
681 // 13.13. Vector Floating-Point Compare Instructions
682 // Dest EEW=1. Source EEW=SEW
683 case RISCV::VMFEQ_VF:
684 case RISCV::VMFEQ_VV:
685 case RISCV::VMFNE_VF:
686 case RISCV::VMFNE_VV:
687 case RISCV::VMFLT_VF:
688 case RISCV::VMFLT_VV:
689 case RISCV::VMFLE_VF:
690 case RISCV::VMFLE_VV:
691 case RISCV::VMFGT_VF:
692 case RISCV::VMFGE_VF: {
693 if (IsMODef)
694 return 0;
695 return MILog2SEW;
696 }
697
698 // Vector Reduction Operations
699 // Vector Single-Width Integer Reduction Instructions
700 case RISCV::VREDAND_VS:
701 case RISCV::VREDMAX_VS:
702 case RISCV::VREDMAXU_VS:
703 case RISCV::VREDMIN_VS:
704 case RISCV::VREDMINU_VS:
705 case RISCV::VREDOR_VS:
706 case RISCV::VREDSUM_VS:
707 case RISCV::VREDXOR_VS:
708 // Vector Single-Width Floating-Point Reduction Instructions
709 case RISCV::VFREDMAX_VS:
710 case RISCV::VFREDMIN_VS:
711 case RISCV::VFREDOSUM_VS:
712 case RISCV::VFREDUSUM_VS: {
713 return MILog2SEW;
714 }
715
716 // Vector Widening Integer Reduction Instructions
717 // The Dest and VS1 read only element 0 of the vector register. Return
718 // EEW=2*SEW for these. VS2 has EEW=SEW and EMUL=LMUL.
719 case RISCV::VWREDSUM_VS:
720 case RISCV::VWREDSUMU_VS:
721 // Vector Widening Floating-Point Reduction Instructions
722 case RISCV::VFWREDOSUM_VS:
723 case RISCV::VFWREDUSUM_VS: {
724 bool TwoTimes = IsMODef || MO.getOperandNo() == 3;
725 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
726 }
727
728 default:
729 return std::nullopt;
730 }
731}
732
733static std::optional<OperandInfo>
735 const MachineInstr &MI = *MO.getParent();
737 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
738 assert(RVV && "Could not find MI in PseudoTable");
739
740 std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO, MRI);
741 if (!Log2EEW)
742 return std::nullopt;
743
744 switch (RVV->BaseInstr) {
745 // Vector Reduction Operations
746 // Vector Single-Width Integer Reduction Instructions
747 // Vector Widening Integer Reduction Instructions
748 // Vector Widening Floating-Point Reduction Instructions
749 // The Dest and VS1 only read element 0 of the vector register. Return just
750 // the EEW for these.
751 case RISCV::VREDAND_VS:
752 case RISCV::VREDMAX_VS:
753 case RISCV::VREDMAXU_VS:
754 case RISCV::VREDMIN_VS:
755 case RISCV::VREDMINU_VS:
756 case RISCV::VREDOR_VS:
757 case RISCV::VREDSUM_VS:
758 case RISCV::VREDXOR_VS:
759 case RISCV::VWREDSUM_VS:
760 case RISCV::VWREDSUMU_VS:
761 case RISCV::VFWREDOSUM_VS:
762 case RISCV::VFWREDUSUM_VS:
763 if (MO.getOperandNo() != 2)
764 return OperandInfo(*Log2EEW);
765 break;
766 };
767
768 // All others have EMUL=EEW/SEW*LMUL
770 *Log2EEW);
771}
772
773/// Return true if this optimization should consider MI for VL reduction. This
774/// white-list approach simplifies this optimization for instructions that may
775/// have more complex semantics with relation to how it uses VL.
776static bool isSupportedInstr(const MachineInstr &MI) {
778 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
779
780 if (!RVV)
781 return false;
782
783 switch (RVV->BaseInstr) {
784 // Vector Unit-Stride Instructions
785 // Vector Strided Instructions
786 case RISCV::VLM_V:
787 case RISCV::VLE8_V:
788 case RISCV::VLSE8_V:
789 case RISCV::VLE16_V:
790 case RISCV::VLSE16_V:
791 case RISCV::VLE32_V:
792 case RISCV::VLSE32_V:
793 case RISCV::VLE64_V:
794 case RISCV::VLSE64_V:
795 // Vector Indexed Instructions
796 case RISCV::VLUXEI8_V:
797 case RISCV::VLOXEI8_V:
798 case RISCV::VLUXEI16_V:
799 case RISCV::VLOXEI16_V:
800 case RISCV::VLUXEI32_V:
801 case RISCV::VLOXEI32_V:
802 case RISCV::VLUXEI64_V:
803 case RISCV::VLOXEI64_V: {
804 for (const MachineMemOperand *MMO : MI.memoperands())
805 if (MMO->isVolatile())
806 return false;
807 return true;
808 }
809
810 // Vector Single-Width Integer Add and Subtract
811 case RISCV::VADD_VI:
812 case RISCV::VADD_VV:
813 case RISCV::VADD_VX:
814 case RISCV::VSUB_VV:
815 case RISCV::VSUB_VX:
816 case RISCV::VRSUB_VI:
817 case RISCV::VRSUB_VX:
818 // Vector Bitwise Logical Instructions
819 // Vector Single-Width Shift Instructions
820 case RISCV::VAND_VI:
821 case RISCV::VAND_VV:
822 case RISCV::VAND_VX:
823 case RISCV::VOR_VI:
824 case RISCV::VOR_VV:
825 case RISCV::VOR_VX:
826 case RISCV::VXOR_VI:
827 case RISCV::VXOR_VV:
828 case RISCV::VXOR_VX:
829 case RISCV::VSLL_VI:
830 case RISCV::VSLL_VV:
831 case RISCV::VSLL_VX:
832 case RISCV::VSRL_VI:
833 case RISCV::VSRL_VV:
834 case RISCV::VSRL_VX:
835 case RISCV::VSRA_VI:
836 case RISCV::VSRA_VV:
837 case RISCV::VSRA_VX:
838 // Vector Widening Integer Add/Subtract
839 case RISCV::VWADDU_VV:
840 case RISCV::VWADDU_VX:
841 case RISCV::VWSUBU_VV:
842 case RISCV::VWSUBU_VX:
843 case RISCV::VWADD_VV:
844 case RISCV::VWADD_VX:
845 case RISCV::VWSUB_VV:
846 case RISCV::VWSUB_VX:
847 case RISCV::VWADDU_WV:
848 case RISCV::VWADDU_WX:
849 case RISCV::VWSUBU_WV:
850 case RISCV::VWSUBU_WX:
851 case RISCV::VWADD_WV:
852 case RISCV::VWADD_WX:
853 case RISCV::VWSUB_WV:
854 case RISCV::VWSUB_WX:
855 // Vector Integer Extension
856 case RISCV::VZEXT_VF2:
857 case RISCV::VSEXT_VF2:
858 case RISCV::VZEXT_VF4:
859 case RISCV::VSEXT_VF4:
860 case RISCV::VZEXT_VF8:
861 case RISCV::VSEXT_VF8:
862 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
863 // FIXME: Add support
864 case RISCV::VMADC_VV:
865 case RISCV::VMADC_VI:
866 case RISCV::VMADC_VX:
867 case RISCV::VMSBC_VV:
868 case RISCV::VMSBC_VX:
869 // Vector Narrowing Integer Right Shift Instructions
870 case RISCV::VNSRL_WX:
871 case RISCV::VNSRL_WI:
872 case RISCV::VNSRL_WV:
873 case RISCV::VNSRA_WI:
874 case RISCV::VNSRA_WV:
875 case RISCV::VNSRA_WX:
876 // Vector Integer Compare Instructions
877 case RISCV::VMSEQ_VI:
878 case RISCV::VMSEQ_VV:
879 case RISCV::VMSEQ_VX:
880 case RISCV::VMSNE_VI:
881 case RISCV::VMSNE_VV:
882 case RISCV::VMSNE_VX:
883 case RISCV::VMSLTU_VV:
884 case RISCV::VMSLTU_VX:
885 case RISCV::VMSLT_VV:
886 case RISCV::VMSLT_VX:
887 case RISCV::VMSLEU_VV:
888 case RISCV::VMSLEU_VI:
889 case RISCV::VMSLEU_VX:
890 case RISCV::VMSLE_VV:
891 case RISCV::VMSLE_VI:
892 case RISCV::VMSLE_VX:
893 case RISCV::VMSGTU_VI:
894 case RISCV::VMSGTU_VX:
895 case RISCV::VMSGT_VI:
896 case RISCV::VMSGT_VX:
897 // Vector Integer Min/Max Instructions
898 case RISCV::VMINU_VV:
899 case RISCV::VMINU_VX:
900 case RISCV::VMIN_VV:
901 case RISCV::VMIN_VX:
902 case RISCV::VMAXU_VV:
903 case RISCV::VMAXU_VX:
904 case RISCV::VMAX_VV:
905 case RISCV::VMAX_VX:
906 // Vector Single-Width Integer Multiply Instructions
907 case RISCV::VMUL_VV:
908 case RISCV::VMUL_VX:
909 case RISCV::VMULH_VV:
910 case RISCV::VMULH_VX:
911 case RISCV::VMULHU_VV:
912 case RISCV::VMULHU_VX:
913 case RISCV::VMULHSU_VV:
914 case RISCV::VMULHSU_VX:
915 // Vector Integer Divide Instructions
916 case RISCV::VDIVU_VV:
917 case RISCV::VDIVU_VX:
918 case RISCV::VDIV_VV:
919 case RISCV::VDIV_VX:
920 case RISCV::VREMU_VV:
921 case RISCV::VREMU_VX:
922 case RISCV::VREM_VV:
923 case RISCV::VREM_VX:
924 // Vector Widening Integer Multiply Instructions
925 case RISCV::VWMUL_VV:
926 case RISCV::VWMUL_VX:
927 case RISCV::VWMULSU_VV:
928 case RISCV::VWMULSU_VX:
929 case RISCV::VWMULU_VV:
930 case RISCV::VWMULU_VX:
931 // Vector Single-Width Integer Multiply-Add Instructions
932 case RISCV::VMACC_VV:
933 case RISCV::VMACC_VX:
934 case RISCV::VNMSAC_VV:
935 case RISCV::VNMSAC_VX:
936 case RISCV::VMADD_VV:
937 case RISCV::VMADD_VX:
938 case RISCV::VNMSUB_VV:
939 case RISCV::VNMSUB_VX:
940 // Vector Integer Merge Instructions
941 case RISCV::VMERGE_VIM:
942 case RISCV::VMERGE_VVM:
943 case RISCV::VMERGE_VXM:
944 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
945 case RISCV::VADC_VIM:
946 case RISCV::VADC_VVM:
947 case RISCV::VADC_VXM:
948 // Vector Widening Integer Multiply-Add Instructions
949 case RISCV::VWMACCU_VV:
950 case RISCV::VWMACCU_VX:
951 case RISCV::VWMACC_VV:
952 case RISCV::VWMACC_VX:
953 case RISCV::VWMACCSU_VV:
954 case RISCV::VWMACCSU_VX:
955 case RISCV::VWMACCUS_VX:
956 // Vector Integer Merge Instructions
957 // FIXME: Add support
958 // Vector Integer Move Instructions
959 // FIXME: Add support
960 case RISCV::VMV_V_I:
961 case RISCV::VMV_V_X:
962 case RISCV::VMV_V_V:
963 // Vector Single-Width Averaging Add and Subtract
964 case RISCV::VAADDU_VV:
965 case RISCV::VAADDU_VX:
966 case RISCV::VAADD_VV:
967 case RISCV::VAADD_VX:
968 case RISCV::VASUBU_VV:
969 case RISCV::VASUBU_VX:
970 case RISCV::VASUB_VV:
971 case RISCV::VASUB_VX:
972
973 // Vector Crypto
974 case RISCV::VWSLL_VI:
975
976 // Vector Mask Instructions
977 // Vector Mask-Register Logical Instructions
978 // vmsbf.m set-before-first mask bit
979 // vmsif.m set-including-first mask bit
980 // vmsof.m set-only-first mask bit
981 // Vector Iota Instruction
982 // Vector Element Index Instruction
983 case RISCV::VMAND_MM:
984 case RISCV::VMNAND_MM:
985 case RISCV::VMANDN_MM:
986 case RISCV::VMXOR_MM:
987 case RISCV::VMOR_MM:
988 case RISCV::VMNOR_MM:
989 case RISCV::VMORN_MM:
990 case RISCV::VMXNOR_MM:
991 case RISCV::VMSBF_M:
992 case RISCV::VMSIF_M:
993 case RISCV::VMSOF_M:
994 case RISCV::VIOTA_M:
995 case RISCV::VID_V:
996 // Vector Single-Width Floating-Point Add/Subtract Instructions
997 case RISCV::VFADD_VF:
998 case RISCV::VFADD_VV:
999 case RISCV::VFSUB_VF:
1000 case RISCV::VFSUB_VV:
1001 case RISCV::VFRSUB_VF:
1002 // Vector Widening Floating-Point Add/Subtract Instructions
1003 case RISCV::VFWADD_VV:
1004 case RISCV::VFWADD_VF:
1005 case RISCV::VFWSUB_VV:
1006 case RISCV::VFWSUB_VF:
1007 case RISCV::VFWADD_WF:
1008 case RISCV::VFWADD_WV:
1009 case RISCV::VFWSUB_WF:
1010 case RISCV::VFWSUB_WV:
1011 // Vector Single-Width Floating-Point Multiply/Divide Instructions
1012 case RISCV::VFMUL_VF:
1013 case RISCV::VFMUL_VV:
1014 case RISCV::VFDIV_VF:
1015 case RISCV::VFDIV_VV:
1016 case RISCV::VFRDIV_VF:
1017 // Vector Widening Floating-Point Multiply
1018 case RISCV::VFWMUL_VF:
1019 case RISCV::VFWMUL_VV:
1020 // Vector Floating-Point MIN/MAX Instructions
1021 case RISCV::VFMIN_VF:
1022 case RISCV::VFMIN_VV:
1023 case RISCV::VFMAX_VF:
1024 case RISCV::VFMAX_VV:
1025 // Vector Floating-Point Sign-Injection Instructions
1026 case RISCV::VFSGNJ_VF:
1027 case RISCV::VFSGNJ_VV:
1028 case RISCV::VFSGNJN_VV:
1029 case RISCV::VFSGNJN_VF:
1030 case RISCV::VFSGNJX_VF:
1031 case RISCV::VFSGNJX_VV:
1032 // Vector Floating-Point Compare Instructions
1033 case RISCV::VMFEQ_VF:
1034 case RISCV::VMFEQ_VV:
1035 case RISCV::VMFNE_VF:
1036 case RISCV::VMFNE_VV:
1037 case RISCV::VMFLT_VF:
1038 case RISCV::VMFLT_VV:
1039 case RISCV::VMFLE_VF:
1040 case RISCV::VMFLE_VV:
1041 case RISCV::VMFGT_VF:
1042 case RISCV::VMFGE_VF:
1043 // Single-Width Floating-Point/Integer Type-Convert Instructions
1044 case RISCV::VFCVT_XU_F_V:
1045 case RISCV::VFCVT_X_F_V:
1046 case RISCV::VFCVT_RTZ_XU_F_V:
1047 case RISCV::VFCVT_RTZ_X_F_V:
1048 case RISCV::VFCVT_F_XU_V:
1049 case RISCV::VFCVT_F_X_V:
1050 // Widening Floating-Point/Integer Type-Convert Instructions
1051 case RISCV::VFWCVT_XU_F_V:
1052 case RISCV::VFWCVT_X_F_V:
1053 case RISCV::VFWCVT_RTZ_XU_F_V:
1054 case RISCV::VFWCVT_RTZ_X_F_V:
1055 case RISCV::VFWCVT_F_XU_V:
1056 case RISCV::VFWCVT_F_X_V:
1057 case RISCV::VFWCVT_F_F_V:
1058 case RISCV::VFWCVTBF16_F_F_V:
1059 // Narrowing Floating-Point/Integer Type-Convert Instructions
1060 case RISCV::VFNCVT_XU_F_W:
1061 case RISCV::VFNCVT_X_F_W:
1062 case RISCV::VFNCVT_RTZ_XU_F_W:
1063 case RISCV::VFNCVT_RTZ_X_F_W:
1064 case RISCV::VFNCVT_F_XU_W:
1065 case RISCV::VFNCVT_F_X_W:
1066 case RISCV::VFNCVT_F_F_W:
1067 case RISCV::VFNCVT_ROD_F_F_W:
1068 case RISCV::VFNCVTBF16_F_F_W:
1069 return true;
1070 }
1071
1072 return false;
1073}
1074
1075/// Return true if MO is a vector operand but is used as a scalar operand.
1077 MachineInstr *MI = MO.getParent();
1079 RISCVVPseudosTable::getPseudoInfo(MI->getOpcode());
1080
1081 if (!RVV)
1082 return false;
1083
1084 switch (RVV->BaseInstr) {
1085 // Reductions only use vs1[0] of vs1
1086 case RISCV::VREDAND_VS:
1087 case RISCV::VREDMAX_VS:
1088 case RISCV::VREDMAXU_VS:
1089 case RISCV::VREDMIN_VS:
1090 case RISCV::VREDMINU_VS:
1091 case RISCV::VREDOR_VS:
1092 case RISCV::VREDSUM_VS:
1093 case RISCV::VREDXOR_VS:
1094 case RISCV::VWREDSUM_VS:
1095 case RISCV::VWREDSUMU_VS:
1096 case RISCV::VFREDMAX_VS:
1097 case RISCV::VFREDMIN_VS:
1098 case RISCV::VFREDOSUM_VS:
1099 case RISCV::VFREDUSUM_VS:
1100 case RISCV::VFWREDOSUM_VS:
1101 case RISCV::VFWREDUSUM_VS:
1102 return MO.getOperandNo() == 3;
1103 case RISCV::VMV_X_S:
1104 case RISCV::VFMV_F_S:
1105 return MO.getOperandNo() == 1;
1106 default:
1107 return false;
1108 }
1109}
1110
1111/// Return true if MI may read elements past VL.
1112static bool mayReadPastVL(const MachineInstr &MI) {
1114 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
1115 if (!RVV)
1116 return true;
1117
1118 switch (RVV->BaseInstr) {
1119 // vslidedown instructions may read elements past VL. They are handled
1120 // according to current tail policy.
1121 case RISCV::VSLIDEDOWN_VI:
1122 case RISCV::VSLIDEDOWN_VX:
1123 case RISCV::VSLIDE1DOWN_VX:
1124 case RISCV::VFSLIDE1DOWN_VF:
1125
1126 // vrgather instructions may read the source vector at any index < VLMAX,
1127 // regardless of VL.
1128 case RISCV::VRGATHER_VI:
1129 case RISCV::VRGATHER_VV:
1130 case RISCV::VRGATHER_VX:
1131 case RISCV::VRGATHEREI16_VV:
1132 return true;
1133
1134 default:
1135 return false;
1136 }
1137}
1138
1139bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
1140 const MCInstrDesc &Desc = MI.getDesc();
1141 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
1142 return false;
1143 if (MI.getNumDefs() != 1)
1144 return false;
1145
1146 // If we're not using VLMAX, then we need to be careful whether we are using
1147 // TA/TU when there is a non-undef Passthru. But when we are using VLMAX, it
1148 // does not matter whether we are using TA/TU with a non-undef Passthru, since
1149 // there are no tail elements to be preserved.
1150 unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1151 const MachineOperand &VLOp = MI.getOperand(VLOpNum);
1152 if (VLOp.isReg() || VLOp.getImm() != RISCV::VLMaxSentinel) {
1153 // If MI has a non-undef passthru, we will not try to optimize it since
1154 // that requires us to preserve tail elements according to TA/TU.
1155 // Otherwise, The MI has an undef Passthru, so it doesn't matter whether we
1156 // are using TA/TU.
1157 bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
1158 unsigned PassthruOpIdx = MI.getNumExplicitDefs();
1159 if (HasPassthru &&
1160 MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) {
1161 LLVM_DEBUG(
1162 dbgs() << " Not a candidate because it uses non-undef passthru"
1163 " with non-VLMAX VL\n");
1164 return false;
1165 }
1166 }
1167
1168 // If the VL is 1, then there is no need to reduce it. This is an
1169 // optimization, not needed to preserve correctness.
1170 if (VLOp.isImm() && VLOp.getImm() == 1) {
1171 LLVM_DEBUG(dbgs() << " Not a candidate because VL is already 1\n");
1172 return false;
1173 }
1174
1175 if (MI.mayRaiseFPException()) {
1176 LLVM_DEBUG(dbgs() << "Not a candidate because may raise FP exception\n");
1177 return false;
1178 }
1179
1180 // Some instructions that produce vectors have semantics that make it more
1181 // difficult to determine whether the VL can be reduced. For example, some
1182 // instructions, such as reductions, may write lanes past VL to a scalar
1183 // register. Other instructions, such as some loads or stores, may write
1184 // lower lanes using data from higher lanes. There may be other complex
1185 // semantics not mentioned here that make it hard to determine whether
1186 // the VL can be optimized. As a result, a white-list of supported
1187 // instructions is used. Over time, more instructions can be supported
1188 // upon careful examination of their semantics under the logic in this
1189 // optimization.
1190 // TODO: Use a better approach than a white-list, such as adding
1191 // properties to instructions using something like TSFlags.
1192 if (!isSupportedInstr(MI)) {
1193 LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction\n");
1194 return false;
1195 }
1196
1197 assert(MI.getOperand(0).isReg() &&
1198 isVectorRegClass(MI.getOperand(0).getReg(), MRI) &&
1199 "All supported instructions produce a vector register result");
1200
1201 LLVM_DEBUG(dbgs() << "Found a candidate for VL reduction: " << MI << "\n");
1202 return true;
1203}
1204
1205std::optional<MachineOperand>
1206RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
1207 const MachineInstr &UserMI = *UserOp.getParent();
1208 const MCInstrDesc &Desc = UserMI.getDesc();
1209
1210 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
1211 LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
1212 " use VLMAX\n");
1213 return std::nullopt;
1214 }
1215
1216 // Instructions like reductions may use a vector register as a scalar
1217 // register. In this case, we should treat it as only reading the first lane.
1218 if (isVectorOpUsedAsScalarOp(UserOp)) {
1219 [[maybe_unused]] Register R = UserOp.getReg();
1220 [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
1221 assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
1222 "Expect LMUL 1 register class for vector as scalar operands!");
1223 LLVM_DEBUG(dbgs() << " Used this operand as a scalar operand\n");
1224
1225 return MachineOperand::CreateImm(1);
1226 }
1227
1228 unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1229 const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1230 // Looking for an immediate or a register VL that isn't X0.
1231 assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
1232 "Did not expect X0 VL");
1233 return VLOp;
1234}
1235
1236std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
1237 // FIXME: Avoid visiting each user for each time we visit something on the
1238 // worklist, combined with an extra visit from the outer loop. Restructure
1239 // along lines of an instcombine style worklist which integrates the outer
1240 // pass.
1241 std::optional<MachineOperand> CommonVL;
1242 for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
1243 const MachineInstr &UserMI = *UserOp.getParent();
1244 LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
1245 if (mayReadPastVL(UserMI)) {
1246 LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
1247 return std::nullopt;
1248 }
1249
1250 // Tied operands might pass through.
1251 if (UserOp.isTied()) {
1252 LLVM_DEBUG(dbgs() << " Abort because user used as tied operand\n");
1253 return std::nullopt;
1254 }
1255
1256 auto VLOp = getMinimumVLForUser(UserOp);
1257 if (!VLOp)
1258 return std::nullopt;
1259
1260 // Use the largest VL among all the users. If we cannot determine this
1261 // statically, then we cannot optimize the VL.
1262 if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, *VLOp)) {
1263 CommonVL = *VLOp;
1264 LLVM_DEBUG(dbgs() << " User VL is: " << VLOp << "\n");
1265 } else if (!RISCV::isVLKnownLE(*VLOp, *CommonVL)) {
1266 LLVM_DEBUG(dbgs() << " Abort because cannot determine a common VL\n");
1267 return std::nullopt;
1268 }
1269
1270 if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
1271 LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n");
1272 return std::nullopt;
1273 }
1274
1275 std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp, MRI);
1276 std::optional<OperandInfo> ProducerInfo =
1277 getOperandInfo(MI.getOperand(0), MRI);
1278 if (!ConsumerInfo || !ProducerInfo) {
1279 LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
1280 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1281 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1282 return std::nullopt;
1283 }
1284
1285 // If the operand is used as a scalar operand, then the EEW must be
1286 // compatible. Otherwise, the EMUL *and* EEW must be compatible.
1287 bool IsVectorOpUsedAsScalarOp = isVectorOpUsedAsScalarOp(UserOp);
1288 if ((IsVectorOpUsedAsScalarOp &&
1289 !OperandInfo::EEWAreEqual(*ConsumerInfo, *ProducerInfo)) ||
1290 (!IsVectorOpUsedAsScalarOp &&
1291 !OperandInfo::EMULAndEEWAreEqual(*ConsumerInfo, *ProducerInfo))) {
1292 LLVM_DEBUG(
1293 dbgs()
1294 << " Abort due to incompatible information for EMUL or EEW.\n");
1295 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1296 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1297 return std::nullopt;
1298 }
1299 }
1300
1301 return CommonVL;
1302}
1303
1304bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) {
1305 LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
1306
1307 auto CommonVL = checkUsers(MI);
1308 if (!CommonVL)
1309 return false;
1310
1311 assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
1312 "Expected VL to be an Imm or virtual Reg");
1313
1314 unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
1315 MachineOperand &VLOp = MI.getOperand(VLOpNum);
1316
1317 if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
1318 LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
1319 return false;
1320 }
1321
1322 if (CommonVL->isIdenticalTo(VLOp)) {
1323 LLVM_DEBUG(
1324 dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
1325 return false;
1326 }
1327
1328 if (CommonVL->isImm()) {
1329 LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1330 << CommonVL->getImm() << " for " << MI << "\n");
1331 VLOp.ChangeToImmediate(CommonVL->getImm());
1332 return true;
1333 }
1334 const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
1335 if (!MDT->dominates(VLMI, &MI))
1336 return false;
1337 LLVM_DEBUG(
1338 dbgs() << " Reduce VL from " << VLOp << " to "
1339 << printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo())
1340 << " for " << MI << "\n");
1341
1342 // All our checks passed. We can reduce VL.
1343 VLOp.ChangeToRegister(CommonVL->getReg(), false);
1344 return true;
1345}
1346
1347bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
1348 if (skipFunction(MF.getFunction()))
1349 return false;
1350
1351 MRI = &MF.getRegInfo();
1352 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1353
1355 if (!ST.hasVInstructions())
1356 return false;
1357
1359 auto PushOperands = [this, &Worklist](MachineInstr &MI,
1360 bool IgnoreSameBlock) {
1361 for (auto &Op : MI.operands()) {
1362 if (!Op.isReg() || !Op.isUse() || !Op.getReg().isVirtual() ||
1363 !isVectorRegClass(Op.getReg(), MRI))
1364 continue;
1365
1366 MachineInstr *DefMI = MRI->getVRegDef(Op.getReg());
1367 if (!isCandidate(*DefMI))
1368 continue;
1369
1370 if (IgnoreSameBlock && DefMI->getParent() == MI.getParent())
1371 continue;
1372
1373 Worklist.insert(DefMI);
1374 }
1375 };
1376
1377 // Do a first pass eagerly rewriting in roughly reverse instruction
1378 // order, populate the worklist with any instructions we might need to
1379 // revisit. We avoid adding definitions to the worklist if they're
1380 // in the same block - we're about to visit them anyways.
1381 bool MadeChange = false;
1382 for (MachineBasicBlock &MBB : MF) {
1383 // Avoid unreachable blocks as they have degenerate dominance
1384 if (!MDT->isReachableFromEntry(&MBB))
1385 continue;
1386
1387 for (auto &MI : make_range(MBB.rbegin(), MBB.rend())) {
1388 if (!isCandidate(MI))
1389 continue;
1390 if (!tryReduceVL(MI))
1391 continue;
1392 MadeChange = true;
1393 PushOperands(MI, /*IgnoreSameBlock*/ true);
1394 }
1395 }
1396
1397 while (!Worklist.empty()) {
1398 assert(MadeChange);
1399 MachineInstr &MI = *Worklist.pop_back_val();
1401 if (!tryReduceVL(MI))
1402 continue;
1403 PushOperands(MI, /*IgnoreSameBlock*/ false);
1404 }
1405
1406 return MadeChange;
1407}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:282
#define LLVM_DEBUG(...)
Definition: Debug.h:106
IRTranslator LLVM IR MI
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static bool mayReadPastVL(const MachineInstr &MI)
Return true if MI may read elements past VL.
static LLVM_ATTRIBUTE_UNUSED raw_ostream & operator<<(raw_ostream &OS, const OperandInfo &OI)
static unsigned getIntegerExtensionOperandEEW(unsigned Factor, const MachineInstr &MI, const MachineOperand &MO)
Dest has EEW=SEW.
static bool isVectorOpUsedAsScalarOp(MachineOperand &MO)
Return true if MO is a vector operand but is used as a scalar operand.
static std::optional< unsigned > getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI)
static bool isVectorRegClass(Register R, const MachineRegisterInfo *MRI)
Return true if R is a physical or virtual vector register, false otherwise.
static bool isSupportedInstr(const MachineInstr &MI)
Return true if this optimization should consider MI for VL reduction.
#define PASS_NAME
#define DEBUG_TYPE
static bool isMaskOperand(const MachineInstr &MI, const MachineOperand &MO, const MachineRegisterInfo *MRI)
Check whether MO is a mask operand of MI.
static std::optional< OperandInfo > getOperandInfo(const MachineOperand &MO, const MachineRegisterInfo *MRI)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
#define PASS_NAME
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
reverse_iterator rend()
reverse_iterator rbegin()
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:71
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:349
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:574
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
const uint8_t TSFlags
Configurable target specific flags.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
static bool isTiedPseudo(uint64_t TSFlags)
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static VLMUL getLMul(uint64_t TSFlags)
static bool hasVLOp(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
static bool isVRegClass(uint64_t TSFlags)
static std::pair< unsigned, bool > getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI)
Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and SEW are from the TSFlags o...
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS)
Given two VL operands, do we know that LHS <= RHS?
static constexpr int64_t VLMaxSentinel
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FunctionPass * createRISCVVLOptimizerPass()
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Represents the EMUL and EEW of a MachineOperand.
static bool EEWAreEqual(const OperandInfo &A, const OperandInfo &B)
OperandInfo(std::pair< unsigned, bool > EMUL, unsigned Log2EEW)
OperandInfo(unsigned Log2EEW)
void print(raw_ostream &OS) const
static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B)
OperandInfo()=delete
OperandInfo(RISCVII::VLMUL EMUL, unsigned Log2EEW)
std::optional< std::pair< unsigned, bool > > EMUL
Description of the encoding of one expression Op.