LLVM 20.0.0git
MachineCSE.cpp
Go to the documentation of this file.
1//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global common subexpression elimination on machine
10// instructions using a scoped hash table based value numbering scheme. It
11// must be run while the machine function is still in SSA form.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
32#include "llvm/CodeGen/Passes.h"
38#include "llvm/MC/MCRegister.h"
40#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
45#include <cassert>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "machine-cse"
52
53STATISTIC(NumCoalesces, "Number of copies coalesced");
54STATISTIC(NumCSEs, "Number of common subexpression eliminated");
55STATISTIC(NumPREs, "Number of partial redundant expression"
56 " transformed to fully redundant");
57STATISTIC(NumPhysCSEs,
58 "Number of physreg referencing common subexpr eliminated");
59STATISTIC(NumCrossBBCSEs,
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
62
63// Threshold to avoid excessive cost to compute isProfitableToCSE.
64static cl::opt<int>
65 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
66 cl::desc("Threshold for the size of CSUses"));
67
69 "aggressive-machine-cse", cl::Hidden, cl::init(false),
70 cl::desc("Override the profitability heuristics for Machine CSE"));
71
72namespace {
73
74class MachineCSEImpl {
75 const TargetInstrInfo *TII = nullptr;
76 const TargetRegisterInfo *TRI = nullptr;
77 MachineDominatorTree *DT = nullptr;
78 MachineRegisterInfo *MRI = nullptr;
79 MachineBlockFrequencyInfo *MBFI = nullptr;
80
81public:
82 MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI)
83 : DT(DT), MBFI(MBFI) {}
84 bool run(MachineFunction &MF);
85
86private:
87 using AllocatorTy =
90 using ScopedHTType =
92 AllocatorTy>;
93 using ScopeType = ScopedHTType::ScopeTy;
94 using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
95
96 unsigned LookAheadLimit = 0;
99 PREMap;
100 ScopedHTType VNT;
102 unsigned CurrVN = 0;
103
104 bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB);
105 bool isPhysDefTriviallyDead(MCRegister Reg,
108 bool hasLivePhysRegDefUses(const MachineInstr *MI,
109 const MachineBasicBlock *MBB,
110 SmallSet<MCRegister, 8> &PhysRefs,
111 PhysDefVector &PhysDefs, bool &PhysUseDef) const;
112 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
113 SmallSet<MCRegister, 8> &PhysRefs,
114 PhysDefVector &PhysDefs, bool &NonLocal) const;
115 bool isCSECandidate(MachineInstr *MI);
116 bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB,
118 void EnterScope(MachineBasicBlock *MBB);
119 void ExitScope(MachineBasicBlock *MBB);
120 bool ProcessBlockCSE(MachineBasicBlock *MBB);
121 void ExitScopeIfDone(MachineDomTreeNode *Node,
123 bool PerformCSE(MachineDomTreeNode *Node);
124
125 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
126 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
127 bool PerformSimplePRE(MachineDominatorTree *DT);
128 /// Heuristics to see if it's profitable to move common computations of MBB
129 /// and MBB1 to CandidateBB.
130 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
132 void releaseMemory();
133};
134
135class MachineCSELegacy : public MachineFunctionPass {
136public:
137 static char ID; // Pass identification
138
139 MachineCSELegacy() : MachineFunctionPass(ID) {
141 }
142
143 bool runOnMachineFunction(MachineFunction &MF) override;
144
145 void getAnalysisUsage(AnalysisUsage &AU) const override {
146 AU.setPreservesCFG();
153 }
154
157 MachineFunctionProperties::Property::IsSSA);
158 }
159};
160} // end anonymous namespace
161
162char MachineCSELegacy::ID = 0;
163
164char &llvm::MachineCSELegacyID = MachineCSELegacy::ID;
165
167 "Machine Common Subexpression Elimination", false, false)
170 "Machine Common Subexpression Elimination", false, false)
171
172/// The source register of a COPY machine instruction can be propagated to all
173/// its users, and this propagation could increase the probability of finding
174/// common subexpressions. If the COPY has only one user, the COPY itself can
175/// be removed.
176bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI,
178 bool Changed = false;
179 for (MachineOperand &MO : MI->all_uses()) {
180 Register Reg = MO.getReg();
181 if (!Reg.isVirtual())
182 continue;
183 bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
184 MachineInstr *DefMI = MRI->getVRegDef(Reg);
185 if (!DefMI || !DefMI->isCopy())
186 continue;
187 Register SrcReg = DefMI->getOperand(1).getReg();
188 if (!SrcReg.isVirtual())
189 continue;
190 // FIXME: We should trivially coalesce subregister copies to expose CSE
191 // opportunities on instructions with truncated operands (see
192 // cse-add-with-overflow.ll). This can be done here as follows:
193 // if (SrcSubReg)
194 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
195 // SrcSubReg);
196 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
197 //
198 // The 2-addr pass has been updated to handle coalesced subregs. However,
199 // some machine-specific code still can't handle it.
200 // To handle it properly we also need a way find a constrained subregister
201 // class given a super-reg class and subreg index.
202 if (DefMI->getOperand(1).getSubReg())
203 continue;
204 if (!MRI->constrainRegAttrs(SrcReg, Reg))
205 continue;
206 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
207 LLVM_DEBUG(dbgs() << "*** to: " << *MI);
208
209 // Propagate SrcReg of copies to MI.
210 MO.setReg(SrcReg);
211 MRI->clearKillFlags(SrcReg);
212 // Coalesce single use copies.
213 if (OnlyOneUse) {
214 // If (and only if) we've eliminated all uses of the copy, also
215 // copy-propagate to any debug-users of MI, or they'll be left using
216 // an undefined value.
218
220 ++NumCoalesces;
221 }
222 Changed = true;
223 }
224
225 return Changed;
226}
227
228bool MachineCSEImpl::isPhysDefTriviallyDead(
231 unsigned LookAheadLeft = LookAheadLimit;
232 while (LookAheadLeft) {
233 // Skip over dbg_value's.
235
236 if (I == E)
237 // Reached end of block, we don't know if register is dead or not.
238 return false;
239
240 bool SeenDef = false;
241 for (const MachineOperand &MO : I->operands()) {
242 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
243 SeenDef = true;
244 if (!MO.isReg() || !MO.getReg())
245 continue;
246 if (!TRI->regsOverlap(MO.getReg(), Reg))
247 continue;
248 if (MO.isUse())
249 // Found a use!
250 return false;
251 SeenDef = true;
252 }
253 if (SeenDef)
254 // See a def of Reg (or an alias) before encountering any use, it's
255 // trivially dead.
256 return true;
257
258 --LookAheadLeft;
259 ++I;
260 }
261 return false;
262}
263
265 const MachineOperand &MO,
266 const MachineFunction &MF,
267 const TargetRegisterInfo &TRI,
268 const TargetInstrInfo &TII) {
269 // MachineRegisterInfo::isConstantPhysReg directly called by
270 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
271 // reserved registers to be frozen. That doesn't cause a problem post-ISel as
272 // most (if not all) targets freeze reserved registers right after ISel.
273 //
274 // It does cause issues mid-GlobalISel, however, hence the additional
275 // reservedRegsFrozen check.
276 const MachineRegisterInfo &MRI = MF.getRegInfo();
277 return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
278 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
279}
280
281/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
282/// physical registers (except for dead defs of physical registers). It also
283/// returns the physical register def by reference if it's the only one and the
284/// instruction does not uses a physical register.
285bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI,
286 const MachineBasicBlock *MBB,
287 SmallSet<MCRegister, 8> &PhysRefs,
288 PhysDefVector &PhysDefs,
289 bool &PhysUseDef) const {
290 // First, add all uses to PhysRefs.
291 for (const MachineOperand &MO : MI->all_uses()) {
292 Register Reg = MO.getReg();
293 if (!Reg)
294 continue;
295 if (Reg.isVirtual())
296 continue;
297 // Reading either caller preserved or constant physregs is ok.
298 if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
299 *TII))
300 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
301 PhysRefs.insert(*AI);
302 }
303
304 // Next, collect all defs into PhysDefs. If any is already in PhysRefs
305 // (which currently contains only uses), set the PhysUseDef flag.
306 PhysUseDef = false;
307 MachineBasicBlock::const_iterator I = MI; I = std::next(I);
308 for (const auto &MOP : llvm::enumerate(MI->operands())) {
309 const MachineOperand &MO = MOP.value();
310 if (!MO.isReg() || !MO.isDef())
311 continue;
312 Register Reg = MO.getReg();
313 if (!Reg)
314 continue;
315 if (Reg.isVirtual())
316 continue;
317 // Check against PhysRefs even if the def is "dead".
318 if (PhysRefs.count(Reg.asMCReg()))
319 PhysUseDef = true;
320 // If the def is dead, it's ok. But the def may not marked "dead". That's
321 // common since this pass is run before livevariables. We can scan
322 // forward a few instructions and check if it is obviously dead.
323 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
324 PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
325 }
326
327 // Finally, add all defs to PhysRefs as well.
328 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
329 for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
330 ++AI)
331 PhysRefs.insert(*AI);
332
333 return !PhysRefs.empty();
334}
335
336bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
337 SmallSet<MCRegister, 8> &PhysRefs,
338 PhysDefVector &PhysDefs,
339 bool &NonLocal) const {
340 // For now conservatively returns false if the common subexpression is
341 // not in the same basic block as the given instruction. The only exception
342 // is if the common subexpression is in the sole predecessor block.
343 const MachineBasicBlock *MBB = MI->getParent();
344 const MachineBasicBlock *CSMBB = CSMI->getParent();
345
346 bool CrossMBB = false;
347 if (CSMBB != MBB) {
348 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
349 return false;
350
351 for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
352 if (MRI->isAllocatable(PhysDefs[i].second) ||
353 MRI->isReserved(PhysDefs[i].second))
354 // Avoid extending live range of physical registers if they are
355 //allocatable or reserved.
356 return false;
357 }
358 CrossMBB = true;
359 }
360 MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
363 unsigned LookAheadLeft = LookAheadLimit;
364 while (LookAheadLeft) {
365 // Skip over dbg_value's.
366 while (I != E && I != EE && I->isDebugInstr())
367 ++I;
368
369 if (I == EE) {
370 assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
371 (void)CrossMBB;
372 CrossMBB = false;
373 NonLocal = true;
374 I = MBB->begin();
375 EE = MBB->end();
376 continue;
377 }
378
379 if (I == E)
380 return true;
381
382 for (const MachineOperand &MO : I->operands()) {
383 // RegMasks go on instructions like calls that clobber lots of physregs.
384 // Don't attempt to CSE across such an instruction.
385 if (MO.isRegMask())
386 return false;
387 if (!MO.isReg() || !MO.isDef())
388 continue;
389 Register MOReg = MO.getReg();
390 if (MOReg.isVirtual())
391 continue;
392 if (PhysRefs.count(MOReg.asMCReg()))
393 return false;
394 }
395
396 --LookAheadLeft;
397 ++I;
398 }
399
400 return false;
401}
402
403bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) {
404 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
405 MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() ||
406 MI->isFakeUse())
407 return false;
408
409 // Ignore copies.
410 if (MI->isCopyLike())
411 return false;
412
413 // Ignore stuff that we obviously can't move.
414 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
415 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
416 return false;
417
418 if (MI->mayLoad()) {
419 // Okay, this instruction does a load. As a refinement, we allow the target
420 // to decide whether the loaded value is actually a constant. If so, we can
421 // actually use it as a load.
422 if (!MI->isDereferenceableInvariantLoad())
423 // FIXME: we should be able to hoist loads with no other side effects if
424 // there are no other instructions which can change memory in this loop.
425 // This is a trivial form of alias analysis.
426 return false;
427 }
428
429 // Ignore stack guard loads, otherwise the register that holds CSEed value may
430 // be spilled and get loaded back with corrupted data.
431 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
432 return false;
433
434 return true;
435}
436
437/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
438/// common expression that defines Reg. CSBB is basic block where CSReg is
439/// defined.
440bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg,
441 MachineBasicBlock *CSBB,
442 MachineInstr *MI) {
444 return true;
445
446 // FIXME: Heuristics that works around the lack the live range splitting.
447
448 // If CSReg is used at all uses of Reg, CSE should not increase register
449 // pressure of CSReg.
450 bool MayIncreasePressure = true;
451 if (CSReg.isVirtual() && Reg.isVirtual()) {
452 MayIncreasePressure = false;
454 int NumOfUses = 0;
455 for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
456 CSUses.insert(&MI);
457 // Too costly to compute if NumOfUses is very large. Conservatively assume
458 // MayIncreasePressure to avoid spending too much time here.
459 if (++NumOfUses > CSUsesThreshold) {
460 MayIncreasePressure = true;
461 break;
462 }
463 }
464 if (!MayIncreasePressure)
465 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
466 if (!CSUses.count(&MI)) {
467 MayIncreasePressure = true;
468 break;
469 }
470 }
471 }
472 if (!MayIncreasePressure) return true;
473
474 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
475 // an immediate predecessor. We don't want to increase register pressure and
476 // end up causing other computation to be spilled.
477 if (TII->isAsCheapAsAMove(*MI)) {
478 MachineBasicBlock *BB = MI->getParent();
479 if (CSBB != BB && !CSBB->isSuccessor(BB))
480 return false;
481 }
482
483 // Heuristics #2: If the expression doesn't not use a vr and the only use
484 // of the redundant computation are copies, do not cse.
485 bool HasVRegUse = false;
486 for (const MachineOperand &MO : MI->all_uses()) {
487 if (MO.getReg().isVirtual()) {
488 HasVRegUse = true;
489 break;
490 }
491 }
492 if (!HasVRegUse) {
493 bool HasNonCopyUse = false;
494 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
495 // Ignore copies.
496 if (!MI.isCopyLike()) {
497 HasNonCopyUse = true;
498 break;
499 }
500 }
501 if (!HasNonCopyUse)
502 return false;
503 }
504
505 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
506 // it unless the defined value is already used in the BB of the new use.
507 bool HasPHI = false;
508 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
509 HasPHI |= UseMI.isPHI();
510 if (UseMI.getParent() == MI->getParent())
511 return true;
512 }
513
514 return !HasPHI;
515}
516
517void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) {
518 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
519 ScopeType *Scope = new ScopeType(VNT);
520 ScopeMap[MBB] = Scope;
521}
522
523void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) {
524 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
526 assert(SI != ScopeMap.end());
527 delete SI->second;
528 ScopeMap.erase(SI);
529}
530
531bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) {
532 bool Changed = false;
533
535 SmallVector<unsigned, 2> ImplicitDefsToUpdate;
536 SmallVector<unsigned, 2> ImplicitDefs;
538 if (!isCSECandidate(&MI))
539 continue;
540
541 bool FoundCSE = VNT.count(&MI);
542 if (!FoundCSE) {
543 // Using trivial copy propagation to find more CSE opportunities.
544 if (PerformTrivialCopyPropagation(&MI, MBB)) {
545 Changed = true;
546
547 // After coalescing MI itself may become a copy.
548 if (MI.isCopyLike())
549 continue;
550
551 // Try again to see if CSE is possible.
552 FoundCSE = VNT.count(&MI);
553 }
554 }
555
556 // Commute commutable instructions.
557 bool Commuted = false;
558 if (!FoundCSE && MI.isCommutable()) {
559 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
560 Commuted = true;
561 FoundCSE = VNT.count(NewMI);
562 if (NewMI != &MI) {
563 // New instruction. It doesn't need to be kept.
564 NewMI->eraseFromParent();
565 Changed = true;
566 } else if (!FoundCSE)
567 // MI was changed but it didn't help, commute it back!
568 (void)TII->commuteInstruction(MI);
569 }
570 }
571
572 // If the instruction defines physical registers and the values *may* be
573 // used, then it's not safe to replace it with a common subexpression.
574 // It's also not safe if the instruction uses physical registers.
575 bool CrossMBBPhysDef = false;
577 PhysDefVector PhysDefs;
578 bool PhysUseDef = false;
579 if (FoundCSE &&
580 hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
581 FoundCSE = false;
582
583 // ... Unless the CS is local or is in the sole predecessor block
584 // and it also defines the physical register which is not clobbered
585 // in between and the physical register uses were not clobbered.
586 // This can never be the case if the instruction both uses and
587 // defines the same physical register, which was detected above.
588 if (!PhysUseDef) {
589 unsigned CSVN = VNT.lookup(&MI);
590 MachineInstr *CSMI = Exps[CSVN];
591 if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
592 FoundCSE = true;
593 }
594 }
595
596 if (!FoundCSE) {
597 VNT.insert(&MI, CurrVN++);
598 Exps.push_back(&MI);
599 continue;
600 }
601
602 // Found a common subexpression, eliminate it.
603 unsigned CSVN = VNT.lookup(&MI);
604 MachineInstr *CSMI = Exps[CSVN];
605 LLVM_DEBUG(dbgs() << "Examining: " << MI);
606 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
607
608 // Prevent CSE-ing non-local convergent instructions.
609 // LLVM's current definition of `isConvergent` does not necessarily prove
610 // that non-local CSE is illegal. The following check extends the definition
611 // of `isConvergent` to assume a convergent instruction is dependent not
612 // only on additional conditions, but also on fewer conditions. LLVM does
613 // not have a MachineInstr attribute which expresses this extended
614 // definition, so it's necessary to use `isConvergent` to prevent illegally
615 // CSE-ing the subset of `isConvergent` instructions which do fall into this
616 // extended definition.
617 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
618 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
619 "different BBs, avoid CSE!\n");
620 VNT.insert(&MI, CurrVN++);
621 Exps.push_back(&MI);
622 continue;
623 }
624
625 // Check if it's profitable to perform this CSE.
626 bool DoCSE = true;
627 unsigned NumDefs = MI.getNumDefs();
628
629 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
630 MachineOperand &MO = MI.getOperand(i);
631 if (!MO.isReg() || !MO.isDef())
632 continue;
633 Register OldReg = MO.getReg();
634 Register NewReg = CSMI->getOperand(i).getReg();
635
636 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
637 // we should make sure it is not dead at CSMI.
638 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
639 ImplicitDefsToUpdate.push_back(i);
640
641 // Keep track of implicit defs of CSMI and MI, to clear possibly
642 // made-redundant kill flags.
643 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
644 ImplicitDefs.push_back(OldReg);
645
646 if (OldReg == NewReg) {
647 --NumDefs;
648 continue;
649 }
650
651 assert(OldReg.isVirtual() && NewReg.isVirtual() &&
652 "Do not CSE physical register defs!");
653
654 if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
655 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
656 DoCSE = false;
657 break;
658 }
659
660 // Don't perform CSE if the result of the new instruction cannot exist
661 // within the constraints (register class, bank, or low-level type) of
662 // the old instruction.
663 if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
665 dbgs() << "*** Not the same register constraints, avoid CSE!\n");
666 DoCSE = false;
667 break;
668 }
669
670 CSEPairs.push_back(std::make_pair(OldReg, NewReg));
671 --NumDefs;
672 }
673
674 // Actually perform the elimination.
675 if (DoCSE) {
676 for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
677 unsigned OldReg = CSEPair.first;
678 unsigned NewReg = CSEPair.second;
679 // OldReg may have been unused but is used now, clear the Dead flag
680 MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
681 assert(Def != nullptr && "CSEd register has no unique definition?");
682 Def->clearRegisterDeads(NewReg);
683 // Replace with NewReg and clear kill flags which may be wrong now.
684 MRI->replaceRegWith(OldReg, NewReg);
685 MRI->clearKillFlags(NewReg);
686 }
687
688 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
689 // we should make sure it is not dead at CSMI.
690 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
691 CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
692 for (const auto &PhysDef : PhysDefs)
693 if (!MI.getOperand(PhysDef.first).isDead())
694 CSMI->getOperand(PhysDef.first).setIsDead(false);
695
696 // Go through implicit defs of CSMI and MI, and clear the kill flags on
697 // their uses in all the instructions between CSMI and MI.
698 // We might have made some of the kill flags redundant, consider:
699 // subs ... implicit-def %nzcv <- CSMI
700 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
701 // subs ... implicit-def %nzcv <- MI, to be eliminated
702 // csinc ... implicit killed %nzcv
703 // Since we eliminated MI, and reused a register imp-def'd by CSMI
704 // (here %nzcv), that register, if it was killed before MI, should have
705 // that kill flag removed, because it's lifetime was extended.
706 if (CSMI->getParent() == MI.getParent()) {
707 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
708 for (auto ImplicitDef : ImplicitDefs)
709 if (MachineOperand *MO = II->findRegisterUseOperand(
710 ImplicitDef, TRI, /*isKill=*/true))
711 MO->setIsKill(false);
712 } else {
713 // If the instructions aren't in the same BB, bail out and clear the
714 // kill flag on all uses of the imp-def'd register.
715 for (auto ImplicitDef : ImplicitDefs)
716 MRI->clearKillFlags(ImplicitDef);
717 }
718
719 if (CrossMBBPhysDef) {
720 // Add physical register defs now coming in from a predecessor to MBB
721 // livein list.
722 while (!PhysDefs.empty()) {
723 auto LiveIn = PhysDefs.pop_back_val();
724 if (!MBB->isLiveIn(LiveIn.second))
725 MBB->addLiveIn(LiveIn.second);
726 }
727 ++NumCrossBBCSEs;
728 }
729
730 MI.eraseFromParent();
731 ++NumCSEs;
732 if (!PhysRefs.empty())
733 ++NumPhysCSEs;
734 if (Commuted)
735 ++NumCommutes;
736 Changed = true;
737 } else {
738 VNT.insert(&MI, CurrVN++);
739 Exps.push_back(&MI);
740 }
741 CSEPairs.clear();
742 ImplicitDefsToUpdate.clear();
743 ImplicitDefs.clear();
744 }
745
746 return Changed;
747}
748
749/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
750/// dominator tree node if its a leaf or all of its children are done. Walk
751/// up the dominator tree to destroy ancestors which are now done.
752void MachineCSEImpl::ExitScopeIfDone(
755 if (OpenChildren[Node])
756 return;
757
758 // Pop scope.
759 ExitScope(Node->getBlock());
760
761 // Now traverse upwards to pop ancestors whose offsprings are all done.
762 while (MachineDomTreeNode *Parent = Node->getIDom()) {
763 unsigned Left = --OpenChildren[Parent];
764 if (Left != 0)
765 break;
766 ExitScope(Parent->getBlock());
767 Node = Parent;
768 }
769}
770
771bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) {
775
776 CurrVN = 0;
777
778 // Perform a DFS walk to determine the order of visit.
779 WorkList.push_back(Node);
780 do {
781 Node = WorkList.pop_back_val();
782 Scopes.push_back(Node);
783 OpenChildren[Node] = Node->getNumChildren();
784 append_range(WorkList, Node->children());
785 } while (!WorkList.empty());
786
787 // Now perform CSE.
788 bool Changed = false;
789 for (MachineDomTreeNode *Node : Scopes) {
790 MachineBasicBlock *MBB = Node->getBlock();
791 EnterScope(MBB);
792 Changed |= ProcessBlockCSE(MBB);
793 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
794 ExitScopeIfDone(Node, OpenChildren);
795 }
796
797 return Changed;
798}
799
800// We use stronger checks for PRE candidate rather than for CSE ones to embrace
801// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
802// to exclude instrs created by PRE that won't be CSEed later.
803bool MachineCSEImpl::isPRECandidate(MachineInstr *MI,
804 SmallSet<MCRegister, 8> &PhysRefs) {
805 if (!isCSECandidate(MI) ||
806 MI->isNotDuplicable() ||
807 MI->mayLoad() ||
809 MI->getNumDefs() != 1 ||
810 MI->getNumExplicitDefs() != 1)
811 return false;
812
813 for (const MachineOperand &MO : MI->operands()) {
814 if (MO.isReg() && !MO.getReg().isVirtual()) {
815 if (MO.isDef())
816 return false;
817 else
818 PhysRefs.insert(MO.getReg());
819 }
820 }
821
822 return true;
823}
824
825bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT,
827 bool Changed = false;
830 if (!isPRECandidate(&MI, PhysRefs))
831 continue;
832
833 auto [It, Inserted] = PREMap.try_emplace(&MI, MBB);
834 if (Inserted)
835 continue;
836
837 auto *MBB1 = It->second;
838 assert(
839 !DT->properlyDominates(MBB, MBB1) &&
840 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
841 auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
842 if (!CMBB->isLegalToHoistInto())
843 continue;
844
845 if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
846 continue;
847
848 // Two instrs are partial redundant if their basic blocks are reachable
849 // from one to another but one doesn't dominate another.
850 if (CMBB != MBB1) {
851 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
852 if (BB != nullptr && BB1 != nullptr &&
853 (isPotentiallyReachable(BB1, BB) ||
854 isPotentiallyReachable(BB, BB1))) {
855 // The following check extends the definition of `isConvergent` to
856 // assume a convergent instruction is dependent not only on additional
857 // conditions, but also on fewer conditions. LLVM does not have a
858 // MachineInstr attribute which expresses this extended definition, so
859 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
860 // subset of `isConvergent` instructions which do fall into this
861 // extended definition.
862 if (MI.isConvergent() && CMBB != MBB)
863 continue;
864
865 // If this instruction uses physical registers then we can only do PRE
866 // if it's using the value that is live at the place we're hoisting to.
867 bool NonLocal;
868 PhysDefVector PhysDefs;
869 if (!PhysRefs.empty() &&
870 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
871 PhysDefs, NonLocal))
872 continue;
873
874 assert(MI.getOperand(0).isDef() &&
875 "First operand of instr with one explicit def must be this def");
876 Register VReg = MI.getOperand(0).getReg();
877 Register NewReg = MRI->cloneVirtualRegister(VReg);
878 if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
879 continue;
880 MachineInstr &NewMI =
881 TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
882
883 // When hoisting, make sure we don't carry the debug location of
884 // the original instruction, as that's not correct and can cause
885 // unexpected jumps when debugging optimized code.
886 auto EmptyDL = DebugLoc();
887 NewMI.setDebugLoc(EmptyDL);
888
889 NewMI.getOperand(0).setReg(NewReg);
890
891 PREMap[&MI] = CMBB;
892 ++NumPREs;
893 Changed = true;
894 }
895 }
896 }
897 return Changed;
898}
899
900// This simple PRE (partial redundancy elimination) pass doesn't actually
901// eliminate partial redundancy but transforms it to full redundancy,
902// anticipating that the next CSE step will eliminate this created redundancy.
903// If CSE doesn't eliminate this, than created instruction will remain dead
904// and eliminated later by Remove Dead Machine Instructions pass.
905bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) {
907
908 PREMap.clear();
909 bool Changed = false;
910 BBs.push_back(DT->getRootNode());
911 do {
912 auto Node = BBs.pop_back_val();
913 append_range(BBs, Node->children());
914
915 MachineBasicBlock *MBB = Node->getBlock();
916 Changed |= ProcessBlockPRE(DT, MBB);
917
918 } while (!BBs.empty());
919
920 return Changed;
921}
922
923bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
925 MachineBasicBlock *MBB1) {
926 if (CandidateBB->getParent()->getFunction().hasMinSize())
927 return true;
928 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
929 assert(DT->dominates(CandidateBB, MBB1) &&
930 "CandidateBB should dominate MBB1");
931 return MBFI->getBlockFreq(CandidateBB) <=
932 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
933}
934
935void MachineCSEImpl::releaseMemory() {
936 ScopeMap.clear();
937 PREMap.clear();
938 Exps.clear();
939}
940
941bool MachineCSEImpl::run(MachineFunction &MF) {
944 MRI = &MF.getRegInfo();
945 LookAheadLimit = TII->getMachineCSELookAheadLimit();
946 bool ChangedPRE, ChangedCSE;
947 ChangedPRE = PerformSimplePRE(DT);
948 ChangedCSE = PerformCSE(DT->getRootNode());
949 releaseMemory();
950 return ChangedPRE || ChangedCSE;
951}
952
955 MFPropsModifier _(*this, MF);
956
960 MachineCSEImpl Impl(&MDT, &MBFI);
961 bool Changed = Impl.run(MF);
962 if (!Changed)
963 return PreservedAnalyses::all();
964
966 PA.preserve<MachineLoopAnalysis>();
967 PA.preserve<MachineDominatorTreeAnalysis>();
968 PA.preserve<MachineBlockFrequencyAnalysis>();
969 PA.preserveSet<CFGAnalyses>();
970 return PA;
971}
972
973bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) {
974 if (skipFunction(MF.getFunction()))
975 return false;
976
978 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
980 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
981 MachineCSEImpl Impl(&MDT, &MBFI);
982 return Impl.run(MF);
983}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
This file defines the BumpPtrAllocator interface.
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:390
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
Definition: MachineCSE.cpp:264
Machine Common Subexpression Elimination
Definition: MachineCSE.cpp:170
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
static cl::opt< bool > AggressiveMachineCSE("aggressive-machine-cse", cl::Hidden, cl::init(false), cl::desc("Override the profitability heuristics for Machine CSE"))
#define DEBUG_TYPE
Definition: MachineCSE.cpp:51
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
bool isAsCheapAsAMove(const MachineInstr &MI) const override
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
An RAII based helper class to modify MachineFunctionProperties when running pass.
unsigned pred_size() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition: MachineCSE.cpp:953
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:71
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:349
void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
ScopedHashTableScope< K, V, KInfo, AllocatorTy > ScopeTy
ScopeTy - This is a helpful typedef that allows clients to get easy access to the name of the scope f...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
bool empty() const
Definition: SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
void initializeMachineCSELegacyPass(PassRegistry &)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:382
char & MachineCSELegacyID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:164
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:281
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...