LLVM 20.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges globals with internal linkage into one. This way all the
10// globals which were merged into a biggest one can be addressed using offsets
11// from the same base pointer (no need for separate base pointer for each of the
12// global). Such a transformation can significantly reduce the register pressure
13// when many globals are involved.
14//
15// For example, consider the code which touches several global variables at
16// once:
17//
18// static int foo[N], bar[N], baz[N];
19//
20// for (i = 0; i < N; ++i) {
21// foo[i] = bar[i] * baz[i];
22// }
23//
24// On ARM the addresses of 3 arrays should be kept in the registers, thus
25// this code has quite large register pressure (loop body):
26//
27// ldr r1, [r5], #4
28// ldr r2, [r6], #4
29// mul r1, r2, r1
30// str r1, [r0], #4
31//
32// Pass converts the code to something like:
33//
34// static struct {
35// int foo[N];
36// int bar[N];
37// int baz[N];
38// } merged;
39//
40// for (i = 0; i < N; ++i) {
41// merged.foo[i] = merged.bar[i] * merged.baz[i];
42// }
43//
44// and in ARM code this becomes:
45//
46// ldr r0, [r5, #40]
47// ldr r1, [r5, #80]
48// mul r0, r1, r0
49// str r0, [r5], #4
50//
51// note that we saved 2 registers here almostly "for free".
52//
53// However, merging globals can have tradeoffs:
54// - it confuses debuggers, tools, and users
55// - it makes linker optimizations less useful (order files, LOHs, ...)
56// - it forces usage of indexed addressing (which isn't necessarily "free")
57// - it can increase register pressure when the uses are disparate enough.
58//
59// We use heuristics to discover the best global grouping we can (cf cl::opts).
60//
61// ===---------------------------------------------------------------------===//
62
64#include "llvm/ADT/BitVector.h"
65#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/MapVector.h"
67#include "llvm/ADT/SetVector.h"
69#include "llvm/ADT/Statistic.h"
70#include "llvm/ADT/StringRef.h"
71#include "llvm/ADT/Twine.h"
72#include "llvm/CodeGen/Passes.h"
73#include "llvm/IR/BasicBlock.h"
74#include "llvm/IR/Constants.h"
75#include "llvm/IR/DataLayout.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Module.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
88#include "llvm/MC/SectionKind.h"
89#include "llvm/Pass.h"
92#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstddef>
100#include <cstdint>
101#include <string>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "global-merge"
107
108// FIXME: This is only useful as a last-resort way to disable the pass.
109static cl::opt<bool>
110EnableGlobalMerge("enable-global-merge", cl::Hidden,
111 cl::desc("Enable the global merge pass"),
112 cl::init(true));
113
115GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
116 cl::desc("Set maximum offset for global merge pass"),
117 cl::init(0));
118
120 "global-merge-group-by-use", cl::Hidden,
121 cl::desc("Improve global merge pass to look at uses"), cl::init(true));
122
124 "global-merge-all-const", cl::Hidden,
125 cl::desc("Merge all const globals without looking at uses"),
126 cl::init(false));
127
129 "global-merge-ignore-single-use", cl::Hidden,
130 cl::desc("Improve global merge pass to ignore globals only used alone"),
131 cl::init(true));
132
133static cl::opt<bool>
134EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
135 cl::desc("Enable global merge pass on constants"),
136 cl::init(false));
137
138// FIXME: this could be a transitional option, and we probably need to remove
139// it if only we are sure this optimization could always benefit all targets.
141EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
142 cl::desc("Enable global merge pass on external linkage"));
143
145 GlobalMergeMinDataSize("global-merge-min-data-size",
146 cl::desc("The minimum size in bytes of each global "
147 "that should considered in merging."),
148 cl::init(0), cl::Hidden);
149
150STATISTIC(NumMerged, "Number of globals merged");
151
152namespace {
153
154class GlobalMergeImpl {
155 const TargetMachine *TM = nullptr;
157 bool IsMachO = false;
158
159private:
160 bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, Module &M,
161 bool isConst, unsigned AddrSpace) const;
162
163 /// Merge everything in \p Globals for which the corresponding bit
164 /// in \p GlobalSet is set.
165 bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
166 const BitVector &GlobalSet, Module &M, bool isConst,
167 unsigned AddrSpace) const;
168
169 /// Check if the given variable has been identified as must keep
170 /// \pre setMustKeepGlobalVariables must have been called on the Module that
171 /// contains GV
172 bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
173 return MustKeepGlobalVariables.count(GV);
174 }
175
176 /// Collect every variables marked as "used" or used in a landing pad
177 /// instruction for this Module.
178 void setMustKeepGlobalVariables(Module &M);
179
180 /// Collect every variables marked as "used"
182
183 /// Keep track of the GlobalVariable that must not be merged away
184 SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
185
186public:
187 GlobalMergeImpl(const TargetMachine *TM, GlobalMergeOptions Opt)
188 : TM(TM), Opt(Opt) {}
189 bool run(Module &M);
190};
191
192class GlobalMerge : public FunctionPass {
193 const TargetMachine *TM = nullptr;
195
196public:
197 static char ID; // Pass identification, replacement for typeid.
198
199 explicit GlobalMerge() : FunctionPass(ID) {
202 }
203
204 explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
205 bool OnlyOptimizeForSize, bool MergeExternalGlobals,
206 bool MergeConstantGlobals, bool MergeConstAggressive)
207 : FunctionPass(ID), TM(TM) {
208 Opt.MaxOffset = MaximalOffset;
209 Opt.SizeOnly = OnlyOptimizeForSize;
210 Opt.MergeExternal = MergeExternalGlobals;
211 Opt.MergeConstantGlobals = MergeConstantGlobals;
212 Opt.MergeConstAggressive = MergeConstAggressive;
214 }
215
216 bool doInitialization(Module &M) override {
217 auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
218 Metadata *SDL = M.getModuleFlag("SmallDataLimit");
219 if (!SDL)
220 return std::nullopt;
221 return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
222 };
223 if (GlobalMergeMinDataSize.getNumOccurrences())
225 else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
226 Opt.MinSize = *SDL + 1;
227 else
228 Opt.MinSize = 0;
229
230 GlobalMergeImpl P(TM, Opt);
231 return P.run(M);
232 }
233 bool runOnFunction(Function &F) override { return false; }
234
235 StringRef getPassName() const override { return "Merge internal globals"; }
236
237 void getAnalysisUsage(AnalysisUsage &AU) const override {
238 AU.setPreservesCFG();
240 }
241};
242
243} // end anonymous namespace
244
246 GlobalMergeImpl P(TM, Options);
247 bool Changed = P.run(M);
248 if (!Changed)
249 return PreservedAnalyses::all();
250
253 return PA;
254}
255
256char GlobalMerge::ID = 0;
257
258INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
259
260bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
261 Module &M, bool isConst,
262 unsigned AddrSpace) const {
263 auto &DL = M.getDataLayout();
264 // FIXME: Find better heuristics
266 Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
267 // We don't support scalable global variables.
268 return DL.getTypeAllocSize(GV1->getValueType()).getFixedValue() <
269 DL.getTypeAllocSize(GV2->getValueType()).getFixedValue();
270 });
271
272 // If we want to just blindly group all globals together, do so.
273 if (!GlobalMergeGroupByUse || (Opt.MergeConstAggressive && isConst)) {
274 BitVector AllGlobals(Globals.size());
275 AllGlobals.set();
276 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
277 }
278
279 // If we want to be smarter, look at all uses of each global, to try to
280 // discover all sets of globals used together, and how many times each of
281 // these sets occurred.
282 //
283 // Keep this reasonably efficient, by having an append-only list of all sets
284 // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
285 // code (currently, a Function) to the set of globals seen so far that are
286 // used together in that unit (GlobalUsesByFunction).
287 //
288 // When we look at the Nth global, we know that any new set is either:
289 // - the singleton set {N}, containing this global only, or
290 // - the union of {N} and a previously-discovered set, containing some
291 // combination of the previous N-1 globals.
292 // Using that knowledge, when looking at the Nth global, we can keep:
293 // - a reference to the singleton set {N} (CurGVOnlySetIdx)
294 // - a list mapping each previous set to its union with {N} (EncounteredUGS),
295 // if it actually occurs.
296
297 // We keep track of the sets of globals used together "close enough".
298 struct UsedGlobalSet {
299 BitVector Globals;
300 unsigned UsageCount = 1;
301
302 UsedGlobalSet(size_t Size) : Globals(Size) {}
303 };
304
305 // Each set is unique in UsedGlobalSets.
306 std::vector<UsedGlobalSet> UsedGlobalSets;
307
308 // Avoid repeating the create-global-set pattern.
309 auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
310 UsedGlobalSets.emplace_back(Globals.size());
311 return UsedGlobalSets.back();
312 };
313
314 // The first set is the empty set.
315 CreateGlobalSet().UsageCount = 0;
316
317 // We define "close enough" to be "in the same function".
318 // FIXME: Grouping uses by function is way too aggressive, so we should have
319 // a better metric for distance between uses.
320 // The obvious alternative would be to group by BasicBlock, but that's in
321 // turn too conservative..
322 // Anything in between wouldn't be trivial to compute, so just stick with
323 // per-function grouping.
324
325 // The value type is an index into UsedGlobalSets.
326 // The default (0) conveniently points to the empty set.
327 DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
328
329 // Now, look at each merge-eligible global in turn.
330
331 // Keep track of the sets we already encountered to which we added the
332 // current global.
333 // Each element matches the same-index element in UsedGlobalSets.
334 // This lets us efficiently tell whether a set has already been expanded to
335 // include the current global.
336 std::vector<size_t> EncounteredUGS;
337
338 for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
339 GlobalVariable *GV = Globals[GI];
340
341 // Reset the encountered sets for this global and grow it in case we created
342 // new sets for the previous global.
343 EncounteredUGS.assign(UsedGlobalSets.size(), 0);
344
345 // We might need to create a set that only consists of the current global.
346 // Keep track of its index into UsedGlobalSets.
347 size_t CurGVOnlySetIdx = 0;
348
349 // For each global, look at all its Uses.
350 for (auto &U : GV->uses()) {
351 // This Use might be a ConstantExpr. We're interested in Instruction
352 // users, so look through ConstantExpr...
353 Use *UI, *UE;
354 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
355 if (CE->use_empty())
356 continue;
357 UI = &*CE->use_begin();
358 UE = nullptr;
359 } else if (isa<Instruction>(U.getUser())) {
360 UI = &U;
361 UE = UI->getNext();
362 } else {
363 continue;
364 }
365
366 // ...to iterate on all the instruction users of the global.
367 // Note that we iterate on Uses and not on Users to be able to getNext().
368 for (; UI != UE; UI = UI->getNext()) {
369 Instruction *I = dyn_cast<Instruction>(UI->getUser());
370 if (!I)
371 continue;
372
373 Function *ParentFn = I->getParent()->getParent();
374
375 // If we're only optimizing for size, ignore non-minsize functions.
376 if (Opt.SizeOnly && !ParentFn->hasMinSize())
377 continue;
378
379 size_t UGSIdx = GlobalUsesByFunction[ParentFn];
380
381 // If this is the first global the function uses, map it to the set
382 // consisting of this global only.
383 if (!UGSIdx) {
384 // If that set doesn't exist yet, create it.
385 if (!CurGVOnlySetIdx) {
386 CurGVOnlySetIdx = UsedGlobalSets.size();
387 CreateGlobalSet().Globals.set(GI);
388 } else {
389 ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
390 }
391
392 GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
393 continue;
394 }
395
396 // If we already encountered a use of this global in this function, just
397 // increment the counter.
398 if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
399 ++UsedGlobalSets[UGSIdx].UsageCount;
400 continue;
401 }
402
403 // If not, the previous set wasn't actually used in this function.
404 --UsedGlobalSets[UGSIdx].UsageCount;
405
406 // If we already expanded the previous set to include this global, just
407 // reuse that expanded set.
408 if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
409 ++UsedGlobalSets[ExpandedIdx].UsageCount;
410 GlobalUsesByFunction[ParentFn] = ExpandedIdx;
411 continue;
412 }
413
414 // If not, create a new set consisting of the union of the previous set
415 // and this global. Mark it as encountered, so we can reuse it later.
416 GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
417 UsedGlobalSets.size();
418
419 UsedGlobalSet &NewUGS = CreateGlobalSet();
420 NewUGS.Globals.set(GI);
421 NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
422 }
423 }
424 }
425
426 // We can choose to merge all globals together, but ignore globals never used
427 // with another global. This catches the obviously non-profitable cases of
428 // having a single global, but is aggressive enough for any other case.
430 BitVector AllGlobals(Globals.size());
431 for (const UsedGlobalSet &UGS : UsedGlobalSets) {
432 if (UGS.UsageCount == 0)
433 continue;
434 if (UGS.Globals.count() > 1)
435 AllGlobals |= UGS.Globals;
436 }
437 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
438 }
439
440 // Now we found a bunch of sets of globals used together. We accumulated
441 // the number of times we encountered the sets (i.e., the number of functions
442 // that use that exact set of globals). Multiply that by the size of the set
443 // to give us a crude profitability metric.
444 llvm::stable_sort(UsedGlobalSets,
445 [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
446 return UGS1.Globals.count() * UGS1.UsageCount >=
447 UGS2.Globals.count() * UGS2.UsageCount;
448 });
449
450 // Starting from the sets with the best (=biggest) profitability, find a
451 // good combination.
452 // The ideal (and expensive) solution can only be found by trying all
453 // combinations, looking for the one with the best profitability.
454 // Don't be smart about it, and just pick the first compatible combination,
455 // starting with the sets with the best profitability.
456 BitVector PickedGlobals(Globals.size());
457 bool Changed = false;
458
459 for (const UsedGlobalSet &UGS : UsedGlobalSets) {
460 if (UGS.UsageCount == 0)
461 continue;
462 if (PickedGlobals.anyCommon(UGS.Globals))
463 continue;
464 PickedGlobals |= UGS.Globals;
465 // If the set only contains one global, there's no point in merging.
466 // Ignore the global for inclusion in other sets though, so keep it in
467 // PickedGlobals.
468 if (UGS.Globals.count() < 2)
469 continue;
470 Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
471 }
472
473 return Changed;
474}
475
476bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
477 const BitVector &GlobalSet, Module &M,
478 bool isConst, unsigned AddrSpace) const {
479 assert(Globals.size() > 1);
480
481 Type *Int32Ty = Type::getInt32Ty(M.getContext());
482 Type *Int8Ty = Type::getInt8Ty(M.getContext());
483 auto &DL = M.getDataLayout();
484
485 LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
486 << GlobalSet.find_first() << ", total of " << Globals.size()
487 << "\n");
488
489 bool Changed = false;
490 ssize_t i = GlobalSet.find_first();
491 while (i != -1) {
492 ssize_t j = 0;
493 uint64_t MergedSize = 0;
494 std::vector<Type*> Tys;
495 std::vector<Constant*> Inits;
496 std::vector<unsigned> StructIdxs;
497
498 bool HasExternal = false;
499 StringRef FirstExternalName;
500 Align MaxAlign;
501 unsigned CurIdx = 0;
502 for (j = i; j != -1; j = GlobalSet.find_next(j)) {
503 Type *Ty = Globals[j]->getValueType();
504
505 // Make sure we use the same alignment AsmPrinter would use.
506 Align Alignment = DL.getPreferredAlign(Globals[j]);
507 unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
508 MergedSize += Padding;
509 MergedSize += DL.getTypeAllocSize(Ty);
510 if (MergedSize > Opt.MaxOffset) {
511 break;
512 }
513 if (Padding) {
514 Tys.push_back(ArrayType::get(Int8Ty, Padding));
515 Inits.push_back(ConstantAggregateZero::get(Tys.back()));
516 ++CurIdx;
517 }
518 Tys.push_back(Ty);
519 Inits.push_back(Globals[j]->getInitializer());
520 StructIdxs.push_back(CurIdx++);
521
522 MaxAlign = std::max(MaxAlign, Alignment);
523
524 if (Globals[j]->hasExternalLinkage() && !HasExternal) {
525 HasExternal = true;
526 FirstExternalName = Globals[j]->getName();
527 }
528 }
529
530 // Exit early if there is only one global to merge.
531 if (Tys.size() < 2) {
532 i = j;
533 continue;
534 }
535
536 // If merged variables doesn't have external linkage, we needn't to expose
537 // the symbol after merging.
541 // Use a packed struct so we can control alignment.
542 StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
543 Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
544
545 // On Darwin external linkage needs to be preserved, otherwise
546 // dsymutil cannot preserve the debug info for the merged
547 // variables. If they have external linkage, use the symbol name
548 // of the first variable merged as the suffix of global symbol
549 // name. This avoids a link-time naming conflict for the
550 // _MergedGlobals symbols.
551 Twine MergedName =
552 (IsMachO && HasExternal)
553 ? "_MergedGlobals_" + FirstExternalName
554 : "_MergedGlobals";
555 auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
556 auto *MergedGV = new GlobalVariable(
557 M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
559
560 MergedGV->setAlignment(MaxAlign);
561 MergedGV->setSection(Globals[i]->getSection());
562
563 LLVM_DEBUG(dbgs() << "MergedGV: " << *MergedGV << "\n");
564
565 const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
566 for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
567 GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
568 std::string Name(Globals[k]->getName());
569 GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
571 Globals[k]->getDLLStorageClass();
572
573 // Copy metadata while adjusting any debug info metadata by the original
574 // global's offset within the merged global.
575 MergedGV->copyMetadata(Globals[k],
576 MergedLayout->getElementOffset(StructIdxs[idx]));
577
578 Constant *Idx[2] = {
579 ConstantInt::get(Int32Ty, 0),
580 ConstantInt::get(Int32Ty, StructIdxs[idx]),
581 };
582 Constant *GEP =
583 ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
584 Globals[k]->replaceAllUsesWith(GEP);
585 Globals[k]->eraseFromParent();
586
587 // Emit an alias for the original variable name. This is necessary for an
588 // external symbol, as it may be accessed from another object. For
589 // internal symbols, it's not strictly required, but it's useful.
590 //
591 // This _should_ also work on Mach-O ever since '.alt_entry' support was
592 // added in 2016. Unfortunately, there's a bug in ld-prime (present at
593 // least from Xcode 15.0 through Xcode 16.0), in which -dead_strip doesn't
594 // always honor alt_entry. To workaround this issue, we don't emit aliases
595 // on Mach-O. Except, we _must_ do so for external symbols. That means
596 // MergeExternal is broken with that linker. (That option is currently off
597 // by default on MachO).
598 if (!IsMachO || Linkage == GlobalValue::ExternalLinkage) {
599 GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
600 Linkage, Name, GEP, &M);
601 GA->setVisibility(Visibility);
602 GA->setDLLStorageClass(DLLStorage);
603 }
604
605 NumMerged++;
606 }
607 Changed = true;
608 i = j;
609 }
610
611 return Changed;
612}
613
614void GlobalMergeImpl::collectUsedGlobalVariables(Module &M, StringRef Name) {
615 // Extract global variables from llvm.used array
616 const GlobalVariable *GV = M.getGlobalVariable(Name);
617 if (!GV || !GV->hasInitializer()) return;
618
619 // Should be an array of 'i8*'.
620 const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
621
622 for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
623 if (const GlobalVariable *G =
624 dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
625 MustKeepGlobalVariables.insert(G);
626}
627
628void GlobalMergeImpl::setMustKeepGlobalVariables(Module &M) {
629 collectUsedGlobalVariables(M, "llvm.used");
630 collectUsedGlobalVariables(M, "llvm.compiler.used");
631
632 for (Function &F : M) {
633 for (BasicBlock &BB : F) {
634 BasicBlock::iterator Pad = BB.getFirstNonPHIIt();
635 auto *II = dyn_cast<IntrinsicInst>(Pad);
636 if (!Pad->isEHPad() &&
637 !(II && II->getIntrinsicID() == Intrinsic::eh_typeid_for))
638 continue;
639
640 // Keep globals used by landingpads, catchpads,
641 // or intrinsics that require a plain global.
642 for (const Use &U : Pad->operands()) {
643 if (const GlobalVariable *GV =
644 dyn_cast<GlobalVariable>(U->stripPointerCasts()))
645 MustKeepGlobalVariables.insert(GV);
646 else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
647 for (const Use &Elt : CA->operands()) {
648 if (const GlobalVariable *GV =
649 dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
650 MustKeepGlobalVariables.insert(GV);
651 }
652 }
653 }
654 }
655 }
656}
657
658// This function returns true if the given data Section name has custom
659// subsection-splitting semantics in Mach-O (such as splitting by a fixed size)
660//
661// See also ObjFile::parseSections and getRecordSize in lld/MachO/InputFiles.cpp
662static bool isSpecialMachOSection(StringRef Section) {
663 // Uses starts_with, since section attributes can appear at the end of the
664 // name.
665 return Section.starts_with("__DATA,__cfstring") ||
666 Section.starts_with("__DATA,__objc_classrefs") ||
667 Section.starts_with("__DATA,__objc_selrefs");
668}
669
670bool GlobalMergeImpl::run(Module &M) {
672 return false;
673
674 IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
675
676 auto &DL = M.getDataLayout();
678 Globals, ConstGlobals, BSSGlobals;
679 bool Changed = false;
680 setMustKeepGlobalVariables(M);
681
682 LLVM_DEBUG({
683 dbgs() << "Number of GV that must be kept: " <<
684 MustKeepGlobalVariables.size() << "\n";
685 for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
686 dbgs() << "Kept: " << *KeptGV << "\n";
687 });
688 // Grab all non-const globals.
689 for (auto &GV : M.globals()) {
690 // Merge is safe for "normal" internal or external globals only
691 if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
692 continue;
693
694 // It's not safe to merge globals that may be preempted
695 if (TM && !TM->shouldAssumeDSOLocal(&GV))
696 continue;
697
698 if (!(Opt.MergeExternal && GV.hasExternalLinkage()) &&
699 !GV.hasLocalLinkage())
700 continue;
701
702 PointerType *PT = dyn_cast<PointerType>(GV.getType());
703 assert(PT && "Global variable is not a pointer!");
704
705 unsigned AddressSpace = PT->getAddressSpace();
707
708 // On Mach-O, some section names have special semantics. Don't merge these.
709 if (IsMachO && isSpecialMachOSection(Section))
710 continue;
711
712 // Ignore all 'special' globals.
713 if (GV.getName().starts_with("llvm.") || GV.getName().starts_with(".llvm."))
714 continue;
715
716 // Ignore all "required" globals:
717 if (isMustKeepGlobalVariable(&GV))
718 continue;
719
720 // Don't merge tagged globals, as each global should have its own unique
721 // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
722 // with compatible alignment and the same contents may be merged as long as
723 // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
724 // size_b / 16`).
725 if (GV.isTagged())
726 continue;
727
728 Type *Ty = GV.getValueType();
729 TypeSize AllocSize = DL.getTypeAllocSize(Ty);
730 if (AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize) {
731 if (TM &&
733 BSSGlobals[{AddressSpace, Section}].push_back(&GV);
734 else if (GV.isConstant())
735 ConstGlobals[{AddressSpace, Section}].push_back(&GV);
736 else
737 Globals[{AddressSpace, Section}].push_back(&GV);
738 }
739 LLVM_DEBUG(dbgs() << "GV "
740 << ((DL.getTypeAllocSize(Ty) < Opt.MaxOffset)
741 ? "to merge: "
742 : "not to merge: ")
743 << GV << "\n");
744 }
745
746 for (auto &P : Globals)
747 if (P.second.size() > 1)
748 Changed |= doMerge(P.second, M, false, P.first.first);
749
750 for (auto &P : BSSGlobals)
751 if (P.second.size() > 1)
752 Changed |= doMerge(P.second, M, false, P.first.first);
753
754 if (Opt.MergeConstantGlobals)
755 for (auto &P : ConstGlobals)
756 if (P.second.size() > 1)
757 Changed |= doMerge(P.second, M, true, P.first.first);
758
759 return Changed;
760}
761
763 bool OnlyOptimizeForSize,
764 bool MergeExternalByDefault,
765 bool MergeConstantByDefault,
766 bool MergeConstAggressiveByDefault) {
767 bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
768 MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
769 bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault;
770 bool MergeConstAggressive = GlobalMergeAllConst.getNumOccurrences() > 0
772 : MergeConstAggressiveByDefault;
773 return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal,
774 MergeConstant, MergeConstAggressive);
775}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::string Name
uint64_t Size
#define DEBUG_TYPE
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
static bool isSpecialMachOSection(StringRef Section)
static cl::opt< bool > GlobalMergeAllConst("global-merge-all-const", cl::Hidden, cl::desc("Merge all const globals without looking at uses"), cl::init(false))
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
static cl::opt< unsigned > GlobalMergeMinDataSize("global-merge-min-data-size", cl::desc("The minimum size in bytes of each global " "that should considered in merging."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This defines the Use class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:300
bool back() const
Return the last element in the vector.
Definition: BitVector.h:456
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:308
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1672
ConstantArray - Constant Array Declarations.
Definition: Constants.h:427
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1294
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1378
This is an important base class in LLVM.
Definition: Constant.h:42
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:557
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:117
bool hasExternalLinkage() const
Definition: GlobalValue.h:512
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:264
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:296
bool hasLocalLinkage() const
Definition: GlobalValue.h:529
bool isTagged() const
Definition: GlobalValue.h:366
void setDLLStorageClass(DLLStorageClassTypes C)
Definition: GlobalValue.h:285
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition: GlobalValue.h:73
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:295
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:66
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:255
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Type * getValueType() const
Definition: GlobalValue.h:297
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool hasImplicitSection() const
Check if section name is present.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:119
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:567
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:596
Class to represent struct types.
Definition: DerivedTypes.h:218
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:760
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
int getNumOccurrences() const
Definition: CommandLine.h:399
@ SDL
Definition: COFF.h:833
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition: ELF.h:534
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false, bool MergeConstantByDefault=false, bool MergeConstAggressiveByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
void initializeGlobalMergePass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:865
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool MergeConstAggressive
Whether we should merge constant global variables aggressively without looking at use.
Definition: GlobalMerge.h:35
bool SizeOnly
Whether we should try to optimize for size only.
Definition: GlobalMerge.h:40
bool MergeExternal
Whether we should merge global variables that have external linkage.
Definition: GlobalMerge.h:30
bool MergeConstantGlobals
Whether we should merge constant global variables.
Definition: GlobalMerge.h:32