Skip to content

Commit bb168c9

Browse files
damyanp and Copilot authored
Fix perf regression: limit debug info to relevant subprograms (#8174) (#8177)
PR #7799 added debug info for global variables across all inlined subprograms, creating O(subprograms × globals) debug instructions. This caused 2-3x slower compilation for shaders with many globals and deeply-inlined functions. Fix: Instead of collecting ALL subprograms for a function (walking every instruction), only collect subprograms from the alloca's actual users -- the instructions that reference the specific global variable. This preserves the debug info feature (globals visible in inlined scopes that use them) while eliminating work for unrelated scopes. The 'Lower static global into Alloca' pass is 3.8x faster on a stress test (41ms -> 11ms), and overall debug compilation is ~10% faster. Fixes #8174. Co-authored-by: Copilot <[email protected]>
1 parent cdbd4e6 commit bb168c9

1 file changed

Lines changed: 41 additions & 29 deletions

File tree

lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

Lines changed: 41 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -6505,47 +6505,49 @@ ModulePass *llvm::createSROA_Parameter_HLSL() {
65056505
namespace {
65066506

65076507
struct GVDebugInfoPatchCache {
6508-
DenseMap<Function *, SetVector<DISubprogram *>> SubprogramsForFunction;
6509-
DenseSet<DILocation *> Seen;
6508+
DenseMap<DILocation *, DISubprogram *> LocToSubprogram;
6509+
DenseMap<Function *, DISubprogram *> FuncToSubprogram;
65106510
DITypeIdentifierMap EmptyMap;
65116511

6512+
DISubprogram *GetSubprogramForLoc(DILocation *Loc) {
6513+
auto It = LocToSubprogram.find(Loc);
6514+
if (It != LocToSubprogram.end())
6515+
return It->second;
6516+
DISubprogram *Result = nullptr;
6517+
auto *Scope = dyn_cast<DIScope>(Loc->getScope());
6518+
while (Scope) {
6519+
if (auto SubP = dyn_cast<DISubprogram>(Scope)) {
6520+
Result = SubP;
6521+
break;
6522+
}
6523+
Scope = Scope->getScope().resolve(EmptyMap);
6524+
}
6525+
LocToSubprogram[Loc] = Result;
6526+
return Result;
6527+
}
6528+
6529+
// Collect DISubprograms from a DILocation's inlined-at chain.
65126530
void CollectSubprograms(DILocation *Loc, SetVector<DISubprogram *> &Set) {
65136531
while (Loc) {
6514-
// This is potentially very expensive. Avoid repeatedly looking for
6515-
// DISubprogram's
6516-
if (Seen.count(Loc))
6517-
return;
6518-
Seen.insert(Loc);
6519-
auto *Scope = dyn_cast<DIScope>(Loc->getScope());
6520-
while (Scope) {
6521-
if (auto SubP = dyn_cast<DISubprogram>(Scope)) {
6522-
Set.insert(SubP);
6523-
break;
6524-
}
6525-
Scope = Scope->getScope().resolve(EmptyMap);
6526-
}
6532+
if (DISubprogram *SP = GetSubprogramForLoc(Loc))
6533+
Set.insert(SP);
65276534
Loc = Loc->getInlinedAt();
65286535
}
65296536
}
65306537

6531-
SetVector<DISubprogram *> &
6532-
GetSubprogramsForFunction(Function *F, DebugInfoFinder &DbgFinder) {
6533-
auto It = SubprogramsForFunction.find(F);
6534-
if (It != SubprogramsForFunction.end())
6538+
DISubprogram *GetFuncSubprogram(Function *F, DebugInfoFinder &DbgFinder) {
6539+
auto It = FuncToSubprogram.find(F);
6540+
if (It != FuncToSubprogram.end())
65356541
return It->second;
6536-
6537-
SetVector<DISubprogram *> &Ret = SubprogramsForFunction[F];
6542+
DISubprogram *Result = nullptr;
65386543
for (DISubprogram *SP : DbgFinder.subprograms()) {
65396544
if (SP->getFunction() == F) {
6540-
Ret.insert(SP);
6545+
Result = SP;
65416546
break;
65426547
}
65436548
}
6544-
6545-
for (BasicBlock &BB : *F)
6546-
for (Instruction &I : BB)
6547-
CollectSubprograms(I.getDebugLoc(), Ret);
6548-
return Ret;
6549+
FuncToSubprogram[F] = Result;
6550+
return Result;
65496551
}
65506552
};
65516553

@@ -6771,8 +6773,18 @@ static void PatchDebugInfo(GVDebugInfoPatchCache &Cache,
67716773

67726774
DIBuilder DIB(*GV->getParent());
67736775

6774-
SetVector<DISubprogram *> &Subprograms =
6775-
Cache.GetSubprogramsForFunction(F, DbgFinder);
6776+
// Only collect subprograms relevant to this GV to avoid creating
6777+
// O(subprograms × globals) debug instructions.
6778+
SetVector<DISubprogram *> Subprograms;
6779+
6780+
if (DISubprogram *SP = Cache.GetFuncSubprogram(F, DbgFinder))
6781+
Subprograms.insert(SP);
6782+
6783+
for (User *U : AI->users()) {
6784+
if (Instruction *I = dyn_cast<Instruction>(U))
6785+
Cache.CollectSubprograms(I->getDebugLoc(), Subprograms);
6786+
}
6787+
67766788
for (DISubprogram *Subprogram : Subprograms) {
67776789
DIScope *Scope = Subprogram;
67786790
DebugLoc Loc = DebugLoc::get(DGV->getLine(), 0, Scope);

0 commit comments

Comments
 (0)