Skip to content

Commit 6f1d6d9

Browse files
authored
Replaced some code in dxil-cond-mem2reg with a dxilutils helper. Fixed a crash. (#4474)
* Replaced the code to delete unread allocas in dxil-cond-mem2reg with calling the helper in dxilutils. * Fixed a crash in the helper caused by walking the entry in forward order, which could invalidate next iterator. * Added handling of memcpy's in the helper. * Added tests that actually exercise the memcpy path.
1 parent fafb0ae commit 6f1d6d9

5 files changed

Lines changed: 378 additions & 63 deletions

File tree

lib/DXIL/DxilUtil.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,12 @@ struct AllocaDeleter {
10221022
for (User *U : V->users())
10231023
Add(U);
10241024
}
1025+
else if (MemCpyInst *MC = dyn_cast<MemCpyInst>(V)) {
1026+
// If the memcopy's source is anything we've encountered in the
1027+
// seen set, then the alloca is being read.
1028+
if (Seen.count(MC->getSource()))
1029+
return false;
1030+
}
10251031
// If it's anything else, we'll assume it's reading the
10261032
// alloca. Give up.
10271033
else {
@@ -1061,8 +1067,9 @@ bool DeleteDeadAllocas(llvm::Function &F) {
10611067

10621068
while (1) {
10631069
bool LocalChanged = false;
1064-
for (auto it = Entry.begin(), end = Entry.end(); it != end;) {
1065-
AllocaInst *AI = dyn_cast<AllocaInst>(&*(it++));
1070+
for (Instruction *it = &Entry.back(); it;) {
1071+
AllocaInst *AI = dyn_cast<AllocaInst>(it);
1072+
it = it->getPrevNode();
10661073
if (!AI)
10671074
continue;
10681075
LocalChanged |= Deleter.TryDeleteUnusedAlloca(AI);

lib/Transforms/Scalar/DxilConditionalMem2Reg.cpp

Lines changed: 1 addition & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -110,66 +110,6 @@ class DxilConditionalMem2Reg : public FunctionPass {
110110
AU.setPreservesCFG();
111111
}
112112

113-
// Collect and remove all instructions that use AI, but
114-
// give up if there are anything other than store, bitcast,
115-
// memcpy, or GEP.
116-
static bool TryRemoveUnusedAlloca(AllocaInst *AI) {
117-
std::vector<Instruction *> WorkList;
118-
119-
WorkList.push_back(AI);
120-
121-
for (unsigned i = 0; i < WorkList.size(); i++) {
122-
Instruction *I = WorkList[i];
123-
124-
for (User *U : I->users()) {
125-
Instruction *UI = cast<Instruction>(U);
126-
127-
unsigned Opcode = UI->getOpcode();
128-
if (Opcode == Instruction::BitCast ||
129-
Opcode == Instruction::GetElementPtr ||
130-
Opcode == Instruction::Store)
131-
{
132-
WorkList.push_back(UI);
133-
}
134-
else if (MemCpyInst *MC = dyn_cast<MemCpyInst>(UI)) {
135-
if (MC->getSource() == I) { // MC reads from our alloca
136-
return false;
137-
}
138-
WorkList.push_back(UI);
139-
}
140-
else { // Load? PHINode? Assume read.
141-
return false;
142-
}
143-
}
144-
}
145-
146-
// Remove all instructions
147-
for (auto It = WorkList.rbegin(), E = WorkList.rend(); It != E; It++) {
148-
Instruction *I = *It;
149-
I->eraseFromParent();
150-
}
151-
152-
return true;
153-
}
154-
155-
static bool RemoveAllUnusedAllocas(Function &F) {
156-
std::vector<AllocaInst *> Allocas;
157-
BasicBlock &EntryBB = *F.begin();
158-
for (auto It = EntryBB.begin(), E = EntryBB.end(); It != E;) {
159-
Instruction &I = *(It++);
160-
if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
161-
Allocas.push_back(AI);
162-
}
163-
}
164-
165-
bool Changed = false;
166-
for (AllocaInst *AI : Allocas) {
167-
Changed |= TryRemoveUnusedAlloca(AI);
168-
}
169-
170-
return Changed;
171-
}
172-
173113
//
174114
// Turns all allocas of vector types that are marked with 'dx.precise'
175115
// and turn them into scalars. For example:
@@ -415,7 +355,7 @@ class DxilConditionalMem2Reg : public FunctionPass {
415355
bool Changed = false;
416356

417357
Changed |= RewriteOutputArgsDebugInfo(F);
418-
Changed |= RemoveAllUnusedAllocas(F);
358+
Changed |= dxilutil::DeleteDeadAllocas(F);
419359
Changed |= ScalarizePreciseVectorAlloca(F);
420360
Changed |= Mem2Reg(F, *DT, *AC);
421361

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
; RUN: %opt %s -dxil-cond-mem2reg -S | FileCheck %s
2+
3+
; Regression test for a crash in dxilutil::DeleteDeadAllocas
4+
5+
; CHECK: @main
6+
; CHECK-NOT: = alloca
7+
8+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
9+
target triple = "dxil-ms-dx"
10+
11+
%cb = type { [5 x float] }
12+
%dx.types.Handle = type { i8* }
13+
%dx.types.ResourceProperties = type { i32, i32 }
14+
15+
@cb = external constant %cb
16+
17+
; Function Attrs: nounwind readnone
18+
declare %cb* @"dx.hl.subscript.cb.%cb* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #0
19+
20+
; Function Attrs: nounwind readnone
21+
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %cb*, i32)"(i32, %cb*, i32) #0
22+
23+
; Function Attrs: nounwind readnone
24+
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %cb)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %cb) #0
25+
26+
; Function Attrs: convergent
27+
declare void @dx.noop() #1
28+
29+
; Function Attrs: nounwind
30+
define void @main(float* noalias) #2 {
31+
entry:
32+
%retval = alloca float, align 4, !dx.temp !3
33+
%alloca = alloca [5 x float], align 4
34+
35+
; Test is here. There was a crash when a user is immediately after the alloca
36+
; when calling dxilutil::DeleteDeadAllocas, because we deleted instructions
37+
; in the forward order.
38+
%my_index = getelementptr inbounds [5 x float], [5 x float]* %alloca, i32 0, i32 0
39+
store float %7, float* %my_index, align 4
40+
41+
%i = alloca i32, align 4
42+
%1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %cb*, i32)"(i32 0, %cb* @cb, i32 0)
43+
%2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %cb)"(i32 11, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 13, i32 68 }, %cb undef)
44+
%3 = call %cb* @"dx.hl.subscript.cb.%cb* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %2, i32 0)
45+
%4 = getelementptr inbounds %cb, %cb* %3, i32 0, i32 0, i32 0
46+
call void @dx.noop(), !dbg !4
47+
store i32 0, i32* %i, align 4, !dbg !4
48+
%5 = load i32, i32* %i, align 4, !dbg !8
49+
%cmp.1 = icmp slt i32 %5, 5, !dbg !9
50+
br i1 %cmp.1, label %for.body.lr.ph, label %for.end, !dbg !10
51+
52+
for.body.lr.ph: ; preds = %entry
53+
br label %for.body, !dbg !10
54+
55+
for.body: ; preds = %for.body.lr.ph, %for.inc
56+
%6 = load i32, i32* %i, align 4, !dbg !11
57+
%arrayidx3 = getelementptr inbounds %cb, %cb* %3, i32 0, i32 0, i32 %6, !dbg !12
58+
%7 = load float, float* %arrayidx3, align 4, !dbg !12
59+
%8 = load i32, i32* %i, align 4, !dbg !13
60+
%arrayidx2 = getelementptr inbounds [5 x float], [5 x float]* %alloca, i32 0, i32 %8, !dbg !14
61+
call void @dx.noop(), !dbg !15
62+
store float %7, float* %arrayidx2, align 4, !dbg !15
63+
br label %for.inc, !dbg !16
64+
65+
for.inc: ; preds = %for.body
66+
%9 = load i32, i32* %i, align 4, !dbg !17
67+
%inc = add nsw i32 %9, 1, !dbg !17
68+
call void @dx.noop(), !dbg !17
69+
store i32 %inc, i32* %i, align 4, !dbg !17
70+
%10 = load i32, i32* %i, align 4, !dbg !8
71+
%cmp = icmp slt i32 %10, 5, !dbg !9
72+
%tobool = icmp ne i1 %cmp, false, !dbg !9
73+
%tobool1 = icmp ne i1 %tobool, false, !dbg !10
74+
br i1 %tobool1, label %for.body, label %for.cond.for.end_crit_edge, !dbg !10, !llvm.loop !18
75+
76+
for.cond.for.end_crit_edge: ; preds = %for.inc
77+
br label %for.end, !dbg !10
78+
79+
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
80+
%11 = load float, float* %4, align 4, !dbg !20
81+
store float %11, float* %retval, !dbg !21
82+
%12 = load float, float* %retval, !dbg !21
83+
call void @dx.noop(), !dbg !21
84+
store float %12, float* %0, !dbg !21
85+
ret void, !dbg !21
86+
}
87+
88+
attributes #0 = { nounwind readnone }
89+
attributes #1 = { convergent }
90+
attributes #2 = { nounwind }
91+
92+
!llvm.module.flags = !{!0}
93+
!pauseresume = !{!1}
94+
!llvm.ident = !{!2}
95+
96+
!0 = !{i32 2, !"Debug Info Version", i32 3}
97+
!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
98+
!2 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
99+
!3 = !{}
100+
!4 = !DILocation(line: 9, column: 12, scope: !5)
101+
!5 = !DISubprogram(name: "main", scope: !6, file: !6, line: 6, type: !7, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: false, function: void (float*)* @main)
102+
!6 = !DIFile(filename: "F:\5Ctest\5Cod_fails_may_2022\5Csimple.hlsl", directory: "")
103+
!7 = !DISubroutineType(types: !3)
104+
!8 = !DILocation(line: 9, column: 19, scope: !5)
105+
!9 = !DILocation(line: 9, column: 21, scope: !5)
106+
!10 = !DILocation(line: 9, column: 3, scope: !5)
107+
!11 = !DILocation(line: 10, column: 21, scope: !5)
108+
!12 = !DILocation(line: 10, column: 17, scope: !5)
109+
!13 = !DILocation(line: 10, column: 12, scope: !5)
110+
!14 = !DILocation(line: 10, column: 5, scope: !5)
111+
!15 = !DILocation(line: 10, column: 15, scope: !5)
112+
!16 = !DILocation(line: 11, column: 3, scope: !5)
113+
!17 = !DILocation(line: 9, column: 27, scope: !5)
114+
!18 = distinct !{!18, !19}
115+
!19 = !{!"llvm.loop.unroll.disable"}
116+
!20 = !DILocation(line: 13, column: 10, scope: !5)
117+
!21 = !DILocation(line: 13, column: 3, scope: !5)
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
; RUN: %opt %s -dxil-cond-mem2reg -S | FileCheck %s
2+
3+
; Make sure that dxilutil::DeleteDeadAllocas handles memcpy's
4+
5+
; CHECK: @main
6+
; CHECK-NOT: = alloca %struct.Data
7+
8+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
9+
target triple = "dxil-ms-dx"
10+
11+
%struct.Data = type { [5 x float] }
12+
%ConstantBuffer = type opaque
13+
%cb = type { %struct.Data, %struct.Data }
14+
%dx.types.Handle = type { i8* }
15+
%dx.types.ResourceProperties = type { i32, i32 }
16+
17+
@"\01?foo@cb@@3UData@@B" = external global %struct.Data, align 4
18+
@"\01?bar@cb@@3UData@@B" = external global %struct.Data, align 4
19+
@"$Globals" = external constant %ConstantBuffer
20+
@cb = external constant %cb
21+
22+
; Function Attrs: nounwind
23+
define float @main() #0 {
24+
entry:
25+
%0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %cb*, i32)"(i32 0, %cb* @cb, i32 0)
26+
%1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %cb)"(i32 11, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 148 }, %cb undef)
27+
%2 = call %cb* @"dx.hl.subscript.cb.%cb* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0)
28+
%3 = getelementptr inbounds %cb, %cb* %2, i32 0, i32 0
29+
%4 = getelementptr inbounds %cb, %cb* %2, i32 0, i32 1, i32 0, i32 0
30+
%retval = alloca float, align 4, !dx.temp !3
31+
%alloca = alloca %struct.Data, align 4
32+
%i = alloca i32, align 4
33+
%5 = bitcast %struct.Data* %alloca to i8*, !dbg !4
34+
%6 = bitcast %struct.Data* %3 to i8*, !dbg !4
35+
call void @dx.noop(), !dbg !4
36+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 20, i32 1, i1 false), !dbg !4
37+
call void @dx.noop(), !dbg !8
38+
store i32 0, i32* %i, align 4, !dbg !8
39+
br label %for.cond, !dbg !9
40+
41+
for.cond: ; preds = %for.inc, %entry
42+
%7 = load i32, i32* %i, align 4, !dbg !10
43+
%cmp = icmp slt i32 %7, 5, !dbg !11
44+
%tobool = icmp ne i1 %cmp, false, !dbg !11
45+
%tobool1 = icmp ne i1 %tobool, false, !dbg !12
46+
br i1 %tobool1, label %for.body, label %for.end, !dbg !12
47+
48+
for.body: ; preds = %for.cond
49+
%8 = load i32, i32* %i, align 4, !dbg !13
50+
%arrayidx3 = getelementptr inbounds %cb, %cb* %2, i32 0, i32 1, i32 0, i32 %8, !dbg !14
51+
%9 = load float, float* %arrayidx3, align 4, !dbg !14
52+
%10 = load i32, i32* %i, align 4, !dbg !15
53+
%member = getelementptr inbounds %struct.Data, %struct.Data* %alloca, i32 0, i32 0, !dbg !16
54+
%arrayidx2 = getelementptr inbounds [5 x float], [5 x float]* %member, i32 0, i32 %10, !dbg !17
55+
call void @dx.noop(), !dbg !18
56+
store float %9, float* %arrayidx2, align 4, !dbg !18
57+
br label %for.inc, !dbg !19
58+
59+
for.inc: ; preds = %for.body
60+
%11 = load i32, i32* %i, align 4, !dbg !20
61+
%inc = add nsw i32 %11, 1, !dbg !20
62+
call void @dx.noop(), !dbg !20
63+
store i32 %inc, i32* %i, align 4, !dbg !20
64+
br label %for.cond, !dbg !12, !llvm.loop !21
65+
66+
for.end: ; preds = %for.cond
67+
%12 = load float, float* %4, align 4, !dbg !23
68+
store float %12, float* %retval, !dbg !24
69+
%13 = load float, float* %retval, !dbg !24
70+
call void @dx.noop(), !dbg !24
71+
ret float %13, !dbg !24
72+
}
73+
74+
; Function Attrs: nounwind
75+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
76+
77+
; Function Attrs: nounwind readnone
78+
declare %cb* @"dx.hl.subscript.cb.%cb* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1
79+
80+
; Function Attrs: nounwind readnone
81+
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %cb*, i32)"(i32, %cb*, i32) #1
82+
83+
; Function Attrs: nounwind readnone
84+
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %cb)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %cb) #1
85+
86+
; Function Attrs: convergent
87+
declare void @dx.noop() #2
88+
89+
attributes #0 = { nounwind }
90+
attributes #1 = { nounwind readnone }
91+
attributes #2 = { convergent }
92+
93+
!llvm.module.flags = !{!0}
94+
!pauseresume = !{!1}
95+
!llvm.ident = !{!2}
96+
97+
!0 = !{i32 2, !"Debug Info Version", i32 3}
98+
!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
99+
!2 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
100+
!3 = !{}
101+
!4 = !DILocation(line: 12, column: 17, scope: !5)
102+
!5 = !DISubprogram(name: "main", scope: !6, file: !6, line: 11, type: !7, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: float ()* @main)
103+
!6 = !DIFile(filename: "F:\5Ctest\5Cod_fails_may_2022\5Cmemcpy.hlsl", directory: "")
104+
!7 = !DISubroutineType(types: !3)
105+
!8 = !DILocation(line: 14, column: 12, scope: !5)
106+
!9 = !DILocation(line: 14, column: 8, scope: !5)
107+
!10 = !DILocation(line: 14, column: 19, scope: !5)
108+
!11 = !DILocation(line: 14, column: 21, scope: !5)
109+
!12 = !DILocation(line: 14, column: 3, scope: !5)
110+
!13 = !DILocation(line: 15, column: 35, scope: !5)
111+
!14 = !DILocation(line: 15, column: 24, scope: !5)
112+
!15 = !DILocation(line: 15, column: 19, scope: !5)
113+
!16 = !DILocation(line: 15, column: 12, scope: !5)
114+
!17 = !DILocation(line: 15, column: 5, scope: !5)
115+
!18 = !DILocation(line: 15, column: 22, scope: !5)
116+
!19 = !DILocation(line: 16, column: 3, scope: !5)
117+
!20 = !DILocation(line: 14, column: 27, scope: !5)
118+
!21 = distinct !{!21, !22}
119+
!22 = !{!"llvm.loop.unroll.disable"}
120+
!23 = !DILocation(line: 18, column: 10, scope: !5)
121+
!24 = !DILocation(line: 18, column: 3, scope: !5)

0 commit comments

Comments
 (0)