Skip to content

Commit 484f1b0

Browse files
python3kgae, Greg Roth
authored and committed
Fix instruction order issue in scalarizer (#5001)
When scalarizing a shufflevector instruction whose input elements are extractelement instructions, create the clone of each extractelement before the shuffle instead of after it. When the shuffle is later replaced, the new vector is inserted before the shuffle, so an element cloned after the shuffle would be defined after its use, like this:

%[[B:.+]] = load <2 x float>, <2 x float>* %b, align 4
%[[X:.+]] = insertelement <4 x float> undef, float %[[BX0]], i32 0
%[[Y:.+]] = insertelement <4 x float> %[[X]], float %[[BY0]], i32 1
%[[Z:.+]] = insertelement <4 x float> %[[Y]], float %[[BX1]], i32 2
%[[W:.+]] = insertelement <4 x float> %[[Z]], float %[[BY1]], i32 3
%[[BX0:.+]] = extractelement <2 x float> %[[B]], i32 0
%[[BY0:.+]] = extractelement <2 x float> %[[B]], i32 1
%[[BX1:.+]] = extractelement <2 x float> %[[B]], i32 0
%[[BY1:.+]] = extractelement <2 x float> %[[B]], i32 1

(cherry picked from commit b3dedc9)
1 parent d9d83d0 commit 484f1b0

2 files changed

Lines changed: 34 additions & 1 deletion

File tree

lib/Transforms/Scalar/Scalarizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
633633
// instruction is processed, it will be replaced without updating our
634634
// Gather entry. This dead instruction will be accessed by finish(),
635635
// causing assert or crash.
636-
Res[I] = IRBuilder<>(SVI.getNextNode()).Insert(EA->clone());
636+
Res[I] = IRBuilder<>(&SVI).Insert(EA->clone());
637637
}
638638
// HLSL Change Ends
639639
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
; RUN: opt -S -scalarizer -dce %s | FileCheck %s
2+
3+
; CHECK: %[[B:.+]] = load <2 x float>, <2 x float>* %b, align 4
4+
; CHECK: %[[BX0:.+]] = extractelement <2 x float> %[[B]], i32 0
5+
; CHECK: %[[BY0:.+]] = extractelement <2 x float> %[[B]], i32 1
6+
; CHECK: %[[BX1:.+]] = extractelement <2 x float> %[[B]], i32 0
7+
; CHECK: %[[BY1:.+]] = extractelement <2 x float> %[[B]], i32 1
8+
9+
; CHECK: %[[X:.+]] = insertelement <4 x float> undef, float %[[BX0]], i32 0
10+
; CHECK: %[[Y:.+]] = insertelement <4 x float> %[[X]], float %[[BY0]], i32 1
11+
; CHECK: %[[Z:.+]] = insertelement <4 x float> %[[Y]], float %[[BX1]], i32 2
12+
; CHECK: %[[W:.+]] = insertelement <4 x float> %[[Z]], float %[[BY1]], i32 3
13+
; CHECK: ret <4 x float> %[[W]]
14+
15+
declare void @foo(<2 x float>, <2 x float>* dereferenceable(8))
16+
17+
; Function Attrs: noinline nounwind
18+
define internal <4 x float> @bar(<3 x float> %v) #0 {
19+
entry:
20+
%0 = alloca <2 x float>
21+
%b = alloca <2 x float>, align 4
22+
store <2 x float> zeroinitializer, <2 x float>* %b, align 4
23+
%1 = insertelement <3 x float> %v, float 1.000000e+00, i32 0
24+
%2 = shufflevector <3 x float> %1, <3 x float> undef, <2 x i32> <i32 0, i32 1>
25+
store <2 x float> %2, <2 x float>* %0
26+
;call void @foo(<2 x float>* dereferenceable(8) %0, <2 x float>* dereferenceable(8) %b)
27+
%3 = load <2 x float>, <2 x float>* %b, align 4
28+
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
29+
ret <4 x float> %4
30+
}
31+
32+
attributes #0 = { noinline nounwind }
33+

0 commit comments

Comments
 (0)