DirectXShaderCompiler/test/Transforms/GVN/min-precision-padding.ll at 77ab398184dcade1af2f442832ecdb5d0f82730b · alsepkow/DirectXShaderCompiler · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
; RUN: opt < %s -basicaa -gvn -S | FileCheck %s

; Regression test for min precision vector GVN miscompilation.
; DXC's data layout pads i16 to 32 bits (i16:32). GVN must not:
;   1. Coerce padded vector types via bitcast (CanCoerceMustAliasedValueToLoad)
;   2. Forward a zeroinitializer store past partial element stores (processLoad)
;
; Without the fix, GVN would forward the zeroinitializer vector load, producing
; incorrect all-zero results for elements that were individually written.

target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-ms-dx"

; Test 1: GVN must not forward zeroinitializer past element store for <3 x i16>.
; The store of zeroinitializer to %dst is followed by an element store to
; %dst[0], then a vector load of %dst. GVN must not replace the vector load
; with the zeroinitializer.

; CHECK-LABEL: @test_no_forward_i16_vec3
; CHECK: store <3 x i16> zeroinitializer
; CHECK: store i16 %val
; The vector load must survive — GVN must not replace it with zeroinitializer.
; CHECK: %result = load <3 x i16>
; CHECK: ret <3 x i16> %result
define <3 x i16> @test_no_forward_i16_vec3(i16 %val) {
entry:
  %dst = alloca <3 x i16>, align 4
  store <3 x i16> zeroinitializer, <3 x i16>* %dst, align 4
  %elem0 = getelementptr inbounds <3 x i16>, <3 x i16>* %dst, i32 0, i32 0
  store i16 %val, i16* %elem0, align 4
  %result = load <3 x i16>, <3 x i16>* %dst, align 4
  ret <3 x i16> %result
}

; Test 2: Same pattern with <3 x half> (f16:32 padding).

; CHECK-LABEL: @test_no_forward_f16_vec3
; CHECK: store <3 x half> zeroinitializer
; CHECK: store half %val
; CHECK: %result = load <3 x half>
; CHECK: ret <3 x half> %result
define <3 x half> @test_no_forward_f16_vec3(half %val) {
entry:
  %dst = alloca <3 x half>, align 4
  store <3 x half> zeroinitializer, <3 x half>* %dst, align 4
  %elem0 = getelementptr inbounds <3 x half>, <3 x half>* %dst, i32 0, i32 0
  store half %val, half* %elem0, align 4
  %result = load <3 x half>, <3 x half>* %dst, align 4
  ret <3 x half> %result
}

; Test 3: Multiple element stores — all must survive.
; Stores to elements 0, 1, 2 of a <3 x i16> vector after zeroinitializer.

; CHECK-LABEL: @test_no_forward_i16_vec3_all_elems
; CHECK: store <3 x i16> zeroinitializer
; CHECK: store i16 %v0
; CHECK: store i16 %v1
; CHECK: store i16 %v2
; CHECK: %result = load <3 x i16>
; CHECK: ret <3 x i16> %result
define <3 x i16> @test_no_forward_i16_vec3_all_elems(i16 %v0, i16 %v1, i16 %v2) {
entry:
  %dst = alloca <3 x i16>, align 4
  store <3 x i16> zeroinitializer, <3 x i16>* %dst, align 4
  %e0 = getelementptr inbounds <3 x i16>, <3 x i16>* %dst, i32 0, i32 0
  store i16 %v0, i16* %e0, align 4
  %e1 = getelementptr inbounds <3 x i16>, <3 x i16>* %dst, i32 0, i32 1
  store i16 %v1, i16* %e1, align 4
  %e2 = getelementptr inbounds <3 x i16>, <3 x i16>* %dst, i32 0, i32 2
  store i16 %v2, i16* %e2, align 4
  %result = load <3 x i16>, <3 x i16>* %dst, align 4
  ret <3 x i16> %result
}

; Test 4: Coercion rejection — store a <3 x i16> vector, load as different type.
; GVN must not attempt bitcast coercion on padded types.
; If coercion happened, the load would be eliminated and replaced with a bitcast.

; CHECK-LABEL: @test_no_coerce_i16_vec3
; CHECK: store <3 x i16>
; CHECK: load i96
; CHECK-NOT: bitcast
; CHECK: ret
define i96 @test_no_coerce_i16_vec3(<3 x i16> %v) {
entry:
  %ptr = alloca <3 x i16>, align 4
  store <3 x i16> %v, <3 x i16>* %ptr, align 4
  %iptr = bitcast <3 x i16>* %ptr to i96*
  %result = load i96, i96* %iptr, align 4
  ret i96 %result
}

; Test 5: Long vector variant — <5 x i16> (exceeds 4-element native size).

; CHECK-LABEL: @test_no_forward_i16_vec5
; CHECK: store <5 x i16> zeroinitializer
; CHECK: store i16 %val
; CHECK: %result = load <5 x i16>
; CHECK: ret <5 x i16> %result
define <5 x i16> @test_no_forward_i16_vec5(i16 %val) {
entry:
  %dst = alloca <5 x i16>, align 4
  store <5 x i16> zeroinitializer, <5 x i16>* %dst, align 4
  %elem0 = getelementptr inbounds <5 x i16>, <5 x i16>* %dst, i32 0, i32 0
  store i16 %val, i16* %elem0, align 4
  %result = load <5 x i16>, <5 x i16>* %dst, align 4
  ret <5 x i16> %result
}

; Test 6: Long vector variant — <8 x half>.

; CHECK-LABEL: @test_no_forward_f16_vec8
; CHECK: store <8 x half> zeroinitializer
; CHECK: store half %val
; CHECK: %result = load <8 x half>
; CHECK: ret <8 x half> %result
define <8 x half> @test_no_forward_f16_vec8(half %val) {
entry:
  %dst = alloca <8 x half>, align 4
  store <8 x half> zeroinitializer, <8 x half>* %dst, align 4
  %elem0 = getelementptr inbounds <8 x half>, <8 x half>* %dst, i32 0, i32 0
  store half %val, half* %elem0, align 4
  %result = load <8 x half>, <8 x half>* %dst, align 4
  ret <8 x half> %result
}

; Test 7: Same-type store-to-load forwarding must still work for padded types.
; GVN should forward %v directly — no intervening writes, same type.

; CHECK-LABEL: @test_same_type_forward_i16_vec3
; The load should be eliminated and %v returned directly.
; CHECK-NOT: load
; CHECK: ret <3 x i16> %v
define <3 x i16> @test_same_type_forward_i16_vec3(<3 x i16> %v) {
entry:
  %ptr = alloca <3 x i16>, align 4
  store <3 x i16> %v, <3 x i16>* %ptr, align 4
  %result = load <3 x i16>, <3 x i16>* %ptr, align 4
  ret <3 x i16> %result
}

; Test 8: Same-type forwarding for <3 x half>.

; CHECK-LABEL: @test_same_type_forward_f16_vec3
; CHECK-NOT: load
; CHECK: ret <3 x half> %v
define <3 x half> @test_same_type_forward_f16_vec3(<3 x half> %v) {
entry:
  %ptr = alloca <3 x half>, align 4
  store <3 x half> %v, <3 x half>* %ptr, align 4
  %result = load <3 x half>, <3 x half>* %ptr, align 4
  ret <3 x half> %result
}