Skip to content

Commit 50155c1

Browse files
Unit test 1: Check linalg builtins for different shader stages: ps, vs, cs, ns, gs
1 parent f7fabd9 commit 50155c1

1 file changed

Lines changed: 297 additions & 0 deletions

File tree

Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
// RUN: %dxc -T lib_6_9 %s | FileCheck %s
2+
3+
// Global resources shared by every entry point in this test.
// matrix_buffer is passed to __builtin_MatVecMul and __builtin_MatVecMulAdd.
ByteAddressBuffer matrix_buffer;
4+
// bias_buffer is passed only to __builtin_MatVecMulAdd.
ByteAddressBuffer bias_buffer;
5+
// rw_matrix_buffer is passed to __builtin_OuterProductAccumulate and
// __builtin_VectorAccumulate.
RWByteAddressBuffer rw_matrix_buffer;
6+
7+
// CHECK: define void @ps_main()
8+
// CHECK: call <4 x float> @dx.op.matVecMul
9+
// CHECK: call <4 x float> @dx.op.matVecMulAdd
10+
// CHECK: call void @dx.op.outerProductAccumulate
11+
// CHECK: call void @dx.op.vectorAccumulate
12+
13+
// Pixel-shader entry point. Calls the four linalg builtins in the same order
// as the FileCheck directives written above this function: MatVecMul,
// MatVecMulAdd, OuterProductAccumulate, VectorAccumulate.
// NOTE(review): the vectors are never initialized and output_vector is never
// consumed; presumably only the emitted dx.op calls matter here - confirm.
[Shader("pixel")]
14+
void ps_main()
15+
{
16+
vector<float, 4> output_vector;
17+
static const uint is_output_unsigned = 0;
18+
19+
vector<float, 4> input_vector;
20+
const uint is_input_unsigned = 0;
21+
const uint input_interpretation = 9; /*F32*/
22+
23+
// Arguments describing the matrix taken from matrix_buffer.
const uint matrix_offset = 0;
24+
const uint matrix_interpretation = 9; /*F32*/
25+
const uint matrix_dimM = 4;
26+
const uint matrix_dimK = 4;
27+
const uint matrix_layout = 0; /*RowMajor*/
28+
const bool matrix_is_transposed = false;
29+
const uint matrix_stride = 64;
30+
31+
__builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
32+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
33+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
34+
matrix_is_transposed, matrix_stride);
35+
36+
// Extra arguments for the bias operand taken from bias_buffer.
const uint bias_offset = 0;
37+
const uint bias_interpretation = 9; /*F32*/
38+
39+
// Same leading arguments as the MatVecMul call, followed by the bias arguments.
__builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
40+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
41+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
42+
matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
43+
bias_interpretation);
44+
45+
// Operands for the accumulate builtins: two 8-element uint vectors and the
// read/write buffer.
vector<uint, 8> input_vector1;
46+
vector<uint, 8> input_vector2;
47+
const uint opa_matrix_offset = 0;
48+
const uint opa_matrix_interpretation = 5; /*U32*/
49+
const uint opa_matrix_layout = 3; /*OuterProductOptimal*/
50+
const uint opa_matrix_stride = 64;
51+
52+
__builtin_OuterProductAccumulate(input_vector1, input_vector2,
53+
rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
54+
opa_matrix_layout, opa_matrix_stride);
55+
56+
const uint va_matrix_offset = 0;
57+
58+
// Reuses input_vector1 and rw_matrix_buffer from the outer-product call.
__builtin_VectorAccumulate(input_vector1, rw_matrix_buffer,
59+
va_matrix_offset); }
60+
61+
// CHECK: define void @cs_main()
62+
// CHECK: call <4 x float> @dx.op.matVecMul
63+
// CHECK: call <4 x float> @dx.op.matVecMulAdd
64+
// CHECK: call void @dx.op.outerProductAccumulate
65+
// CHECK: call void @dx.op.vectorAccumulate
66+
67+
// Compute-shader entry point (1x1x1 thread group). Calls the four linalg
// builtins in the same order as the FileCheck directives written above this
// function: MatVecMul, MatVecMulAdd, OuterProductAccumulate, VectorAccumulate.
// NOTE(review): the vectors are never initialized and output_vector is never
// consumed; presumably only the emitted dx.op calls matter here - confirm.
[Shader("compute")]
68+
[NumThreads(1,1,1)]
69+
void cs_main()
70+
{
71+
vector<float, 4> output_vector;
72+
static const uint is_output_unsigned = 0;
73+
74+
vector<float, 4> input_vector;
75+
const uint is_input_unsigned = 0;
76+
const uint input_interpretation = 9; /*F32*/
77+
78+
// Arguments describing the matrix taken from matrix_buffer.
const uint matrix_offset = 0;
79+
const uint matrix_interpretation = 9; /*F32*/
80+
const uint matrix_dimM = 4;
81+
const uint matrix_dimK = 4;
82+
const uint matrix_layout = 0; /*RowMajor*/
83+
const bool matrix_is_transposed = false;
84+
const uint matrix_stride = 64;
85+
86+
__builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
87+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
88+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
89+
matrix_is_transposed, matrix_stride);
90+
91+
// Extra arguments for the bias operand taken from bias_buffer.
const uint bias_offset = 0;
92+
const uint bias_interpretation = 9; /*F32*/
93+
94+
// Same leading arguments as the MatVecMul call, followed by the bias arguments.
__builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
95+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
96+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
97+
matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
98+
bias_interpretation);
99+
100+
// Operands for the accumulate builtins: two 8-element uint vectors and the
// read/write buffer.
vector<uint, 8> input_vector1;
101+
vector<uint, 8> input_vector2;
102+
const uint opa_matrix_offset = 0;
103+
const uint opa_matrix_interpretation = 5; /*U32*/
104+
const uint opa_matrix_layout = 3; /*OuterProductOptimal*/
105+
const uint opa_matrix_stride = 64;
106+
107+
__builtin_OuterProductAccumulate(input_vector1, input_vector2,
108+
rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
109+
opa_matrix_layout, opa_matrix_stride);
110+
111+
const uint va_matrix_offset = 0;
112+
113+
// Reuses input_vector1 and rw_matrix_buffer from the outer-product call.
__builtin_VectorAccumulate(input_vector1, rw_matrix_buffer,
114+
va_matrix_offset);
115+
}
116+
117+
// CHECK: define void @vs_main()
118+
// CHECK: call <4 x float> @dx.op.matVecMul
119+
// CHECK: call <4 x float> @dx.op.matVecMulAdd
120+
// CHECK: call void @dx.op.outerProductAccumulate
121+
// CHECK: call void @dx.op.vectorAccumulate
122+
123+
// Vertex-shader entry point. Calls the four linalg builtins in the same order
// as the FileCheck directives written above this function: MatVecMul,
// MatVecMulAdd, OuterProductAccumulate, VectorAccumulate.
// NOTE(review): the vectors are never initialized and output_vector is never
// consumed; presumably only the emitted dx.op calls matter here - confirm.
[Shader("vertex")]
124+
void vs_main()
125+
{
126+
vector<float, 4> output_vector;
127+
static const uint is_output_unsigned = 0;
128+
129+
vector<float, 4> input_vector;
130+
const uint is_input_unsigned = 0;
131+
const uint input_interpretation = 9; /*F32*/
132+
133+
// Arguments describing the matrix taken from matrix_buffer.
const uint matrix_offset = 0;
134+
const uint matrix_interpretation = 9; /*F32*/
135+
const uint matrix_dimM = 4;
136+
const uint matrix_dimK = 4;
137+
const uint matrix_layout = 0; /*RowMajor*/
138+
const bool matrix_is_transposed = false;
139+
const uint matrix_stride = 64;
140+
141+
__builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
142+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
143+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
144+
matrix_is_transposed, matrix_stride);
145+
146+
// Extra arguments for the bias operand taken from bias_buffer.
const uint bias_offset = 0;
147+
const uint bias_interpretation = 9; /*F32*/
148+
149+
// Same leading arguments as the MatVecMul call, followed by the bias arguments.
__builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
150+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
151+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
152+
matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
153+
bias_interpretation);
154+
155+
// Operands for the accumulate builtins: two 8-element uint vectors and the
// read/write buffer.
vector<uint, 8> input_vector1;
156+
vector<uint, 8> input_vector2;
157+
const uint opa_matrix_offset = 0;
158+
const uint opa_matrix_interpretation = 5; /*U32*/
159+
const uint opa_matrix_layout = 3; /*OuterProductOptimal*/
160+
const uint opa_matrix_stride = 64;
161+
162+
__builtin_OuterProductAccumulate(input_vector1, input_vector2,
163+
rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
164+
opa_matrix_layout, opa_matrix_stride);
165+
166+
const uint va_matrix_offset = 0;
167+
168+
// Reuses input_vector1 and rw_matrix_buffer from the outer-product call.
__builtin_VectorAccumulate(input_vector1, rw_matrix_buffer,
169+
va_matrix_offset); }
170+
171+
// Record type used as the ThreadNodeInputRecord payload of ns_main; it holds a
// single uint field.
struct MyRecord{
172+
uint a;
173+
};
174+
175+
// CHECK: define void @ns_main()
176+
// CHECK: call <4 x float> @dx.op.matVecMul
177+
// CHECK: call <4 x float> @dx.op.matVecMulAdd
178+
// CHECK: call void @dx.op.outerProductAccumulate
179+
// CHECK: call void @dx.op.vectorAccumulate
180+
181+
// Node-shader entry point (thread launch). Calls the four linalg builtins in
// the same order as the FileCheck directives written above this function:
// MatVecMul, MatVecMulAdd, OuterProductAccumulate, VectorAccumulate.
// The `input` record parameter is not referenced in the body.
// NOTE(review): the vectors are never initialized and output_vector is never
// consumed; presumably only the emitted dx.op calls matter here - confirm.
[Shader("node")]
182+
[NodeLaunch("thread")]
183+
void ns_main(ThreadNodeInputRecord<MyRecord> input)
184+
{
185+
vector<float, 4> output_vector;
186+
static const uint is_output_unsigned = 0;
187+
188+
vector<float, 4> input_vector;
189+
const uint is_input_unsigned = 0;
190+
const uint input_interpretation = 9; /*F32*/
191+
192+
// Arguments describing the matrix taken from matrix_buffer.
const uint matrix_offset = 0;
193+
const uint matrix_interpretation = 9; /*F32*/
194+
const uint matrix_dimM = 4;
195+
const uint matrix_dimK = 4;
196+
const uint matrix_layout = 0; /*RowMajor*/
197+
const bool matrix_is_transposed = false;
198+
const uint matrix_stride = 64;
199+
200+
__builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
201+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
202+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
203+
matrix_is_transposed, matrix_stride);
204+
205+
// Extra arguments for the bias operand taken from bias_buffer.
const uint bias_offset = 0;
206+
const uint bias_interpretation = 9; /*F32*/
207+
208+
// Same leading arguments as the MatVecMul call, followed by the bias arguments.
__builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
209+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
210+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
211+
matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
212+
bias_interpretation);
213+
214+
// Operands for the accumulate builtins: two 8-element uint vectors and the
// read/write buffer.
vector<uint, 8> input_vector1;
215+
vector<uint, 8> input_vector2;
216+
const uint opa_matrix_offset = 0;
217+
const uint opa_matrix_interpretation = 5; /*U32*/
218+
const uint opa_matrix_layout = 3; /*OuterProductOptimal*/
219+
const uint opa_matrix_stride = 64;
220+
221+
__builtin_OuterProductAccumulate(input_vector1, input_vector2,
222+
rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
223+
opa_matrix_layout, opa_matrix_stride);
224+
225+
const uint va_matrix_offset = 0;
226+
227+
// Reuses input_vector1 and rw_matrix_buffer from the outer-product call.
__builtin_VectorAccumulate(input_vector1, rw_matrix_buffer,
228+
va_matrix_offset);
229+
}
230+
231+
// Vertex shader output structure. In this file it is used only as the element
// type of the `input` array parameter of gs_main; vs_main itself returns void.
232+
struct VS_OUT {
233+
float3 Color : COLOR0;
234+
};
235+
236+
// Geometry shader output structure. Element type of the TriangleStream
// parameter of gs_main.
237+
struct GS_OUT {
238+
float3 Color : COLOR0;
239+
float2 TexCoord : TEXCOORD0;
240+
};
241+
242+
// CHECK: define void @gs_main()
243+
// CHECK: call <4 x float> @dx.op.matVecMul
244+
// CHECK: call <4 x float> @dx.op.matVecMulAdd
245+
// CHECK: call void @dx.op.outerProductAccumulate
246+
// CHECK: call void @dx.op.vectorAccumulate
247+
248+
// Geometry-shader entry point (point input, at most 3 output vertices). Calls
// the four linalg builtins in the same order as the FileCheck directives
// written above this function: MatVecMul, MatVecMulAdd,
// OuterProductAccumulate, VectorAccumulate.
// Neither `input` nor `OutputStream` is referenced in the body, so no vertices
// are appended to the stream.
// NOTE(review): the vectors are never initialized and output_vector is never
// consumed; presumably only the emitted dx.op calls matter here - confirm.
[shader("geometry")]
249+
[maxvertexcount(3)]
250+
void gs_main(point VS_OUT input[1],
251+
inout TriangleStream<GS_OUT> OutputStream)
252+
{
253+
vector<float, 4> output_vector;
254+
static const uint is_output_unsigned = 0;
255+
256+
vector<float, 4> input_vector;
257+
const uint is_input_unsigned = 0;
258+
const uint input_interpretation = 9; /*F32*/
259+
260+
// Arguments describing the matrix taken from matrix_buffer.
const uint matrix_offset = 0;
261+
const uint matrix_interpretation = 9; /*F32*/
262+
const uint matrix_dimM = 4;
263+
const uint matrix_dimK = 4;
264+
const uint matrix_layout = 0; /*RowMajor*/
265+
const bool matrix_is_transposed = false;
266+
const uint matrix_stride = 64;
267+
268+
__builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
269+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
270+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
271+
matrix_is_transposed, matrix_stride);
272+
273+
// Extra arguments for the bias operand taken from bias_buffer.
const uint bias_offset = 0;
274+
const uint bias_interpretation = 9; /*F32*/
275+
276+
// Same leading arguments as the MatVecMul call, followed by the bias arguments.
__builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
277+
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
278+
matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
279+
matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
280+
bias_interpretation);
281+
282+
// Operands for the accumulate builtins: two 8-element uint vectors and the
// read/write buffer.
vector<uint, 8> input_vector1;
283+
vector<uint, 8> input_vector2;
284+
const uint opa_matrix_offset = 0;
285+
const uint opa_matrix_interpretation = 5; /*U32*/
286+
const uint opa_matrix_layout = 3; /*OuterProductOptimal*/
287+
const uint opa_matrix_stride = 64;
288+
289+
__builtin_OuterProductAccumulate(input_vector1, input_vector2,
290+
rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
291+
opa_matrix_layout, opa_matrix_stride);
292+
293+
const uint va_matrix_offset = 0;
294+
295+
// Reuses input_vector1 and rw_matrix_buffer from the outer-product call.
__builtin_VectorAccumulate(input_vector1, rw_matrix_buffer,
296+
va_matrix_offset);
297+
}

0 commit comments

Comments
 (0)