Skip to content

Commit 6eb36e4

Browse files
committed
Add test variations for different matVecMul parameter settings
1 parent 9fcf8c7 commit 6eb36e4

1 file changed

Lines changed: 63 additions & 12 deletions

File tree

tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl

Lines changed: 63 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,82 @@
1-
// RUN: %dxc -T cs_6_9 %s | FileCheck %s
1+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-0
2+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=ColumnMajor -DMT=1 | FileCheck %s --check-prefixes DXIL,DXIL-1
3+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-2
4+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=I8 -DMI=I8 -DML=OuterProductOptimal -DMT=1 | FileCheck %s --check-prefixes DXIL,DXIL-3
5+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-4
6+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-5
7+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=1 | FileCheck %s --check-prefixes DXIL,DXIL-6
8+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-7
9+
10+
// Test the minimum supported set of combinations for matVecMul
11+
// DXIL: define void @main()
12+
// DXIL-0: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
13+
// DXIL-1: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 1, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
14+
// DXIL-2: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
15+
// DXIL-3: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
16+
// DXIL-4: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
17+
18+
// Test unsigned variations
19+
// DXIL-5: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 true) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
20+
// DXIL-6: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
21+
// DXIL-7: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
22+
223

324
ByteAddressBuffer input_vector_buffer;
425
ByteAddressBuffer matrix_buffer;
526
ByteAddressBuffer bias_buffer;
627
RWByteAddressBuffer rw_matrix_buffer;
728

8-
// Test use of __builtin_MatVecMulAdd in compute shader
9-
// CHECK: define void @main()
10-
// CHECK: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 {{[0-9]+}}
29+
// Component-type codes used by this test. The RUN lines pass enumerator names
// through -DII= and -DMI= (e.g. -DII=F16), main() assigns those macros to
// input_interpretation / matrix_interpretation, and the DXIL-N check lines
// expect the numeric values below as the corresponding i32 operands
// (e.g. F16 -> i32 8, I8 -> i32 20).
// NOTE(review): the numbering presumably mirrors the compiler's own
// component-type enum -- confirm against the DXIL headers before editing.
enum CompType {
30+
Invalid = 0,
31+
I1 = 1,
32+
I16 = 2,
33+
U16 = 3,
34+
I32 = 4,
35+
U32 = 5,
36+
I64 = 6,
37+
U64 = 7,
38+
F16 = 8,
39+
F32 = 9,
40+
F64 = 10,
41+
SNormF16 = 11,
42+
UNormF16 = 12,
43+
SNormF32 = 13,
44+
UNormF32 = 14,
45+
SNormF64 = 15,
46+
UNormF64 = 16,
47+
PackedS8x32 = 17,
48+
PackedU8x32 = 18,
49+
50+
// BEGIN NEW FOR SM 6.9
// (the RUN lines in this test exercise these four values plus F16)
51+
U8 = 19,
52+
I8 = 20,
53+
F8_E4M3 = 21,
54+
F8_E5M2 = 22,
55+
};
1156

57+
// Matrix layout codes used by this test. The RUN lines pass an enumerator
// name through -DML= (e.g. -DML=ColumnMajor), main() assigns it to
// matrix_layout, and the DXIL-N check lines expect the numeric value below
// as the matrixLayout i32 operand.
enum MatLayout {
58+
RowMajor = 0,
59+
ColumnMajor = 1,
60+
MulOptimal = 2,
61+
OuterProductOptimal = 3,
62+
};
1263

1364
[NumThreads(1,1,1)]
1465
void main()
1566
{
16-
vector<uint32_t, 4> output_vector;
17-
static const uint is_output_unsigned = 0;
67+
vector<OTY, 4> output_vector;
68+
static const uint is_output_unsigned = OU;
1869

19-
vector<float, 8> input_vector = input_vector_buffer.Load<vector<float, 8> >(0);
20-
const uint is_input_unsigned = 0;
21-
const uint input_interpretation = 9; /*F32*/
70+
vector<ITY, 8> input_vector = input_vector_buffer.Load<vector<ITY, 8> >(0);
71+
const uint is_input_unsigned = IU;
72+
const uint input_interpretation = II;
2273

2374
const uint matrix_offset = 0;
24-
const uint matrix_interpretation = 9; /*F32*/
75+
const uint matrix_interpretation = MI;
2576
const uint matrix_dimM = 8;
2677
const uint matrix_dimK = 8;
27-
const uint matrix_layout = 0; /*RowMajor*/
28-
const bool matrix_is_transposed = false;
78+
const uint matrix_layout = ML;
79+
const bool matrix_is_transposed = (bool) MT;
2980
const uint matrix_stride = 64;
3081

3182
__builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation,

0 commit comments

Comments
 (0)