|
1 | | -// RUN: %dxc -T cs_6_9 %s | FileCheck %s |
| 1 | +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-0 |
| 2 | +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=ColumnMajor -DMT=1 | FileCheck %s --check-prefixes DXIL,DXIL-1 |
| 3 | +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-2 |
| 4 | +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=I8 -DMI=I8 -DML=OuterProductOptimal -DMT=1 | FileCheck %s --check-prefixes DXIL,DXIL-3 |
| 5 | +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-4 |
| 6 | +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-5 |
| 7 | +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=1 | FileCheck %s --check-prefixes DXIL,DXIL-6 |
| 8 | +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=0 | FileCheck %s --check-prefixes DXIL,DXIL-7 |
| 9 | + |
| 10 | +// Test minimum support set of combinations for matVecMul |
| 11 | +// DXIL: define void @main() |
| 12 | +// DXIL-0: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) |
| 13 | +// DXIL-1: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 1, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) |
| 14 | +// DXIL-2: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) |
| 15 | +// DXIL-3: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) |
| 16 | +// DXIL-4: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) |
| 17 | + |
| 18 | +// Test unsigned variations |
| 19 | +// DXIL-5: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 true) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) |
| 20 | +// DXIL-6: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) |
| 21 | +// DXIL-7: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) |
| 22 | + |
2 | 23 |
|
3 | 24 | ByteAddressBuffer input_vector_buffer; |
4 | 25 | ByteAddressBuffer matrix_buffer; |
5 | 26 | ByteAddressBuffer bias_buffer; |
6 | 27 | RWByteAddressBuffer rw_matrix_buffer; |
7 | 28 |
|
8 | | -// Test use of __builtin_MatVecMulAdd in compute shader |
9 | | -// CHECK: define void @main() |
10 | | -// CHECK: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 {{[0-9]+}} |
| 29 | +enum CompType { /* Component-type codes. The RUN lines pick enumerators by name through -DII and -DMI; main() assigns those macros to input_interpretation and matrix_interpretation. */ |
| 30 | + Invalid = 0, |
| 31 | + I1 = 1, |
| 32 | + I16 = 2, |
| 33 | + U16 = 3, |
| 34 | + I32 = 4, |
| 35 | + U32 = 5, |
| 36 | + I64 = 6, |
| 37 | + U64 = 7, |
| 38 | + F16 = 8, |
| 39 | + F32 = 9, |
| 40 | + F64 = 10, |
| 41 | + SNormF16 = 11, |
| 42 | + UNormF16 = 12, |
| 43 | + SNormF32 = 13, |
| 44 | + UNormF32 = 14, |
| 45 | + SNormF64 = 15, |
| 46 | + UNormF64 = 16, |
| 47 | + PackedS8x32 = 17, |
| 48 | + PackedU8x32 = 18, |
| 49 | + |
| 50 | + // BEGIN NEW FOR SM 6.9 |
| 51 | + U8 = 19, |
| 52 | + I8 = 20, |
| 53 | + F8_E4M3 = 21, |
| 54 | + F8_E5M2 = 22, |
| 55 | +}; |
11 | 56 |
|
| 57 | +enum MatLayout { /* Matrix layout codes. The RUN lines pick one by name through -DML; main() assigns that macro to matrix_layout. */ |
| 58 | + RowMajor = 0, |
| 59 | + ColumnMajor = 1, |
| 60 | + MulOptimal = 2, |
| 61 | + OuterProductOptimal = 3, |
| 62 | +}; |
12 | 63 |
|
13 | 64 | [NumThreads(1,1,1)] |
14 | 65 | void main() |
15 | 66 | { |
16 | | - vector<uint32_t, 4> output_vector; |
17 | | - static const uint is_output_unsigned = 0; |
| 67 | + vector<OTY, 4> output_vector; |
| 68 | + static const uint is_output_unsigned = OU; |
18 | 69 |
|
19 | | - vector<float, 8> input_vector = input_vector_buffer.Load<vector<float, 8> >(0); |
20 | | - const uint is_input_unsigned = 0; |
21 | | - const uint input_interpretation = 9; /*F32*/ |
| 70 | + vector<ITY, 8> input_vector = input_vector_buffer.Load<vector<ITY, 8> >(0); |
| 71 | + const uint is_input_unsigned = IU; |
| 72 | + const uint input_interpretation = II; |
22 | 73 |
|
23 | 74 | const uint matrix_offset = 0; |
24 | | - const uint matrix_interpretation = 9; /*F32*/ |
| 75 | + const uint matrix_interpretation = MI; |
25 | 76 | const uint matrix_dimM = 8; |
26 | 77 | const uint matrix_dimK = 8; |
27 | | - const uint matrix_layout = 0; /*RowMajor*/ |
28 | | - const bool matrix_is_transposed = false; |
| 78 | + const uint matrix_layout = ML; |
| 79 | + const bool matrix_is_transposed = (bool) MT; |
29 | 80 | const uint matrix_stride = 64; |
30 | 81 |
|
31 | 82 | __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, |
|
0 commit comments