forked from microsoft/DirectXShaderCompiler
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathouter-product-accumulate-matrix-layout.hlsl
More file actions
65 lines (58 loc) · 2.4 KB
/
outer-product-accumulate-matrix-layout.hlsl
File metadata and controls
65 lines (58 loc) · 2.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DML=RowMajor -DSTRIDE=64 2>&1 | FileCheck %s --check-prefixes DXIL-0
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DML=ColumnMajor -DSTRIDE=64 2>&1 | FileCheck %s --check-prefixes DXIL-1
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DML=MulOptimal -DSTRIDE=64 2>&1 | FileCheck %s --check-prefixes DXIL-2
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DML=OuterProductOptimal -DSTRIDE=64 2>&1 | FileCheck %s --check-prefixes DXIL-3
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DML=OuterProductOptimal -DSTRIDE=0 2>&1 | FileCheck %s --check-prefixes DXIL-4
// Shader resources used by main():
//  - input_vector_buffer / input_vector_buffer2: read-only raw buffers; main()
//    loads one vector<half, 8> from byte offset 0 of each.
//  - matrix_buffer: read-write raw buffer passed as the matrix argument of
//    __builtin_OuterProductAccumulate.
ByteAddressBuffer input_vector_buffer;
ByteAddressBuffer input_vector_buffer2;
RWByteAddressBuffer matrix_buffer;
// Component-type identifiers. main() passes CompType::F16 as the
// matrix-interpretation argument of __builtin_OuterProductAccumulate.
// NOTE(review): the numeric values presumably mirror the DXIL component-type
// encoding; confirm against the DXIL definitions before changing any of them.
enum CompType {
  Invalid     = 0,
  I1          = 1,
  I16         = 2,
  U16         = 3,
  I32         = 4,
  U32         = 5,
  I64         = 6,
  U64         = 7,
  F16         = 8,
  F32         = 9,
  F64         = 10,
  SNormF16    = 11,
  UNormF16    = 12,
  SNormF32    = 13,
  UNormF32    = 14,
  SNormF64    = 15,
  UNormF64    = 16,
  PackedS8x32 = 17,
  PackedU8x32 = 18,

  // BEGIN NEW FOR SM 6.9
  U8          = 19,
  I8          = 20,
  F8_E4M3     = 21,
  F8_E5M2     = 22,
};
// Matrix layout identifiers. The ML macro supplied on the compiler command
// line names one of these enumerators, and main() forwards it as the layout
// argument of __builtin_OuterProductAccumulate. Per the diagnostics this test
// expects, only OuterProductOptimal is accepted by that operation.
enum MatLayout {
  RowMajor            = 0,
  ColumnMajor         = 1,
  MulOptimal          = 2,
  OuterProductOptimal = 3,
};
// DXIL-0: error: matrix layout value 'RowMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
// DXIL-1: error: matrix layout value 'ColumnMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
// DXIL-2: error: matrix layout value 'MulOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
// DXIL-3-NOT: error: matrix layout value 'OuterProductOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
// DXIL-3: error: matrix stride must be zero for optimal layouts
// DXIL-4: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0)
// Compute entry point: reads one vector<half, 8> from each input buffer and
// hands both, together with matrix_buffer, to __builtin_OuterProductAccumulate.
// The layout (ML) and stride (STRIDE) are macros defined on the compiler
// command line, so each invocation of this test exercises a different
// layout/stride combination.
[Numthreads(1,1,1)]
[shader("compute")]
void main()
{
  // Matrix description forwarded to the intrinsic.
  const uint interpretation = CompType::F16;
  const uint layout = ML;
  const uint offset_in_bytes = 0;
  const uint stride_in_bytes = STRIDE;

  // Both operands are loaded from byte offset 0 of their respective buffers.
  vector<half, 8> lhs = input_vector_buffer.Load<vector<half, 8> >(0);
  vector<half, 8> rhs = input_vector_buffer2.Load<vector<half, 8> >(0);

  __builtin_OuterProductAccumulate(lhs, rhs, matrix_buffer, offset_in_bytes,
                                   interpretation, layout, stride_in_bytes);
}