Skip to content

Commit 1b23b26

Browse files
committed
Multioverload OuterProductAccumulate test (and remove redundant
non-overload test)
1 parent 41217f3 commit 1b23b26

2 files changed

Lines changed: 56 additions & 28 deletions

File tree

tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,68 @@
1-
// RUN: %dxc -T cs_6_9 %s | FileCheck %s
1+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=RowMajor | FileCheck %s --check-prefixes COMMON,DXIL-0
2+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-1
3+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-2
24

5+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=RowMajor -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0
6+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1
7+
// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2
8+
9+
ByteAddressBuffer input_vector_buffer; // read in main() via Load at offset 0 to fill input_vector1
10+
ByteAddressBuffer input_vector_buffer2; // read in main() via Load at offset 0 to fill input_vector2
311
RWByteAddressBuffer matrix_buffer; // passed as the matrix buffer argument of __builtin_OuterProductAccumulate
412

5-
// CHECK: define void @main()
6-
// CHECK: call void @dx.op.outerProductAccumulate.v2i32.v4i32(i32 {{[0-9]+}}
13+
// COMMON: define void @main()
14+
// DXIL-0: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 0, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
15+
// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 365, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 0, i32 64)
16+
// DXIL-1: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
17+
// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 365, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 64)
18+
// DXIL-2: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
19+
// HLOP-2: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 365, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 64)
20+
21+
// Numeric codes for component types. The -DMI=<name> define on each dxc
// command line selects one enumerator; main() stores it in
// matrix_interpretation and passes it to __builtin_OuterProductAccumulate,
// so the value shows up as an i32 operand in the expected call lines.
enum CompType {
22+
Invalid = 0,
23+
I1 = 1,
24+
I16 = 2,
25+
U16 = 3,
26+
I32 = 4,
27+
U32 = 5,
28+
I64 = 6,
29+
U64 = 7,
30+
F16 = 8, // used by the -DMI=F16 configuration; expected calls carry i32 8
31+
F32 = 9,
32+
F64 = 10,
33+
SNormF16 = 11,
34+
UNormF16 = 12,
35+
SNormF32 = 13,
36+
UNormF32 = 14,
37+
SNormF64 = 15,
38+
UNormF64 = 16,
39+
PackedS8x32 = 17,
40+
PackedU8x32 = 18,
41+
42+
// BEGIN NEW FOR SM 6.9
43+
U8 = 19, // used by the -DMI=U8 configuration; expected calls carry i32 19
44+
I8 = 20,
45+
F8_E4M3 = 21, // used by the -DMI=F8_E4M3 configuration; expected calls carry i32 21
46+
F8_E5M2 = 22,
47+
};
48+
49+
// Numeric codes for matrix layouts. The -DML=<name> define on each dxc
// command line selects one enumerator; main() stores it in matrix_layout
// and passes it to __builtin_OuterProductAccumulate.
enum MatLayout {
50+
RowMajor = 0, // used by the -DML=RowMajor configuration; expected calls carry i32 0
51+
ColumnMajor = 1,
52+
MulOptimal = 2,
53+
OuterProductOptimal = 3, // used by the -DML=OuterProductOptimal configurations; expected calls carry i32 3
54+
};
55+
756

857
[Numthreads(1,1,1)]
958
void main()
1059
{
11-
vector<uint, 2> input_vector1 = 1;
12-
vector<uint, 4> input_vector2 = 2;
60+
vector<ITY, 8> input_vector1 = input_vector_buffer.Load<vector<ITY, 8> >(0);
61+
vector<ITY, 8> input_vector2 = input_vector_buffer2.Load<vector<ITY, 8> >(0);
1362

63+
const uint matrix_interpretation = MI;
64+
const uint matrix_layout = ML;
1465
const uint matrix_offset = 0;
15-
const uint matrix_interpretation = 5; /*U32*/
16-
const uint matrix_layout = 0;
1766
const uint matrix_stride = 64;
1867

1968
__builtin_OuterProductAccumulate(input_vector1, input_vector2, matrix_buffer, matrix_offset, matrix_interpretation, matrix_layout, matrix_stride);

tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate.hlsl

Lines changed: 0 additions & 21 deletions
This file was deleted.

0 commit comments

Comments
 (0)