Skip to content

Commit c763461

Browse files
authored
[SM6.10][Bugfix][Exec] Final test tweaks for preview (#8393)
This is the final test change PR going into the initial preview build. We'll continue to add tests after the build; they can be pulled from main or the preview branch. This PR contains the following changes:
- The OuterProduct smoke test was removed (compiled out with `#if 0`) because it requires an OuterProductOptimal layout, which needs a bit more test harness work to verify. Instead of delaying the preview for it, we are punting the test for now. It will be ready quickly after the preview release.
- Groupshared operations require that `stride` be the number of "row elements"; previously we were setting it to the "row bytes". This has been fixed.
- The 2D->1D index calculation was incorrect, but this was hidden by the fact that all test matrices are square. This has been fixed.
1 parent d831cb4 commit c763461

1 file changed

Lines changed: 12 additions & 19 deletions

File tree

tools/clang/unittests/HLSLExec/LinAlgTests.cpp

Lines changed: 12 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,10 @@ struct MatrixParams {
7777
uint32_t ES = elementSize(CompType);
7878
if (Layout == LinalgMatrixLayout::RowMajor)
7979
return N * ES;
80-
return M * ES;
80+
if (Layout == LinalgMatrixLayout::ColumnMajor)
81+
return M * ES;
82+
// If not Row/Col major, spec says to use 0
83+
return 0;
8184
}
8285

8386
size_t totalElements() const { return M * N; }
@@ -320,7 +323,6 @@ class DxilConf_SM610_LinAlg {
320323
TEST_METHOD(LoadStoreDescriptor_Wave_16x16_F16);
321324
TEST_METHOD(SplatStore_Wave_16x16_F16);
322325
TEST_METHOD(AccumulateDescriptor_Wave_16x16_F16);
323-
TEST_METHOD(AccumulateDescriptor_Thread_16x16_F16);
324326

325327
// Load/Store/Accumulate Memory
326328
TEST_METHOD(LoadMemory_Wave_16x16_F16);
@@ -343,7 +345,9 @@ class DxilConf_SM610_LinAlg {
343345
// Matrix Vector Arithmetic
344346
TEST_METHOD(MatVecMul_Thread_16x16_F16);
345347
TEST_METHOD(MatVecMulAdd_Thread_16x16_F16);
348+
#if 0
346349
TEST_METHOD(OuterProduct_Thread_16x16_F16);
350+
#endif
347351

348352
// Query Accumulator Layout
349353
TEST_METHOD(QueryAccumLayout);
@@ -613,27 +617,14 @@ void DxilConf_SM610_LinAlg::AccumulateDescriptor_Wave_16x16_F16() {
613617
runAccumulateDescriptor(D3DDevice, DxcSupport, Params, 12, VerboseLogging);
614618
}
615619

616-
void DxilConf_SM610_LinAlg::AccumulateDescriptor_Thread_16x16_F16() {
617-
MatrixParams Params = {};
618-
Params.CompType = ComponentType::F16;
619-
Params.M = 16;
620-
Params.N = 16;
621-
Params.Use = MatrixUse::Accumulator;
622-
Params.Scope = MatrixScope::Thread;
623-
Params.Layout = LinalgMatrixLayout::RowMajor;
624-
Params.NumThreads = 1;
625-
Params.Enable16Bit = true;
626-
runAccumulateDescriptor(D3DDevice, DxcSupport, Params, 19, VerboseLogging);
627-
}
628-
629620
static const char ElementAccessShader[] = R"(
630621
RWByteAddressBuffer Input : register(u0);
631622
RWByteAddressBuffer Output : register(u1);
632623
633624
// flatten the 2D index into a 1D index then scale by element size
634625
// Always store row-major and work it out in the test runner
635626
uint coordToByteOffset(uint2 coord) {
636-
return (coord.y * N_DIM + coord.x) * ELEM_SIZE;
627+
return (coord.y * M_DIM + coord.x) * ELEM_SIZE;
637628
}
638629
639630
[WaveSize(4, 64)]
@@ -1323,6 +1314,7 @@ void DxilConf_SM610_LinAlg::MatVecMulAdd_Thread_16x16_F16() {
13231314
ComponentType::F16);
13241315
}
13251316

1317+
#if 0
13261318
static const char OuterProductShader[] = R"(
13271319
#define USE_A 0
13281320
#define SCOPE_THREAD 0
@@ -1404,6 +1396,7 @@ void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() {
14041396
Params.Enable16Bit = true;
14051397
runOuterProduct(D3DDevice, DxcSupport, Params, VerboseLogging);
14061398
}
1399+
#endif
14071400

14081401
static const char QueryAccumLayoutShader[] = R"(
14091402
RWByteAddressBuffer Output : register(u0);
@@ -1469,7 +1462,7 @@ static const char LoadMemoryShader[] = R"(
14691462
[[__LinAlgMatrix_Attributes(COMP_TYPE, M_DIM, N_DIM, USE, SCOPE)]]
14701463
Mat;
14711464
__builtin_LinAlg_MatrixLoadFromMemory(
1472-
Mat, GsData, OFFSET, STRIDE, LAYOUT);
1465+
Mat, GsData, OFFSET / ELEM_SIZE, STRIDE / ELEM_SIZE, LAYOUT);
14731466
__builtin_LinAlg_MatrixStoreToDescriptor(
14741467
Mat, Output, OFFSET, STRIDE, LAYOUT, 128);
14751468
}
@@ -1542,7 +1535,7 @@ static const char StoreMemoryShader[] = R"(
15421535
__builtin_LinAlg_FillMatrix(Mat, FILL_VALUE);
15431536
15441537
__builtin_LinAlg_MatrixStoreToMemory(
1545-
Mat, GsData, OFFSET, STRIDE, LAYOUT);
1538+
Mat, GsData, OFFSET / ELEM_SIZE, STRIDE / ELEM_SIZE, LAYOUT);
15461539
15471540
for (uint I = 0; I < M_DIM*N_DIM; ++I) {
15481541
Output.Store<ELEM_TYPE>(I*ELEM_SIZE, GsData[I]);
@@ -1622,7 +1615,7 @@ static const char AccumulateMemoryShader[] = R"(
16221615
__builtin_LinAlg_FillMatrix(Mat, FILL_VALUE);
16231616
16241617
__builtin_LinAlg_MatrixAccumulateToMemory(
1625-
Mat, GsData, OFFSET, STRIDE, LAYOUT);
1618+
Mat, GsData, OFFSET / ELEM_SIZE, STRIDE / ELEM_SIZE, LAYOUT);
16261619
16271620
for (uint I = 0; I < M_DIM*N_DIM; ++I) {
16281621
Output.Store<ELEM_TYPE>(I*ELEM_SIZE, GsData[I]);

0 commit comments

Comments
 (0)