From 5598e44659c93a64cca0f10323c821c994bcd24d Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Mon, 20 Apr 2026 16:15:23 -0600 Subject: [PATCH 1/4] [SM6.10][Bugfix][Exec] Final test tweaks for preview --- .../clang/unittests/HLSLExec/LinAlgTests.cpp | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index d9d22863f1..ac9f69bb17 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -72,12 +72,21 @@ struct MatrixParams { int NumThreads; bool Enable16Bit; bool EmulateTest; + bool GroupSharedMemory = false; - size_t strideBytes() const { - uint32_t ES = elementSize(CompType); + size_t rowStride() const { + // If not Row/Col major, spec says to list 0. + size_t RowElementCount = 0; if (Layout == LinalgMatrixLayout::RowMajor) - return N * ES; - return M * ES; + RowElementCount = N; + if (Layout == LinalgMatrixLayout::ColumnMajor) + RowElementCount = M; + + if (GroupSharedMemory) + return RowElementCount; + + uint32_t ElementSize = elementSize(CompType); + return RowElementCount * ElementSize; } size_t totalElements() const { return M * N; } @@ -94,7 +103,7 @@ static std::string buildCompilerArgs(const MatrixParams &Params, SS << " -DN_DIM=" << Params.N; SS << " -DUSE=" << static_cast(Params.Use); SS << " -DSCOPE=" << static_cast(Params.Scope); - SS << " -DSTRIDE=" << Params.strideBytes(); + SS << " -DSTRIDE=" << Params.rowStride(); SS << " -DLAYOUT=" << static_cast(Params.Layout); SS << " -DELEM_SIZE=" << static_cast(elementSize(Params.CompType)); SS << " -DNUMTHREADS=" << Params.NumThreads; @@ -320,7 +329,6 @@ class DxilConf_SM610_LinAlg { TEST_METHOD(LoadStoreDescriptor_Wave_16x16_F16); TEST_METHOD(SplatStore_Wave_16x16_F16); TEST_METHOD(AccumulateDescriptor_Wave_16x16_F16); - TEST_METHOD(AccumulateDescriptor_Thread_16x16_F16); // Load/Store/Accumulate Memory TEST_METHOD(LoadMemory_Wave_16x16_F16); @@ -613,19 +621,6 @@ void DxilConf_SM610_LinAlg::AccumulateDescriptor_Wave_16x16_F16() { runAccumulateDescriptor(D3DDevice, DxcSupport, Params, 12, VerboseLogging); } -void DxilConf_SM610_LinAlg::AccumulateDescriptor_Thread_16x16_F16() { - MatrixParams Params = {}; - Params.CompType = ComponentType::F16; - Params.M = 16; - Params.N = 16; - Params.Use = MatrixUse::Accumulator; - Params.Scope = MatrixScope::Thread; - Params.Layout = LinalgMatrixLayout::RowMajor; - Params.NumThreads = 1; - Params.Enable16Bit = true; - runAccumulateDescriptor(D3DDevice, DxcSupport, Params, 19, VerboseLogging); -} - static const char ElementAccessShader[] = R"( RWByteAddressBuffer Input : register(u0); RWByteAddressBuffer Output : register(u1); @@ -633,7 +628,7 @@ static const char ElementAccessShader[] = R"( // flatten the 2D index into a 1D index then scale by element size // Always store row-major and work it out in the test runner uint coordToByteOffset(uint2 coord) { - return (coord.y * N_DIM + coord.x) * ELEM_SIZE; + return (coord.y * M_DIM + coord.x) * ELEM_SIZE; } [WaveSize(4, 64)] @@ -1394,6 +1389,7 @@ static void runOuterProduct(ID3D12Device *Device, } void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() { + /* MatrixParams Params = {}; Params.CompType = ComponentType::F16; Params.M = 16; @@ -1403,6 +1399,10 @@ void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() { Params.NumThreads = 1; Params.Enable16Bit = true; runOuterProduct(D3DDevice, DxcSupport, Params, VerboseLogging); + */ + hlsl_test::LogCommentFmt( + L"Skipping test as not implemented"); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); } static const char QueryAccumLayoutShader[] = R"( @@ -1471,7 +1471,7 @@ static const char LoadMemoryShader[] = R"( __builtin_LinAlg_MatrixLoadFromMemory( Mat, GsData, OFFSET, STRIDE, LAYOUT); __builtin_LinAlg_MatrixStoreToDescriptor( - Mat, Output, OFFSET, STRIDE, LAYOUT, 128); + Mat, Output, OFFSET, STRIDE * ELEM_SIZE, LAYOUT, 128); } )"; @@ -1523,6 +1523,7 @@ void DxilConf_SM610_LinAlg::LoadMemory_Wave_16x16_F16() { Params.Layout = LinalgMatrixLayout::RowMajor; Params.NumThreads = 64; Params.Enable16Bit = true; + Params.GroupSharedMemory = true; runLoadMemory(D3DDevice, DxcSupport, Params, VerboseLogging); } @@ -1592,6 +1593,7 @@ void DxilConf_SM610_LinAlg::StoreMemory_Wave_16x16_F16() { Params.Layout = LinalgMatrixLayout::RowMajor; Params.NumThreads = 64; Params.Enable16Bit = true; + Params.GroupSharedMemory = true; runStoreMemory(D3DDevice, DxcSupport, Params, VerboseLogging, /*FillValue=*/7.0f); } @@ -1672,6 +1674,7 @@ void DxilConf_SM610_LinAlg::AccumulateMemory_Wave_16x16_F16() { Params.Layout = LinalgMatrixLayout::RowMajor; Params.NumThreads = 64; Params.Enable16Bit = true; + Params.GroupSharedMemory = true; runAccumulateMemory(D3DDevice, DxcSupport, Params, VerboseLogging, /*FillValue=*/7.0f); } From 3fcdebda32ac71387e49d12d1c63e9a197cdb6e6 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Mon, 20 Apr 2026 16:23:25 -0600 Subject: [PATCH 2/4] format --- tools/clang/unittests/HLSLExec/LinAlgTests.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index ac9f69bb17..2c1e30e61d 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -1400,8 +1400,7 @@ void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() { Params.Enable16Bit = true; runOuterProduct(D3DDevice, DxcSupport, Params, VerboseLogging); */ - hlsl_test::LogCommentFmt( - L"Skipping test as not implemented"); + hlsl_test::LogCommentFmt(L"Skipping test as not implemented"); WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); } From 9df4f0a2dce2e912275402ed65f4eac4c0ad67ed Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Mon, 20 Apr 2026 17:47:47 -0600 Subject: [PATCH 3/4] Address comments --- tools/clang/unittests/HLSLExec/LinAlgTests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index 2c1e30e61d..e623533ee3 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -351,7 +351,9 @@ class DxilConf_SM610_LinAlg { // Matrix Vector Arithmetic TEST_METHOD(MatVecMul_Thread_16x16_F16); TEST_METHOD(MatVecMulAdd_Thread_16x16_F16); +#if 0 TEST_METHOD(OuterProduct_Thread_16x16_F16); +#endif // Query Accumulator Layout TEST_METHOD(QueryAccumLayout); @@ -1318,6 +1320,7 @@ void DxilConf_SM610_LinAlg::MatVecMulAdd_Thread_16x16_F16() { ComponentType::F16); } +#if 0 static const char OuterProductShader[] = R"( #define USE_A 0 #define SCOPE_THREAD 0 @@ -1389,7 +1392,6 @@ static void runOuterProduct(ID3D12Device *Device, } void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() { - /* MatrixParams Params = {}; Params.CompType = ComponentType::F16; Params.M = 16; @@ -1399,10 +1401,8 @@ void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() { Params.NumThreads = 1; Params.Enable16Bit = true; runOuterProduct(D3DDevice, DxcSupport, Params, VerboseLogging); - */ - hlsl_test::LogCommentFmt(L"Skipping test as not implemented"); - WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); } +#endif static const char QueryAccumLayoutShader[] = R"( RWByteAddressBuffer Output : register(u0); From 7e6700dc5a74e49e895da1c08009d7acd53378f8 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Mon, 20 Apr 2026 18:10:47 -0600 Subject: [PATCH 4/4] Address comments --- .../clang/unittests/HLSLExec/LinAlgTests.cpp | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index e623533ee3..961cc4f8a1 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -72,21 +72,15 @@ struct MatrixParams { int NumThreads; bool Enable16Bit; bool EmulateTest; - bool GroupSharedMemory = false; - size_t rowStride() const { - // If not Row/Col major, spec says to list 0. - size_t RowElementCount = 0; + size_t strideBytes() const { + uint32_t ES = elementSize(CompType); if (Layout == LinalgMatrixLayout::RowMajor) - RowElementCount = N; + return N * ES; if (Layout == LinalgMatrixLayout::ColumnMajor) - RowElementCount = M; - - if (GroupSharedMemory) - return RowElementCount; - - uint32_t ElementSize = elementSize(CompType); - return RowElementCount * ElementSize; + return M * ES; + // If not Row/Col major, spec says to use 0 + return 0; } size_t totalElements() const { return M * N; } @@ -103,7 +97,7 @@ static std::string buildCompilerArgs(const MatrixParams &Params, SS << " -DN_DIM=" << Params.N; SS << " -DUSE=" << static_cast(Params.Use); SS << " -DSCOPE=" << static_cast(Params.Scope); - SS << " -DSTRIDE=" << Params.rowStride(); + SS << " -DSTRIDE=" << Params.strideBytes(); SS << " -DLAYOUT=" << static_cast(Params.Layout); SS << " -DELEM_SIZE=" << static_cast(elementSize(Params.CompType)); SS << " -DNUMTHREADS=" << Params.NumThreads; @@ -1468,9 +1462,9 @@ static const char LoadMemoryShader[] = R"( [[__LinAlgMatrix_Attributes(COMP_TYPE, M_DIM, N_DIM, USE, SCOPE)]] Mat; __builtin_LinAlg_MatrixLoadFromMemory( - Mat, GsData, OFFSET, STRIDE, LAYOUT); + Mat, GsData, OFFSET / ELEM_SIZE, STRIDE / ELEM_SIZE, LAYOUT); __builtin_LinAlg_MatrixStoreToDescriptor( - Mat, Output, OFFSET, STRIDE * ELEM_SIZE, LAYOUT, 128); + Mat, Output, OFFSET, STRIDE, LAYOUT, 128); } )"; @@ -1522,7 +1516,6 @@ void DxilConf_SM610_LinAlg::LoadMemory_Wave_16x16_F16() { Params.Layout = LinalgMatrixLayout::RowMajor; Params.NumThreads = 64; Params.Enable16Bit = true; - Params.GroupSharedMemory = true; runLoadMemory(D3DDevice, DxcSupport, Params, VerboseLogging); } @@ -1542,7 +1535,7 @@ static const char StoreMemoryShader[] = R"( __builtin_LinAlg_FillMatrix(Mat, FILL_VALUE); __builtin_LinAlg_MatrixStoreToMemory( - Mat, GsData, OFFSET, STRIDE, LAYOUT); + Mat, GsData, OFFSET / ELEM_SIZE, STRIDE / ELEM_SIZE, LAYOUT); for (uint I = 0; I < M_DIM*N_DIM; ++I) { Output.Store(I*ELEM_SIZE, GsData[I]); @@ -1592,7 +1585,6 @@ void DxilConf_SM610_LinAlg::StoreMemory_Wave_16x16_F16() { Params.Layout = LinalgMatrixLayout::RowMajor; Params.NumThreads = 64; Params.Enable16Bit = true; - Params.GroupSharedMemory = true; runStoreMemory(D3DDevice, DxcSupport, Params, VerboseLogging, /*FillValue=*/7.0f); } @@ -1623,7 +1615,7 @@ static const char AccumulateMemoryShader[] = R"( __builtin_LinAlg_FillMatrix(Mat, FILL_VALUE); __builtin_LinAlg_MatrixAccumulateToMemory( - Mat, GsData, OFFSET, STRIDE, LAYOUT); + Mat, GsData, OFFSET / ELEM_SIZE, STRIDE / ELEM_SIZE, LAYOUT); for (uint I = 0; I < M_DIM*N_DIM; ++I) { Output.Store(I*ELEM_SIZE, GsData[I]); @@ -1673,7 +1665,6 @@ void DxilConf_SM610_LinAlg::AccumulateMemory_Wave_16x16_F16() { Params.Layout = LinalgMatrixLayout::RowMajor; Params.NumThreads = 64; Params.Enable16Bit = true; - Params.GroupSharedMemory = true; runAccumulateMemory(D3DDevice, DxcSupport, Params, VerboseLogging, /*FillValue=*/7.0f); }