From 60ea20f7c23eae9b9b4f72b7ed9e50a9bf7a86dc Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 9 Apr 2026 15:52:21 -0600 Subject: [PATCH 1/6] [SM6.10][HLK] Fix GetElement test, add tranpose to helper --- .../clang/unittests/HLSLExec/LinAlgTests.cpp | 82 +++++++++++-------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index 2e4ce65d57..46b4dc15b3 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -228,37 +228,47 @@ static bool fillInputBuffer(LPCSTR Name, std::vector &Data, return false; } -static VariantCompType makeExpected(ComponentType CompType, size_t NumElements, - float StartingVal, bool Increment) { - switch (CompType) { - case ComponentType::F32: { - std::vector Floats(NumElements); - for (size_t I = 0; I < NumElements; I++) - Floats[I] = StartingVal + static_cast(Increment ? I : 0); - return Floats; - } - case ComponentType::I32: { - DXASSERT(StartingVal < static_cast(INT_MAX), - "Value too large to cast to int32_t"); - std::vector Ints(NumElements); - for (size_t I = 0; I < NumElements; I++) - Ints[I] = static_cast(StartingVal) + - static_cast(Increment ? I : 0); - return Ints; - } - case ComponentType::F16: { - std::vector Halfs(NumElements); - for (size_t I = 0; I < NumElements; I++) { - // Downcasting is safe here since HLSLHalf_t will clamp if F is too large. - float F = StartingVal + static_cast(Increment ? I : 0); - Halfs[I] = HLSLHalf_t(F); +static VariantCompType makeExpected(ComponentType CompType, int32_t M, int32_t N, + float StartingVal, bool Increment = true, bool Transpose = false) { + int32_t NumElements = M * N; + std::vector Floats(NumElements); + std::vector Ints(NumElements); + std::vector Halfs(NumElements); + + for (int32_t I = 0; I < M; ++I) { + for (int32_t J = 0; J < M; ++J) { + int32_t Value = I * M + J; + int32_t Idx = Transpose ? J * N + I : Value; + switch (CompType) { + case ComponentType::F32: + Floats[Idx] = StartingVal + static_cast(Increment ? Value : 0); + break; + case ComponentType::I32: + DXASSERT(StartingVal < static_cast(INT_MAX), + "Value too large to cast to int32_t"); + Ints[Idx] = static_cast(StartingVal) + (Increment ? Value : 0); + break; + case ComponentType::F16: { + // Downcasting is safe here since HLSLHalf_t will clamp if F is too large. + float F = StartingVal + static_cast(Increment ? Value : 0); + Halfs[Idx] = HLSLHalf_t(F); + break; + } + } } - return Halfs; - } } - DXASSERT(false, "Unable to fill unexpected ComponentType"); - return std::vector(); + switch (CompType) { + case ComponentType::F32: + return Floats; + case ComponentType::I32: + return Ints; + case ComponentType::F16: + return Halfs; + default: + DXASSERT(false, "Unable to fill unexpected ComponentType"); + return Floats; + } } static void logCompiledButSkipping() { @@ -429,7 +439,7 @@ static void runLoadStoreRoundtrip(ID3D12Device *Device, return; } - auto Expected = makeExpected(Params.CompType, NumElements, 1, true); + auto Expected = makeExpected(Params.CompType, Params.M, Params.N, 1); // Construct the ShaderOp: two UAV buffers, load from one, store to other. auto Op = createComputeOp(LoadStoreShader, Target.c_str(), "UAV(u0), UAV(u1)", @@ -517,7 +527,7 @@ static void runSplatStore(ID3D12Device *Device, return; } - auto Expected = makeExpected(Params.CompType, NumElements, FillValue, false); + auto Expected = makeExpected(Params.CompType, Params.M, Params.N, FillValue, false); auto Op = createComputeOp(SplatStoreShader, Target.c_str(), "UAV(u0)", Args.c_str()); @@ -553,11 +563,13 @@ static const char ElementAccessShader[] = R"( RWByteAddressBuffer Output : register(u1); // flatten the 2D index into a 1D index then scale by element size + // Always store row-major and work it out in the test runner uint coordToByteOffset(uint2 coord) { - return (coord.x * MAJOR_DIM + coord.y) * ELEM_SIZE; + return (coord.y * N_DIM + coord.x) * ELEM_SIZE; } #ifndef EMULATE_TEST + [WaveSize(4, 64)] [numthreads(NUMTHREADS, 1, 1)] void main(uint threadIndex : SV_GroupIndex) { __builtin_LinAlgMatrix @@ -605,8 +617,7 @@ static void runElementAccess(ID3D12Device *Device, const size_t NumThreads = Params.NumThreads; const size_t InputBufSize = Params.totalBytes(); const size_t ElementSize = elementSize(Params.CompType); - const size_t MajorDim = - Params.Layout == LinalgMatrixLayout::RowMajor ? Params.M : Params.N; + // Output: ElementSize bytes per element // 1 element for each mat idx // 1 uint for each thread's length @@ -618,7 +629,6 @@ static void runElementAccess(ID3D12Device *Device, Target = "cs_6_8"; std::stringstream ExtraDefs; - ExtraDefs << " -DMAJOR_DIM=" << MajorDim; std::string Args = buildCompilerArgs(Params, ExtraDefs.str().c_str()); compileShader(DxcSupport, ElementAccessShader, Target.c_str(), Args, Verbose); @@ -628,7 +638,7 @@ static void runElementAccess(ID3D12Device *Device, return; } - auto Expected = makeExpected(Params.CompType, NumElements, 1, true); + auto Expected = makeExpected(Params.CompType, Params.M, Params.N, 1); auto Op = createComputeOp(ElementAccessShader, Target.c_str(), "UAV(u0), UAV(u1)", Args.c_str()); @@ -674,7 +684,7 @@ void DxilConf_SM610_LinAlg::ElementAccess_Wave_16x16_F16() { Params.Use = MatrixUse::Accumulator; Params.Scope = MatrixScope::Wave; Params.Layout = LinalgMatrixLayout::RowMajor; - Params.NumThreads = 4; + Params.NumThreads = 64; Params.Enable16Bit = true; Params.EmulateTest = EmulateTest; runElementAccess(D3DDevice, DxcSupport, Params, VerboseLogging, CompileOnly); From db5c4dabc354cc0e383ce8cf2fbbb5da487ad390 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 9 Apr 2026 22:02:33 +0000 Subject: [PATCH 2/6] chore: autopublish 2026-04-09T22:02:33Z --- .../clang/unittests/HLSLExec/LinAlgTests.cpp | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index 46b4dc15b3..1b8dbe8800 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -228,8 +228,10 @@ static bool fillInputBuffer(LPCSTR Name, std::vector &Data, return false; } -static VariantCompType makeExpected(ComponentType CompType, int32_t M, int32_t N, - float StartingVal, bool Increment = true, bool Transpose = false) { +static VariantCompType makeExpected(ComponentType CompType, int32_t M, + int32_t N, float StartingVal, + bool Increment = true, + bool Transpose = false) { int32_t NumElements = M * N; std::vector Floats(NumElements); std::vector Ints(NumElements); @@ -249,7 +251,8 @@ static VariantCompType makeExpected(ComponentType CompType, int32_t M, int32_t N Ints[Idx] = static_cast(StartingVal) + (Increment ? Value : 0); break; case ComponentType::F16: { - // Downcasting is safe here since HLSLHalf_t will clamp if F is too large. + // Downcasting is safe here since HLSLHalf_t will clamp if F is too + // large. float F = StartingVal + static_cast(Increment ? Value : 0); Halfs[Idx] = HLSLHalf_t(F); break; @@ -259,15 +262,15 @@ static VariantCompType makeExpected(ComponentType CompType, int32_t M, int32_t N } switch (CompType) { - case ComponentType::F32: - return Floats; - case ComponentType::I32: - return Ints; - case ComponentType::F16: - return Halfs; - default: - DXASSERT(false, "Unable to fill unexpected ComponentType"); - return Floats; + case ComponentType::F32: + return Floats; + case ComponentType::I32: + return Ints; + case ComponentType::F16: + return Halfs; + default: + DXASSERT(false, "Unable to fill unexpected ComponentType"); + return Floats; } } @@ -527,7 +530,8 @@ static void runSplatStore(ID3D12Device *Device, return; } - auto Expected = makeExpected(Params.CompType, Params.M, Params.N, FillValue, false); + auto Expected = + makeExpected(Params.CompType, Params.M, Params.N, FillValue, false); auto Op = createComputeOp(SplatStoreShader, Target.c_str(), "UAV(u0)", Args.c_str()); From 9cc7e5464d4a600d6e1c0eb0afa94ffbedcb714b Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 9 Apr 2026 16:34:23 -0600 Subject: [PATCH 3/6] address comments --- tools/clang/unittests/HLSLExec/LinAlgTests.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index 1b8dbe8800..fe068093ae 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -238,7 +238,7 @@ static VariantCompType makeExpected(ComponentType CompType, int32_t M, std::vector Halfs(NumElements); for (int32_t I = 0; I < M; ++I) { - for (int32_t J = 0; J < M; ++J) { + for (int32_t J = 0; J < N; ++J) { int32_t Value = I * M + J; int32_t Idx = Transpose ? J * N + I : Value; switch (CompType) { @@ -397,6 +397,7 @@ static const char LoadStoreShader[] = R"( RWByteAddressBuffer Output : register(u1); #ifndef EMULATE_TEST + [WaveSize(4, 64)] [numthreads(NUMTHREADS, 1, 1)] void main() { __builtin_LinAlgMatrix @@ -476,7 +477,7 @@ void DxilConf_SM610_LinAlg::LoadStoreRoundtrip_Wave_16x16_F16() { Params.Use = MatrixUse::A; Params.Scope = MatrixScope::Wave; Params.Layout = LinalgMatrixLayout::RowMajor; - Params.NumThreads = 4; + Params.NumThreads = 64; Params.Enable16Bit = true; Params.EmulateTest = EmulateTest; runLoadStoreRoundtrip(D3DDevice, DxcSupport, Params, VerboseLogging, @@ -487,6 +488,7 @@ static const char SplatStoreShader[] = R"( RWByteAddressBuffer Output : register(u0); #ifndef EMULATE_TEST + [WaveSize(4, 64)] [numthreads(NUMTHREADS, 1, 1)] void main() { __builtin_LinAlgMatrix @@ -555,7 +557,7 @@ void DxilConf_SM610_LinAlg::SplatStore_Wave_16x16_F16() { Params.Use = MatrixUse::Accumulator; Params.Scope = MatrixScope::Wave; Params.Layout = LinalgMatrixLayout::RowMajor; - Params.NumThreads = 4; + Params.NumThreads = 64; Params.Enable16Bit = true; Params.EmulateTest = EmulateTest; runSplatStore(D3DDevice, DxcSupport, Params, 42.0f, VerboseLogging, From 45bf58893a9d8f44eb03a2815b7d1da8664c7469 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 9 Apr 2026 17:49:56 -0600 Subject: [PATCH 4/6] Address comments --- .../clang/unittests/HLSLExec/LinAlgTests.cpp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index fe068093ae..44981cd06f 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -232,23 +232,25 @@ static VariantCompType makeExpected(ComponentType CompType, int32_t M, int32_t N, float StartingVal, bool Increment = true, bool Transpose = false) { - int32_t NumElements = M * N; + const size_t NumElements = M * N; std::vector Floats(NumElements); std::vector Ints(NumElements); std::vector Halfs(NumElements); - for (int32_t I = 0; I < M; ++I) { - for (int32_t J = 0; J < N; ++J) { - int32_t Value = I * M + J; - int32_t Idx = Transpose ? J * N + I : Value; + for (size_t I = 0; I < M; ++I) { + for (size_t J = 0; J < N; ++J) { + size_t Value = I * M + J; + size_t Idx = Transpose ? J * N + I : Value; switch (CompType) { case ComponentType::F32: Floats[Idx] = StartingVal + static_cast(Increment ? Value : 0); break; case ComponentType::I32: - DXASSERT(StartingVal < static_cast(INT_MAX), + VERIFY_IS_TRUE(StartingVal < static_cast(std::numeric_limits::max()), "Value too large to cast to int32_t"); - Ints[Idx] = static_cast(StartingVal) + (Increment ? Value : 0); + VERIFY_IS_TRUE(StartingVal > static_cast(std::numeric_limits::min()), + "Value too small to cast to int32_t"); + Ints[Idx] = static_cast(StartingVal) + static_cast(Increment ? Value : 0); break; case ComponentType::F16: { // Downcasting is safe here since HLSLHalf_t will clamp if F is too @@ -257,6 +259,8 @@ static VariantCompType makeExpected(ComponentType CompType, int32_t M, Halfs[Idx] = HLSLHalf_t(F); break; } + default: + VERIFY_IS_TRUE(false, "Unable to fill unexpected ComponentType"); } } } @@ -269,7 +273,7 @@ static VariantCompType makeExpected(ComponentType CompType, int32_t M, case ComponentType::F16: return Halfs; default: - DXASSERT(false, "Unable to fill unexpected ComponentType"); + VERIFY_IS_TRUE(false, "Unable to fill unexpected ComponentType"); return Floats; } } From 41ae95cce6fb37b447b332e7ef184dc9be0e4d32 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 9 Apr 2026 17:52:56 -0600 Subject: [PATCH 5/6] missing break --- tools/clang/unittests/HLSLExec/LinAlgTests.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index 44981cd06f..4146da0cb2 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -261,6 +261,7 @@ static VariantCompType makeExpected(ComponentType CompType, int32_t M, } default: VERIFY_IS_TRUE(false, "Unable to fill unexpected ComponentType"); + break; } } } From 6906edbd39ded993618c3093d8a50d42bf5fc5df Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 9 Apr 2026 23:57:40 +0000 Subject: [PATCH 6/6] chore: autopublish 2026-04-09T23:57:39Z --- tools/clang/unittests/HLSLExec/LinAlgTests.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp index 4146da0cb2..6eb637cdcd 100644 --- a/tools/clang/unittests/HLSLExec/LinAlgTests.cpp +++ b/tools/clang/unittests/HLSLExec/LinAlgTests.cpp @@ -246,11 +246,14 @@ static VariantCompType makeExpected(ComponentType CompType, int32_t M, Floats[Idx] = StartingVal + static_cast(Increment ? Value : 0); break; case ComponentType::I32: - VERIFY_IS_TRUE(StartingVal < static_cast(std::numeric_limits::max()), - "Value too large to cast to int32_t"); - VERIFY_IS_TRUE(StartingVal > static_cast(std::numeric_limits::min()), - "Value too small to cast to int32_t"); - Ints[Idx] = static_cast(StartingVal) + static_cast(Increment ? Value : 0); + VERIFY_IS_TRUE(StartingVal < static_cast( + std::numeric_limits::max()), + "Value too large to cast to int32_t"); + VERIFY_IS_TRUE(StartingVal > static_cast( + std::numeric_limits::min()), + "Value too small to cast to int32_t"); + Ints[Idx] = static_cast(StartingVal) + + static_cast(Increment ? Value : 0); break; case ComponentType::F16: { // Downcasting is safe here since HLSLHalf_t will clamp if F is too