@@ -209,6 +209,9 @@ class ExecutionTest {
209209 TEST_METHOD (WaveIntrinsicsInPSTest);
210210 TEST_METHOD (WaveSizeTest);
211211 TEST_METHOD (WaveSizeRangeTest);
212+ TEST_METHOD (GroupSharedLimitTest);
213+ TEST_METHOD (GroupSharedLimitASTest);
214+ TEST_METHOD (GroupSharedLimitMSTest);
212215 TEST_METHOD (PartialDerivTest);
213216 TEST_METHOD (DerivativesTest);
214217 TEST_METHOD (ComputeSampleTest);
@@ -10619,6 +10622,315 @@ void ExecutionTest::WaveSizeRangeTest() {
1061910622 m_support);
1062010623}
1062110624
10625+ // Helper: create a SM 6.10 device with HLK-aware skip/fail logic.
10626+ // Returns true if device was created, false if skipped.
10627+ static bool CreateGSMLimitTestDevice (D3D12SDKSelector *D3D12SDK,
10628+ CComPtr<ID3D12Device> &Device) {
10629+ bool FailIfRequirementsNotMet = false ;
10630+ #ifdef _HLK_CONF
10631+ FailIfRequirementsNotMet = true ;
10632+ #endif
10633+ WEX::TestExecution::RuntimeParameters::TryGetValue (
10634+ L" FailIfRequirementsNotMet" , FailIfRequirementsNotMet);
10635+
10636+ const bool SkipUnsupported = !FailIfRequirementsNotMet;
10637+ if (!D3D12SDK->createDevice (&Device, D3D_SHADER_MODEL_6_10,
10638+ SkipUnsupported)) {
10639+ if (FailIfRequirementsNotMet)
10640+ LogErrorFmt (L" Device creation failed, resulting in test failure, since "
10641+ L" FailIfRequirementsNotMet is set." );
10642+ return false ;
10643+ }
10644+ return true ;
10645+ }
10646+
10647+ // Helper: run a GroupSharedLimit shader op test, read back UAV, and verify
10648+ // that the output buffer contains sequential uint values [0, GsmDwords).
10649+ static void RunGSMLimitShaderAndVerify (
10650+ ID3D12Device *Device, dxc::SpecificDllLoader &Support, LPCSTR OpName,
10651+ const char *ShaderText, UINT GsmDwords, UINT ShaderIndex,
10652+ std::shared_ptr<st::ShaderOpSet> ShaderOpSet) {
10653+ std::shared_ptr<st::ShaderOpTestResult> Test = st::RunShaderOpTestAfterParse (
10654+ Device, Support, OpName,
10655+ [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *Op) {
10656+ VERIFY_IS_TRUE ((0 == strncmp (Name, " UAVBuffer0" , 10 )));
10657+ Op->Shaders .at (ShaderIndex).Text = ShaderText;
10658+ Data.resize (sizeof (uint32_t ) * GsmDwords);
10659+ memset (Data.data (), 0 , Data.size ());
10660+ },
10661+ ShaderOpSet);
10662+
10663+ MappedData DataUav;
10664+ Test->Test ->GetReadBackData (" UAVBuffer0" , &DataUav);
10665+ const uint32_t *OutData = (const uint32_t *)DataUav.data ();
10666+
10667+ for (UINT I = 0 ; I < GsmDwords; I++) {
10668+ VERIFY_ARE_EQUAL (OutData[I], I);
10669+ }
10670+ }
10671+
10672+ void ExecutionTest::GroupSharedLimitTest () {
10673+ WEX::TestExecution::SetVerifyOutput VerifySettings (
10674+ WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
10675+
10676+ CComPtr<ID3D12Device> Device;
10677+ if (!CreateGSMLimitTestDevice (&*D3D12SDK, Device))
10678+ return ;
10679+
10680+ const UINT MaxGSMCS = getMaxGroupSharedMemoryCS (Device);
10681+ LogCommentFmt (L" Device MaxGroupSharedMemoryPerGroupCS: %u bytes" , MaxGSMCS);
10682+
10683+ // Read shader config
10684+ CComPtr<IStream> Stream;
10685+ std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
10686+ std::make_shared<st::ShaderOpSet>();
10687+ readHlslDataIntoNewStream (L" ShaderOpArith.xml" , &Stream, m_support);
10688+ st::ParseShaderOpSetFromStream (Stream, ShaderOpSet.get ());
10689+
10690+ // Test 1: GroupSharedLimit that is >= usage should succeed.
10691+ // Use 4096 DWORDs (16384 bytes) of TGSM with a limit of 16384 bytes.
10692+ {
10693+ static const UINT GSM_DWORDS = 4096 ;
10694+
10695+ LogCommentFmt (L" Test 1: GroupSharedLimit == usage (16384 bytes). "
10696+ L" Shader should compile and execute successfully." );
10697+
10698+ static const char Shader[] =
10699+ R"(
10700+ #define GSM_DWORDS 4096
10701+ #define NUM_THREADS 64
10702+ groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes
10703+ RWStructuredBuffer<uint> g_output : register(u0);
10704+
10705+ [GroupSharedLimit(16384)]
10706+ [numthreads(NUM_THREADS, 1, 1)]
10707+ void main(uint GI : SV_GroupIndex) {
10708+ for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS)
10709+ g_shared[i] = i;
10710+ GroupMemoryBarrierWithGroupSync();
10711+ if (GI == 0) {
10712+ for (uint j = 0; j < GSM_DWORDS; j++)
10713+ g_output[j] = g_shared[j];
10714+ }
10715+ })" ;
10716+
10717+ RunGSMLimitShaderAndVerify (Device, m_support, " GroupSharedLimitTest" ,
10718+ Shader, GSM_DWORDS, 0 , ShaderOpSet);
10719+ LogCommentFmt (L" Test 1 passed: GroupSharedLimit == usage succeeded." );
10720+ }
10721+
10722+ // Test 2: GroupSharedLimit and usage are larger than the default.
10723+ // Use 9216 DWORDs (36864 bytes) of TGSM, which exceeds the default 32768,
10724+ // but set GroupSharedLimit to 36864 so it should succeed.
10725+ static const UINT GSM_BYTES_TEST2 = 36864 ;
10726+ if (MaxGSMCS < GSM_BYTES_TEST2) {
10727+ LogCommentFmt (L" Test 2 skipped: device max GSM (%u) < %u bytes" , MaxGSMCS,
10728+ GSM_BYTES_TEST2);
10729+ } else {
10730+ static const UINT GSM_DWORDS = GSM_BYTES_TEST2 / sizeof (uint32_t );
10731+
10732+ LogCommentFmt (L" Test 2: GroupSharedLimit (%u) and usage (%u bytes), "
10733+ L" both above default (32768). "
10734+ L" Shader should compile and execute successfully." ,
10735+ GSM_BYTES_TEST2, GSM_BYTES_TEST2);
10736+
10737+ static const char Shader[] =
10738+ R"(
10739+ #define GSM_DWORDS 9216
10740+ #define NUM_THREADS 64
10741+ groupshared uint g_shared[GSM_DWORDS]; // 36864 bytes
10742+ RWStructuredBuffer<uint> g_output : register(u0);
10743+
10744+ [GroupSharedLimit(36864)]
10745+ [numthreads(NUM_THREADS, 1, 1)]
10746+ void main(uint GI : SV_GroupIndex) {
10747+ for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS)
10748+ g_shared[i] = i;
10749+ GroupMemoryBarrierWithGroupSync();
10750+ if (GI == 0) {
10751+ for (uint j = 0; j < GSM_DWORDS; j++)
10752+ g_output[j] = g_shared[j];
10753+ }
10754+ })" ;
10755+
10756+ RunGSMLimitShaderAndVerify (Device, m_support, " GroupSharedLimitTest" ,
10757+ Shader, GSM_DWORDS, 0 , ShaderOpSet);
10758+ LogCommentFmt (L" Test 2 passed: GroupSharedLimit > default succeeded." );
10759+ }
10760+
10761+ // Test 3: No GroupSharedLimit attribute, usage within default (32768 bytes).
10762+ // The shader should use default limit and succeed.
10763+ {
10764+ static const UINT GSM_DWORDS = 8192 ;
10765+
10766+ LogCommentFmt (L" Test 3: No GroupSharedLimit, usage (32768 bytes) <= "
10767+ L" default limit. Shader should succeed." );
10768+
10769+ static const char Shader[] =
10770+ R"(
10771+ #define GSM_DWORDS 8192
10772+ #define NUM_THREADS 64
10773+ groupshared uint g_shared[GSM_DWORDS]; // 32768 bytes (default max)
10774+ RWStructuredBuffer<uint> g_output : register(u0);
10775+
10776+ [numthreads(NUM_THREADS, 1, 1)]
10777+ void main(uint GI : SV_GroupIndex) {
10778+ for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS)
10779+ g_shared[i] = i;
10780+ GroupMemoryBarrierWithGroupSync();
10781+ if (GI == 0) {
10782+ for (uint j = 0; j < GSM_DWORDS; j++)
10783+ g_output[j] = g_shared[j];
10784+ }
10785+ })" ;
10786+
10787+ RunGSMLimitShaderAndVerify (Device, m_support, " GroupSharedLimitTest" ,
10788+ Shader, GSM_DWORDS, 0 , ShaderOpSet);
10789+ LogCommentFmt (L" Test 3 passed: No attribute with default usage succeeded." );
10790+ }
10791+ }
10792+
10793+ void ExecutionTest::GroupSharedLimitASTest () {
10794+ WEX::TestExecution::SetVerifyOutput VerifySettings (
10795+ WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
10796+
10797+ CComPtr<ID3D12Device> Device;
10798+ if (!CreateGSMLimitTestDevice (&*D3D12SDK, Device))
10799+ return ;
10800+
10801+ if (!doesDeviceSupportMeshShaders (Device)) {
10802+ LogCommentFmt (L" Device does not support mesh shaders, skipping." );
10803+ WEX::Logging::Log::Result (WEX::Logging::TestResults::Skipped);
10804+ return ;
10805+ }
10806+
10807+ const UINT MaxGSMAS = getMaxGroupSharedMemoryAS (Device);
10808+ LogCommentFmt (L" Device MaxGroupSharedMemoryPerGroupAS: %u bytes" , MaxGSMAS);
10809+
10810+ CComPtr<IStream> Stream;
10811+ std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
10812+ std::make_shared<st::ShaderOpSet>();
10813+ readHlslDataIntoNewStream (L" ShaderOpArith.xml" , &Stream, m_support);
10814+ st::ParseShaderOpSetFromStream (Stream, ShaderOpSet.get ());
10815+
10816+ // Test: AS shader fills groupshared memory and writes to UAV.
10817+ {
10818+ static const UINT GSM_DWORDS = 4096 ;
10819+
10820+ LogCommentFmt (L" AS Test: GroupSharedLimit == usage (16384 bytes). "
10821+ L" Amplification shader should compile and execute." );
10822+
10823+ static const char Shader[] =
10824+ R"(
10825+ struct Payload { uint dummy; };
10826+
10827+ #define GSM_DWORDS 4096
10828+ groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes
10829+ RWStructuredBuffer<uint> g_output : register(u0);
10830+
10831+ [GroupSharedLimit(16384)]
10832+ [numthreads(64, 1, 1)]
10833+ void ASMain(uint GI : SV_GroupIndex) {
10834+ for (uint i = GI; i < GSM_DWORDS; i += 64)
10835+ g_shared[i] = i;
10836+ GroupMemoryBarrierWithGroupSync();
10837+ if (GI == 0) {
10838+ for (uint j = 0; j < GSM_DWORDS; j++)
10839+ g_output[j] = g_shared[j];
10840+ }
10841+ Payload payload;
10842+ payload.dummy = 0;
10843+ DispatchMesh(1, 1, 1, payload);
10844+ }
10845+
10846+ struct MeshOutput {
10847+ float4 pos : SV_Position;
10848+ };
10849+
10850+ [OutputTopology("triangle")]
10851+ [numthreads(1, 1, 1)]
10852+ void MSMain(in payload Payload p,
10853+ out vertices MeshOutput verts[3],
10854+ out indices uint3 tris[1]) {
10855+ SetMeshOutputCounts(0, 0);
10856+ }
10857+
10858+ float4 PSMain() : SV_Target { return float4(0,0,0,0); }
10859+ )" ;
10860+
10861+ RunGSMLimitShaderAndVerify (Device, m_support, " GroupSharedLimitASTest" ,
10862+ Shader, GSM_DWORDS, 0 , ShaderOpSet);
10863+ LogCommentFmt (
10864+ L" AS Test passed: GroupSharedLimit in amplification shader succeeded." );
10865+ }
10866+ }
10867+
10868+ void ExecutionTest::GroupSharedLimitMSTest () {
10869+ WEX::TestExecution::SetVerifyOutput VerifySettings (
10870+ WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
10871+
10872+ CComPtr<ID3D12Device> Device;
10873+ if (!CreateGSMLimitTestDevice (&*D3D12SDK, Device))
10874+ return ;
10875+
10876+ if (!doesDeviceSupportMeshShaders (Device)) {
10877+ LogCommentFmt (L" Device does not support mesh shaders, skipping." );
10878+ WEX::Logging::Log::Result (WEX::Logging::TestResults::Skipped);
10879+ return ;
10880+ }
10881+
10882+ const UINT MaxGSMMS = getMaxGroupSharedMemoryMS (Device);
10883+ LogCommentFmt (L" Device MaxGroupSharedMemoryPerGroupMS: %u bytes" , MaxGSMMS);
10884+
10885+ CComPtr<IStream> Stream;
10886+ std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
10887+ std::make_shared<st::ShaderOpSet>();
10888+ readHlslDataIntoNewStream (L" ShaderOpArith.xml" , &Stream, m_support);
10889+ st::ParseShaderOpSetFromStream (Stream, ShaderOpSet.get ());
10890+
10891+ // Test: MS shader fills groupshared memory and writes to UAV.
10892+ {
10893+ static const UINT GSM_DWORDS = 4096 ;
10894+
10895+ LogCommentFmt (L" MS Test: GroupSharedLimit == usage (16384 bytes). "
10896+ L" Mesh shader should compile and execute." );
10897+
10898+ static const char Shader[] =
10899+ R"(
10900+ #define GSM_DWORDS 4096
10901+ groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes
10902+ RWStructuredBuffer<uint> g_output : register(u0);
10903+
10904+ struct MeshOutput {
10905+ float4 pos : SV_Position;
10906+ };
10907+
10908+ [GroupSharedLimit(16384)]
10909+ [OutputTopology("triangle")]
10910+ [numthreads(64, 1, 1)]
10911+ void MSMain(uint GI : SV_GroupIndex,
10912+ out vertices MeshOutput verts[3],
10913+ out indices uint3 tris[1]) {
10914+ SetMeshOutputCounts(0, 0);
10915+ for (uint i = GI; i < GSM_DWORDS; i += 64)
10916+ g_shared[i] = i;
10917+ GroupMemoryBarrierWithGroupSync();
10918+ if (GI == 0) {
10919+ for (uint j = 0; j < GSM_DWORDS; j++)
10920+ g_output[j] = g_shared[j];
10921+ }
10922+ }
10923+
10924+ float4 PSMain() : SV_Target { return float4(0,0,0,0); }
10925+ )" ;
10926+
10927+ RunGSMLimitShaderAndVerify (Device, m_support, " GroupSharedLimitMSTest" ,
10928+ Shader, GSM_DWORDS, 0 , ShaderOpSet);
10929+ LogCommentFmt (
10930+ L" MS Test passed: GroupSharedLimit in mesh shader succeeded." );
10931+ }
10932+ }
10933+
1062210934// Atomic operation testing
1062310935
1062410936// Atomic tests take a single integer index as input and contort it into some
0 commit comments