Skip to content

Commit 400ceb4

Browse files
authored
Initial HLK test for GroupSharedLimit (#8160)
1 parent e6e3717 commit 400ceb4

4 files changed

Lines changed: 427 additions & 2 deletions

File tree

tools/clang/unittests/HLSLExec/ExecutionTest.cpp

Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ class ExecutionTest {
209209
TEST_METHOD(WaveIntrinsicsInPSTest);
210210
TEST_METHOD(WaveSizeTest);
211211
TEST_METHOD(WaveSizeRangeTest);
212+
TEST_METHOD(GroupSharedLimitTest);
213+
TEST_METHOD(GroupSharedLimitASTest);
214+
TEST_METHOD(GroupSharedLimitMSTest);
212215
TEST_METHOD(PartialDerivTest);
213216
TEST_METHOD(DerivativesTest);
214217
TEST_METHOD(ComputeSampleTest);
@@ -10619,6 +10622,315 @@ void ExecutionTest::WaveSizeRangeTest() {
1061910622
m_support);
1062010623
}
1062110624

10625+
// Helper: create a SM 6.10 device with HLK-aware skip/fail logic.
10626+
// Returns true if device was created, false if skipped.
10627+
static bool CreateGSMLimitTestDevice(D3D12SDKSelector *D3D12SDK,
10628+
CComPtr<ID3D12Device> &Device) {
10629+
bool FailIfRequirementsNotMet = false;
10630+
#ifdef _HLK_CONF
10631+
FailIfRequirementsNotMet = true;
10632+
#endif
10633+
WEX::TestExecution::RuntimeParameters::TryGetValue(
10634+
L"FailIfRequirementsNotMet", FailIfRequirementsNotMet);
10635+
10636+
const bool SkipUnsupported = !FailIfRequirementsNotMet;
10637+
if (!D3D12SDK->createDevice(&Device, D3D_SHADER_MODEL_6_10,
10638+
SkipUnsupported)) {
10639+
if (FailIfRequirementsNotMet)
10640+
LogErrorFmt(L"Device creation failed, resulting in test failure, since "
10641+
L"FailIfRequirementsNotMet is set.");
10642+
return false;
10643+
}
10644+
return true;
10645+
}
10646+
10647+
// Helper: run a GroupSharedLimit shader op test, read back UAV, and verify
10648+
// that the output buffer contains sequential uint values [0, GsmDwords).
10649+
static void RunGSMLimitShaderAndVerify(
10650+
ID3D12Device *Device, dxc::SpecificDllLoader &Support, LPCSTR OpName,
10651+
const char *ShaderText, UINT GsmDwords, UINT ShaderIndex,
10652+
std::shared_ptr<st::ShaderOpSet> ShaderOpSet) {
10653+
std::shared_ptr<st::ShaderOpTestResult> Test = st::RunShaderOpTestAfterParse(
10654+
Device, Support, OpName,
10655+
[&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *Op) {
10656+
VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10)));
10657+
Op->Shaders.at(ShaderIndex).Text = ShaderText;
10658+
Data.resize(sizeof(uint32_t) * GsmDwords);
10659+
memset(Data.data(), 0, Data.size());
10660+
},
10661+
ShaderOpSet);
10662+
10663+
MappedData DataUav;
10664+
Test->Test->GetReadBackData("UAVBuffer0", &DataUav);
10665+
const uint32_t *OutData = (const uint32_t *)DataUav.data();
10666+
10667+
for (UINT I = 0; I < GsmDwords; I++) {
10668+
VERIFY_ARE_EQUAL(OutData[I], I);
10669+
}
10670+
}
10671+
10672+
void ExecutionTest::GroupSharedLimitTest() {
10673+
WEX::TestExecution::SetVerifyOutput VerifySettings(
10674+
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
10675+
10676+
CComPtr<ID3D12Device> Device;
10677+
if (!CreateGSMLimitTestDevice(&*D3D12SDK, Device))
10678+
return;
10679+
10680+
const UINT MaxGSMCS = getMaxGroupSharedMemoryCS(Device);
10681+
LogCommentFmt(L"Device MaxGroupSharedMemoryPerGroupCS: %u bytes", MaxGSMCS);
10682+
10683+
// Read shader config
10684+
CComPtr<IStream> Stream;
10685+
std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
10686+
std::make_shared<st::ShaderOpSet>();
10687+
readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support);
10688+
st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get());
10689+
10690+
// Test 1: GroupSharedLimit that is >= usage should succeed.
10691+
// Use 4096 DWORDs (16384 bytes) of TGSM with a limit of 16384 bytes.
10692+
{
10693+
static const UINT GSM_DWORDS = 4096;
10694+
10695+
LogCommentFmt(L"Test 1: GroupSharedLimit == usage (16384 bytes). "
10696+
L"Shader should compile and execute successfully.");
10697+
10698+
static const char Shader[] =
10699+
R"(
10700+
#define GSM_DWORDS 4096
10701+
#define NUM_THREADS 64
10702+
groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes
10703+
RWStructuredBuffer<uint> g_output : register(u0);
10704+
10705+
[GroupSharedLimit(16384)]
10706+
[numthreads(NUM_THREADS, 1, 1)]
10707+
void main(uint GI : SV_GroupIndex) {
10708+
for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS)
10709+
g_shared[i] = i;
10710+
GroupMemoryBarrierWithGroupSync();
10711+
if (GI == 0) {
10712+
for (uint j = 0; j < GSM_DWORDS; j++)
10713+
g_output[j] = g_shared[j];
10714+
}
10715+
})";
10716+
10717+
RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitTest",
10718+
Shader, GSM_DWORDS, 0, ShaderOpSet);
10719+
LogCommentFmt(L"Test 1 passed: GroupSharedLimit == usage succeeded.");
10720+
}
10721+
10722+
// Test 2: GroupSharedLimit and usage are larger than the default.
10723+
// Use 9216 DWORDs (36864 bytes) of TGSM, which exceeds the default 32768,
10724+
// but set GroupSharedLimit to 36864 so it should succeed.
10725+
static const UINT GSM_BYTES_TEST2 = 36864;
10726+
if (MaxGSMCS < GSM_BYTES_TEST2) {
10727+
LogCommentFmt(L"Test 2 skipped: device max GSM (%u) < %u bytes", MaxGSMCS,
10728+
GSM_BYTES_TEST2);
10729+
} else {
10730+
static const UINT GSM_DWORDS = GSM_BYTES_TEST2 / sizeof(uint32_t);
10731+
10732+
LogCommentFmt(L"Test 2: GroupSharedLimit (%u) and usage (%u bytes), "
10733+
L"both above default (32768). "
10734+
L"Shader should compile and execute successfully.",
10735+
GSM_BYTES_TEST2, GSM_BYTES_TEST2);
10736+
10737+
static const char Shader[] =
10738+
R"(
10739+
#define GSM_DWORDS 9216
10740+
#define NUM_THREADS 64
10741+
groupshared uint g_shared[GSM_DWORDS]; // 36864 bytes
10742+
RWStructuredBuffer<uint> g_output : register(u0);
10743+
10744+
[GroupSharedLimit(36864)]
10745+
[numthreads(NUM_THREADS, 1, 1)]
10746+
void main(uint GI : SV_GroupIndex) {
10747+
for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS)
10748+
g_shared[i] = i;
10749+
GroupMemoryBarrierWithGroupSync();
10750+
if (GI == 0) {
10751+
for (uint j = 0; j < GSM_DWORDS; j++)
10752+
g_output[j] = g_shared[j];
10753+
}
10754+
})";
10755+
10756+
RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitTest",
10757+
Shader, GSM_DWORDS, 0, ShaderOpSet);
10758+
LogCommentFmt(L"Test 2 passed: GroupSharedLimit > default succeeded.");
10759+
}
10760+
10761+
// Test 3: No GroupSharedLimit attribute, usage within default (32768 bytes).
10762+
// The shader should use default limit and succeed.
10763+
{
10764+
static const UINT GSM_DWORDS = 8192;
10765+
10766+
LogCommentFmt(L"Test 3: No GroupSharedLimit, usage (32768 bytes) <= "
10767+
L"default limit. Shader should succeed.");
10768+
10769+
static const char Shader[] =
10770+
R"(
10771+
#define GSM_DWORDS 8192
10772+
#define NUM_THREADS 64
10773+
groupshared uint g_shared[GSM_DWORDS]; // 32768 bytes (default max)
10774+
RWStructuredBuffer<uint> g_output : register(u0);
10775+
10776+
[numthreads(NUM_THREADS, 1, 1)]
10777+
void main(uint GI : SV_GroupIndex) {
10778+
for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS)
10779+
g_shared[i] = i;
10780+
GroupMemoryBarrierWithGroupSync();
10781+
if (GI == 0) {
10782+
for (uint j = 0; j < GSM_DWORDS; j++)
10783+
g_output[j] = g_shared[j];
10784+
}
10785+
})";
10786+
10787+
RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitTest",
10788+
Shader, GSM_DWORDS, 0, ShaderOpSet);
10789+
LogCommentFmt(L"Test 3 passed: No attribute with default usage succeeded.");
10790+
}
10791+
}
10792+
10793+
void ExecutionTest::GroupSharedLimitASTest() {
10794+
WEX::TestExecution::SetVerifyOutput VerifySettings(
10795+
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
10796+
10797+
CComPtr<ID3D12Device> Device;
10798+
if (!CreateGSMLimitTestDevice(&*D3D12SDK, Device))
10799+
return;
10800+
10801+
if (!doesDeviceSupportMeshShaders(Device)) {
10802+
LogCommentFmt(L"Device does not support mesh shaders, skipping.");
10803+
WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
10804+
return;
10805+
}
10806+
10807+
const UINT MaxGSMAS = getMaxGroupSharedMemoryAS(Device);
10808+
LogCommentFmt(L"Device MaxGroupSharedMemoryPerGroupAS: %u bytes", MaxGSMAS);
10809+
10810+
CComPtr<IStream> Stream;
10811+
std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
10812+
std::make_shared<st::ShaderOpSet>();
10813+
readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support);
10814+
st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get());
10815+
10816+
// Test: AS shader fills groupshared memory and writes to UAV.
10817+
{
10818+
static const UINT GSM_DWORDS = 4096;
10819+
10820+
LogCommentFmt(L"AS Test: GroupSharedLimit == usage (16384 bytes). "
10821+
L"Amplification shader should compile and execute.");
10822+
10823+
static const char Shader[] =
10824+
R"(
10825+
struct Payload { uint dummy; };
10826+
10827+
#define GSM_DWORDS 4096
10828+
groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes
10829+
RWStructuredBuffer<uint> g_output : register(u0);
10830+
10831+
[GroupSharedLimit(16384)]
10832+
[numthreads(64, 1, 1)]
10833+
void ASMain(uint GI : SV_GroupIndex) {
10834+
for (uint i = GI; i < GSM_DWORDS; i += 64)
10835+
g_shared[i] = i;
10836+
GroupMemoryBarrierWithGroupSync();
10837+
if (GI == 0) {
10838+
for (uint j = 0; j < GSM_DWORDS; j++)
10839+
g_output[j] = g_shared[j];
10840+
}
10841+
Payload payload;
10842+
payload.dummy = 0;
10843+
DispatchMesh(1, 1, 1, payload);
10844+
}
10845+
10846+
struct MeshOutput {
10847+
float4 pos : SV_Position;
10848+
};
10849+
10850+
[OutputTopology("triangle")]
10851+
[numthreads(1, 1, 1)]
10852+
void MSMain(in payload Payload p,
10853+
out vertices MeshOutput verts[3],
10854+
out indices uint3 tris[1]) {
10855+
SetMeshOutputCounts(0, 0);
10856+
}
10857+
10858+
float4 PSMain() : SV_Target { return float4(0,0,0,0); }
10859+
)";
10860+
10861+
RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitASTest",
10862+
Shader, GSM_DWORDS, 0, ShaderOpSet);
10863+
LogCommentFmt(
10864+
L"AS Test passed: GroupSharedLimit in amplification shader succeeded.");
10865+
}
10866+
}
10867+
10868+
void ExecutionTest::GroupSharedLimitMSTest() {
10869+
WEX::TestExecution::SetVerifyOutput VerifySettings(
10870+
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
10871+
10872+
CComPtr<ID3D12Device> Device;
10873+
if (!CreateGSMLimitTestDevice(&*D3D12SDK, Device))
10874+
return;
10875+
10876+
if (!doesDeviceSupportMeshShaders(Device)) {
10877+
LogCommentFmt(L"Device does not support mesh shaders, skipping.");
10878+
WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
10879+
return;
10880+
}
10881+
10882+
const UINT MaxGSMMS = getMaxGroupSharedMemoryMS(Device);
10883+
LogCommentFmt(L"Device MaxGroupSharedMemoryPerGroupMS: %u bytes", MaxGSMMS);
10884+
10885+
CComPtr<IStream> Stream;
10886+
std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
10887+
std::make_shared<st::ShaderOpSet>();
10888+
readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support);
10889+
st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get());
10890+
10891+
// Test: MS shader fills groupshared memory and writes to UAV.
10892+
{
10893+
static const UINT GSM_DWORDS = 4096;
10894+
10895+
LogCommentFmt(L"MS Test: GroupSharedLimit == usage (16384 bytes). "
10896+
L"Mesh shader should compile and execute.");
10897+
10898+
static const char Shader[] =
10899+
R"(
10900+
#define GSM_DWORDS 4096
10901+
groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes
10902+
RWStructuredBuffer<uint> g_output : register(u0);
10903+
10904+
struct MeshOutput {
10905+
float4 pos : SV_Position;
10906+
};
10907+
10908+
[GroupSharedLimit(16384)]
10909+
[OutputTopology("triangle")]
10910+
[numthreads(64, 1, 1)]
10911+
void MSMain(uint GI : SV_GroupIndex,
10912+
out vertices MeshOutput verts[3],
10913+
out indices uint3 tris[1]) {
10914+
SetMeshOutputCounts(0, 0);
10915+
for (uint i = GI; i < GSM_DWORDS; i += 64)
10916+
g_shared[i] = i;
10917+
GroupMemoryBarrierWithGroupSync();
10918+
if (GI == 0) {
10919+
for (uint j = 0; j < GSM_DWORDS; j++)
10920+
g_output[j] = g_shared[j];
10921+
}
10922+
}
10923+
10924+
float4 PSMain() : SV_Target { return float4(0,0,0,0); }
10925+
)";
10926+
10927+
RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitMSTest",
10928+
Shader, GSM_DWORDS, 0, ShaderOpSet);
10929+
LogCommentFmt(
10930+
L"MS Test passed: GroupSharedLimit in mesh shader succeeded.");
10931+
}
10932+
}
10933+
1062210934
// Atomic operation testing
1062310935

1062410936
// Atomic tests take a single integer index as input and contort it into some

tools/clang/unittests/HLSLExec/HlslExecTestUtils.cpp

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,21 @@
1212
#include <filesystem>
1313
#include <optional>
1414

15+
// D3D12_FEATURE_D3D12_OPTIONS_PREVIEW and its data struct are not yet in
16+
// the released Windows SDK. Define locally so the test can query variable
17+
// group shared memory capabilities from the Agility SDK runtime.
18+
// Once the public SDK ships with these, a compile break (redefinition error)
19+
// will signal that these local definitions should be removed.
20+
#ifndef D3D12_FEATURE_D3D12_OPTIONS_PREVIEW
21+
#define D3D12_FEATURE_D3D12_OPTIONS_PREVIEW ((D3D12_FEATURE)72)
22+
#endif
23+
24+
typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS_PREVIEW {
25+
UINT MaxGroupSharedMemoryPerGroupCS;
26+
UINT MaxGroupSharedMemoryPerGroupAS;
27+
UINT MaxGroupSharedMemoryPerGroupMS;
28+
} D3D12_FEATURE_DATA_D3D12_OPTIONS_PREVIEW;
29+
1530
using namespace hlsl_test;
1631

1732
static bool useDebugIfaces() { return true; }
@@ -86,7 +101,7 @@ static bool createDevice(
86101
if (*D3DDevice)
87102
LogErrorFmt(L"createDevice called with non-null *D3DDevice - "
88103
L"this will likely leak the previous device");
89-
if (TestModel > D3D_HIGHEST_SHADER_MODEL) {
104+
if (TestModel > DXC_HIGHEST_SHADER_MODEL) {
90105
const UINT Minor = (UINT)TestModel & 0x0f;
91106
LogCommentFmt(L"Installed SDK does not support "
92107
L"shader model 6.%1u",
@@ -599,4 +614,25 @@ bool isFallbackPathEnabled() {
599614
WEX::TestExecution::RuntimeParameters::TryGetValue(L"EnableFallback",
600615
EnableFallbackValue);
601616
return EnableFallbackValue != 0;
602-
}
617+
}
618+
619+
// Queries D3D12_FEATURE_D3D12_OPTIONS_PREVIEW on Device and returns the
// reported MaxGroupSharedMemoryPerGroupCS value. The query is wrapped in
// VERIFY_SUCCEEDED.
UINT getMaxGroupSharedMemoryCS(ID3D12Device *Device) {
  D3D12_FEATURE_DATA_D3D12_OPTIONS_PREVIEW PreviewOptions{};
  VERIFY_SUCCEEDED(
      Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS_PREVIEW,
                                  &PreviewOptions, sizeof(PreviewOptions)));
  return PreviewOptions.MaxGroupSharedMemoryPerGroupCS;
}
625+
626+
// Queries D3D12_FEATURE_D3D12_OPTIONS_PREVIEW on Device and returns the
// reported MaxGroupSharedMemoryPerGroupAS value. The query is wrapped in
// VERIFY_SUCCEEDED.
UINT getMaxGroupSharedMemoryAS(ID3D12Device *Device) {
  D3D12_FEATURE_DATA_D3D12_OPTIONS_PREVIEW PreviewOptions{};
  VERIFY_SUCCEEDED(
      Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS_PREVIEW,
                                  &PreviewOptions, sizeof(PreviewOptions)));
  return PreviewOptions.MaxGroupSharedMemoryPerGroupAS;
}
632+
633+
// Queries D3D12_FEATURE_D3D12_OPTIONS_PREVIEW on Device and returns the
// reported MaxGroupSharedMemoryPerGroupMS value. The query is wrapped in
// VERIFY_SUCCEEDED.
UINT getMaxGroupSharedMemoryMS(ID3D12Device *Device) {
  D3D12_FEATURE_DATA_D3D12_OPTIONS_PREVIEW PreviewOptions{};
  VERIFY_SUCCEEDED(
      Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS_PREVIEW,
                                  &PreviewOptions, sizeof(PreviewOptions)));
  return PreviewOptions.MaxGroupSharedMemoryPerGroupMS;
}

0 commit comments

Comments
 (0)