|
24 | 24 | #include <array> |
25 | 25 | #include <string> |
26 | 26 | #include <map> |
| 27 | +#include <set> |
27 | 28 | #include <unordered_set> |
28 | 29 | #include <sstream> |
29 | 30 | #include <iomanip> |
@@ -212,6 +213,7 @@ class ExecutionTest { |
212 | 213 | TEST_METHOD(GroupSharedLimitTest); |
213 | 214 | TEST_METHOD(GroupSharedLimitASTest); |
214 | 215 | TEST_METHOD(GroupSharedLimitMSTest); |
| 216 | + TEST_METHOD(GroupWaveIndexTest); |
215 | 217 | TEST_METHOD(PartialDerivTest); |
216 | 218 | TEST_METHOD(DerivativesTest); |
217 | 219 | TEST_METHOD(ComputeSampleTest); |
@@ -10931,6 +10933,210 @@ void ExecutionTest::GroupSharedLimitMSTest() { |
10931 | 10933 | } |
10932 | 10934 | } |
10933 | 10935 |
|
| 10936 | +void ExecutionTest::GroupWaveIndexTest() { |
| 10937 | + WEX::TestExecution::SetVerifyOutput VerifySettings( |
| 10938 | + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| 10939 | + |
| 10940 | + BEGIN_TEST_METHOD_PROPERTIES() |
| 10941 | + TEST_METHOD_PROPERTY(L"Kits.TestId", L"c3f60f00-8e91-4acb-b4be-9f483fbe836b") |
| 10942 | + TEST_METHOD_PROPERTY( |
| 10943 | + L"Kits.Specification", |
| 10944 | + L"Device.Graphics.D3D12.DXILCore.ShaderModel610.CoreRequirement") |
| 10945 | + END_TEST_METHOD_PROPERTIES() |
| 10946 | + |
| 10947 | + bool FailIfRequirementsNotMet = false; |
| 10948 | +#ifdef _HLK_CONF |
| 10949 | + FailIfRequirementsNotMet = true; |
| 10950 | +#endif |
| 10951 | + WEX::TestExecution::RuntimeParameters::TryGetValue( |
| 10952 | + L"FailIfRequirementsNotMet", FailIfRequirementsNotMet); |
| 10953 | + |
| 10954 | + CComPtr<ID3D12Device> Device; |
| 10955 | + const bool SkipUnsupported = !FailIfRequirementsNotMet; |
| 10956 | + if (!createDevice(&Device, D3D_SHADER_MODEL_6_10, SkipUnsupported)) { |
| 10957 | + if (FailIfRequirementsNotMet) |
| 10958 | + LogErrorFmt(L"Device creation failed, resulting in test failure, since " |
| 10959 | + L"FailIfRequirementsNotMet is set."); |
| 10960 | + return; |
| 10961 | + } |
| 10962 | + |
| 10963 | + // Get supported wave sizes for WaveSize attribute tests. |
| 10964 | + D3D12_FEATURE_DATA_D3D12_OPTIONS1 WaveOpts = {}; |
| 10965 | + VERIFY_SUCCEEDED( |
| 10966 | + Device->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS1, |
| 10967 | + &WaveOpts, sizeof(WaveOpts))); |
| 10968 | + const UINT MinWaveSize = WaveOpts.WaveLaneCountMin; |
| 10969 | + const UINT MaxWaveSize = WaveOpts.WaveLaneCountMax; |
| 10970 | + |
| 10971 | + struct GroupWaveData { |
| 10972 | + uint32_t GroupIndex; |
| 10973 | + uint32_t WaveIndex; |
| 10974 | + uint32_t WaveCount; |
| 10975 | + uint32_t LaneIndex; |
| 10976 | + uint32_t LaneCount; |
| 10977 | + uint32_t FirstLaneGroupIndex; |
| 10978 | + }; |
| 10979 | + |
| 10980 | + // Shader source uses defines for thread group dimensions and optional |
| 10981 | + // WaveSize attribute, injected via compiler -D options. |
| 10982 | + const char Shader[] = |
| 10983 | + R"(struct GroupWaveData { |
| 10984 | + uint GroupIndex; |
| 10985 | + uint WaveIndex; |
| 10986 | + uint WaveCount; |
| 10987 | + uint LaneIndex; |
| 10988 | + uint LaneCount; |
| 10989 | + uint FirstLaneGroupIndex; |
| 10990 | + }; |
| 10991 | + RWStructuredBuffer<GroupWaveData> Data : register(u0); |
| 10992 | + |
| 10993 | + WAVE_SIZE_ATTR |
| 10994 | + [numthreads(NUMTHREADS_X, NUMTHREADS_Y, NUMTHREADS_Z)] |
| 10995 | + void main(uint GI : SV_GroupIndex) { |
| 10996 | + GroupWaveData D; |
| 10997 | + D.GroupIndex = GI; |
| 10998 | + D.WaveIndex = GetGroupWaveIndex(); |
| 10999 | + D.WaveCount = GetGroupWaveCount(); |
| 11000 | + D.LaneIndex = WaveGetLaneIndex(); |
| 11001 | + D.LaneCount = WaveGetLaneCount(); |
| 11002 | + D.FirstLaneGroupIndex = WaveReadLaneFirst(GI); |
| 11003 | + Data[GI] = D; |
| 11004 | + })"; |
| 11005 | + |
| 11006 | + CComPtr<IStream> Stream; |
| 11007 | + std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| 11008 | + std::make_shared<st::ShaderOpSet>(); |
| 11009 | + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support); |
| 11010 | + st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get()); |
| 11011 | + |
| 11012 | + // Test configurations: {numthreadsX, numthreadsY, numthreadsZ, WaveSize} |
| 11013 | + // WaveSize 0 means no [WaveSize] attribute. |
| 11014 | + struct TestConfig { |
| 11015 | + UINT X, Y, Z; |
| 11016 | + UINT WaveSize; |
| 11017 | + }; |
| 11018 | + |
| 11019 | + std::vector<TestConfig> Configs = { |
| 11020 | + {8, 1, 1, 0}, // 1D small (8 threads) |
| 11021 | + {8, 8, 1, 0}, // 2D medium (64 threads) |
| 11022 | + {16, 16, 1, 0}, // 2D large (256 threads) |
| 11023 | + {32, 32, 1, 0}, // 2D max (1024 threads) |
| 11024 | + {4, 4, 4, 0}, // 3D (64 threads) |
| 11025 | + {10, 1, 1, 0}, // 1D non-power-of-2 |
| 11026 | + }; |
| 11027 | + |
| 11028 | + // Add WaveSize-attributed variants for each supported wave size. |
| 11029 | + for (UINT WS = MinWaveSize; WS <= MaxWaveSize; WS *= 2) { |
| 11030 | + Configs.push_back({8, 8, 1, WS}); |
| 11031 | + // Single wave case: numthreads <= WaveSize. |
| 11032 | + if (WS >= 8) |
| 11033 | + Configs.push_back({8, 1, 1, WS}); |
| 11034 | + } |
| 11035 | + |
| 11036 | + for (const auto &Cfg : Configs) { |
| 11037 | + const UINT NumThreads = Cfg.X * Cfg.Y * Cfg.Z; |
| 11038 | + if (Cfg.WaveSize > 0) { |
| 11039 | + LogCommentFmt(L"Testing [numthreads(%u,%u,%u)] [WaveSize(%u)] " |
| 11040 | + L"(%u threads)", |
| 11041 | + Cfg.X, Cfg.Y, Cfg.Z, Cfg.WaveSize, NumThreads); |
| 11042 | + } else { |
| 11043 | + LogCommentFmt(L"Testing [numthreads(%u,%u,%u)] (%u threads)", Cfg.X, |
| 11044 | + Cfg.Y, Cfg.Z, NumThreads); |
| 11045 | + } |
| 11046 | + |
| 11047 | + // Build compiler options with thread group defines. |
| 11048 | + char CompilerOptions[256]; |
| 11049 | + if (Cfg.WaveSize > 0) { |
| 11050 | + VERIFY_IS_TRUE( |
| 11051 | + sprintf_s(CompilerOptions, sizeof(CompilerOptions), |
| 11052 | + "-D NUMTHREADS_X=%u -D NUMTHREADS_Y=%u " |
| 11053 | + "-D NUMTHREADS_Z=%u -D WAVE_SIZE_ATTR=[wavesize(%u)]", |
| 11054 | + Cfg.X, Cfg.Y, Cfg.Z, Cfg.WaveSize) != -1); |
| 11055 | + } else { |
| 11056 | + VERIFY_IS_TRUE(sprintf_s(CompilerOptions, sizeof(CompilerOptions), |
| 11057 | + "-D NUMTHREADS_X=%u -D NUMTHREADS_Y=%u " |
| 11058 | + "-D NUMTHREADS_Z=%u -D WAVE_SIZE_ATTR=", |
| 11059 | + Cfg.X, Cfg.Y, Cfg.Z) != -1); |
| 11060 | + } |
| 11061 | + |
| 11062 | + std::shared_ptr<st::ShaderOpTestResult> Test = |
| 11063 | + st::RunShaderOpTestAfterParse( |
| 11064 | + Device, m_support, "GroupWaveIndexTest", |
| 11065 | + [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *ShaderOp) { |
| 11066 | + VERIFY_IS_TRUE(0 == strcmp(Name, "UAVBuffer0")); |
| 11067 | + ShaderOp->Shaders.at(0).Text = Shader; |
| 11068 | + ShaderOp->Shaders.at(0).Arguments = CompilerOptions; |
| 11069 | + |
| 11070 | + VERIFY_IS_TRUE(sizeof(GroupWaveData) * NumThreads <= Data.size()); |
| 11071 | + GroupWaveData *InData = (GroupWaveData *)Data.data(); |
| 11072 | + memset(InData, 0, sizeof(GroupWaveData) * NumThreads); |
| 11073 | + }, |
| 11074 | + ShaderOpSet); |
| 11075 | + |
| 11076 | + MappedData DataUav; |
| 11077 | + Test->Test->GetReadBackData("UAVBuffer0", &DataUav); |
| 11078 | + VERIFY_IS_TRUE(sizeof(GroupWaveData) * NumThreads <= DataUav.size()); |
| 11079 | + const GroupWaveData *Results = (const GroupWaveData *)DataUav.data(); |
| 11080 | + |
| 11081 | + // Verify WaveCount is uniform across all threads and >= 1. |
| 11082 | + const uint32_t GroupWaveCount = Results[0].WaveCount; |
| 11083 | + VERIFY_IS_GREATER_THAN_OR_EQUAL(GroupWaveCount, 1u); |
| 11084 | + for (UINT I = 0; I < NumThreads; ++I) { |
| 11085 | + VERIFY_ARE_EQUAL(Results[I].WaveCount, GroupWaveCount); |
| 11086 | + } |
| 11087 | + |
| 11088 | + // Verify WaveCount >= ceil(threadGroupSize / LaneCount) per spec. |
| 11089 | + const uint32_t GroupLaneCount = Results[0].LaneCount; |
| 11090 | + const uint32_t MinWaves = |
| 11091 | + (NumThreads + GroupLaneCount - 1) / GroupLaneCount; |
| 11092 | + LogCommentFmt(L" waveCount=%u, laneCount=%u, minWaves=%u", GroupWaveCount, |
| 11093 | + GroupLaneCount, MinWaves); |
| 11094 | + VERIFY_IS_GREATER_THAN_OR_EQUAL(GroupWaveCount, MinWaves); |
| 11095 | + |
| 11096 | + // If a specific WaveSize was requested, verify LaneCount matches. |
| 11097 | + if (Cfg.WaveSize > 0) { |
| 11098 | + VERIFY_ARE_EQUAL(GroupLaneCount, Cfg.WaveSize); |
| 11099 | + } |
| 11100 | + |
| 11101 | + // Verify WaveIndex is in range [0, WaveCount). |
| 11102 | + for (UINT I = 0; I < NumThreads; ++I) { |
| 11103 | + VERIFY_IS_LESS_THAN(Results[I].WaveIndex, GroupWaveCount); |
| 11104 | + } |
| 11105 | + |
| 11106 | + // Group threads by wave using FirstLaneGroupIndex. |
| 11107 | + std::map<uint32_t, std::vector<const GroupWaveData *>> Waves; |
| 11108 | + for (UINT I = 0; I < NumThreads; ++I) { |
| 11109 | + Waves[Results[I].FirstLaneGroupIndex].push_back(&Results[I]); |
| 11110 | + } |
| 11111 | + |
| 11112 | + // Verify number of distinct waves matches WaveCount. |
| 11113 | + VERIFY_ARE_EQUAL(Waves.size(), static_cast<size_t>(GroupWaveCount)); |
| 11114 | + |
| 11115 | + // Verify WaveIndex is uniform within each wave and unique across waves. |
| 11116 | + std::set<uint32_t> SeenWaveIndices; |
| 11117 | + for (auto &WavePair : Waves) { |
| 11118 | + const std::vector<const GroupWaveData *> &Lanes = WavePair.second; |
| 11119 | + VERIFY_IS_GREATER_THAN_OR_EQUAL(Lanes.size(), 1u); |
| 11120 | + |
| 11121 | + uint32_t ExpectedWaveIndex = Lanes[0]->WaveIndex; |
| 11122 | + for (size_t J = 1; J < Lanes.size(); ++J) { |
| 11123 | + VERIFY_ARE_EQUAL(Lanes[J]->WaveIndex, ExpectedWaveIndex); |
| 11124 | + } |
| 11125 | + |
| 11126 | + VERIFY_IS_TRUE(SeenWaveIndices.find(ExpectedWaveIndex) == |
| 11127 | + SeenWaveIndices.end()); |
| 11128 | + SeenWaveIndices.insert(ExpectedWaveIndex); |
| 11129 | + } |
| 11130 | + |
| 11131 | + // Verify all wave indices from 0 to WaveCount-1 are present. |
| 11132 | + VERIFY_ARE_EQUAL(SeenWaveIndices.size(), |
| 11133 | + static_cast<size_t>(GroupWaveCount)); |
| 11134 | + for (uint32_t I = 0; I < GroupWaveCount; ++I) { |
| 11135 | + VERIFY_IS_TRUE(SeenWaveIndices.count(I) == 1); |
| 11136 | + } |
| 11137 | + } |
| 11138 | +} |
| 11139 | + |
10934 | 11140 | // Atomic operation testing |
10935 | 11141 |
|
10936 | 11142 | // Atomic tests take a single integer index as input and contort it into some |
|
0 commit comments