Skip to content

Commit e2ad253

Browse files
authored
CP: Fix ExecutionTest::DerivativesTest issues (#6311) (#6350)
This PR fixes several issues in the `ExecutionTest::DerivativesTest`:
- Use 1D quad order only when writing 1D derivative results. 2D results are expected in standard 2D form.
- Use a separate derivatives verification function for SM 6.6 compute, mesh and amplification shaders. In these cases the quad layout is well defined, and so are the expected results. There is only one possible result for `ddx_fine`/`ddy_fine` and two for `ddx_coarse`/`ddy_coarse`. This is different from pixel shaders, where the quad layout can vary quite a bit, and so do the expected results.
- Change the expected values to match results for texture pixel `(2,2)`.
- Adjust mesh shader dispatch dimensions to make sure `X * Y * Z <= 128`.
- Use the same logic (shared function) to calculate the center pixel for compute, mesh and amplification shaders.
- To enable easier debugging in the future, I have added a function that writes out the derivatives results (under the `DERIVATIVES_TEST_DEBUG` define).

Verified on:
- Latest Microsoft Basic Render Driver that supports shader model 6.8 (CS, MS, AS). Tested on x64 and arm64.
- NVIDIA GeForce RTX 2080 Ti (CS only)
- AMD Radeon RX 6900 XT (CS only)

Fixes #4787 (cherry picked from commit fdbecd3)
1 parent fd9721e commit e2ad253

2 files changed

Lines changed: 125 additions & 39 deletions

File tree

tools/clang/unittests/HLSLExec/ExecutionTest.cpp

Lines changed: 105 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3791,7 +3791,7 @@ TEST_F(ExecutionTest, BasicTriangleOpTestHalf) {
37913791
D3D_SHADER_MODEL_6_2);
37923792
}
37933793

3794-
void VerifyDerivResults(const float *pPixels, UINT offsetCenter) {
3794+
void VerifyDerivResults_PS_60(const float *pPixels, UINT offsetCenter) {
37953795

37963796
// pixel at the center
37973797
float CenterDDXFine = pPixels[offsetCenter];
@@ -3810,6 +3810,7 @@ void VerifyDerivResults(const float *pPixels, UINT offsetCenter) {
38103810
// 1 .125 .25
38113811

38123812
// In D3D12 there is no guarantee of how the adapter is grouping 2x2 pixels
3813+
// for pixel shaders and shader model 6.0.
38133814
// So for fine derivatives there can be up to two possible results for the
38143815
// center pixel, while for coarse derivatives there can be up to six possible
38153816
// results.
@@ -3844,6 +3845,45 @@ void VerifyDerivResults(const float *pPixels, UINT offsetCenter) {
38443845
}
38453846
}
38463847

3848+
// Verifies the derivatives written for the center pixel by a shader model 6.6
// compute, mesh or amplification shader.
//
// pPixels      - result data; each pixel occupies four consecutive floats.
// offsetCenter - index (in floats) of the first value of the center pixel.
void VerifyDerivResults_CS_AS_MS_66(const float *pPixels, UINT offsetCenter) {
  // The center pixel's four floats are, in order: ddx_fine, ddy_fine,
  // ddx_coarse and ddy_coarse.
  const float *pCenter = pPixels + offsetCenter;
  const float CenterDDXFine = pCenter[0];
  const float CenterDDYFine = pCenter[1];
  const float CenterDDXCoarse = pCenter[2];
  const float CenterDDYCoarse = pCenter[3];

  LogCommentFmt(
      L"center ddx_fine: %8f, ddy_fine: %8f, ddx_coarse: %8f, ddy_coarse: %8f",
      CenterDDXFine, CenterDDYFine, CenterDDXCoarse, CenterDDYCoarse);

  // Derivatives are computed over this 4x4 texture:
  //   .125   .25    .5     1
  //    2      4     16     32
  //   32     64   *128*   256
  //  256    512   1024   2048
  //
  // The values being checked are the derivative values calculated at the
  // texture center pixel (2,2), marked above.
  //
  // For shader model 6.6 compute, mesh and amplification shaders D3D12 defines
  // the quad grouping, so a fine derivative has exactly one valid result and
  // a coarse derivative has two.
  const int ulpTolerance = 1;

  // Fine ddx: 256 - 128
  VERIFY_IS_TRUE(CompareFloatULP(CenterDDXFine, 128.0f, ulpTolerance));
  // Fine ddy: 1024 - 128
  VERIFY_IS_TRUE(CompareFloatULP(CenterDDYFine, 896.0f, ulpTolerance));

  // Coarse ddx: either 256 - 128 or 2048 - 1024
  VERIFY_IS_TRUE(CompareFloatULP(CenterDDXCoarse, 128.0f, ulpTolerance) ||
                 CompareFloatULP(CenterDDXCoarse, 1024.0f, ulpTolerance));
  // Coarse ddy: either 1024 - 128 or 2048 - 256
  VERIFY_IS_TRUE(CompareFloatULP(CenterDDYCoarse, 896.0f, ulpTolerance) ||
                 CompareFloatULP(CenterDDYCoarse, 1792.0f, ulpTolerance));
}
3886+
38473887
// Rendering two right triangles forming a square and assigning a texture value
38483888
// for each pixel to calculate derivatives.
38493889
TEST_F(ExecutionTest, PartialDerivTest) {
@@ -3870,7 +3910,7 @@ TEST_F(ExecutionTest, PartialDerivTest) {
38703910
UINT centerIndex = (UINT64)width * height / 2 - width / 2;
38713911
UINT offsetCenter = centerIndex * pixelSize;
38723912

3873-
VerifyDerivResults(pPixels, offsetCenter);
3913+
VerifyDerivResults_PS_60(pPixels, offsetCenter);
38743914
}
38753915

38763916
struct Dispatch {
@@ -3905,6 +3945,53 @@ std::shared_ptr<st::ShaderOpTest> RunDispatch(ID3D12Device *pDevice,
39053945
return test;
39063946
}
39073947

3948+
// Computes the index of the pixel, near the middle of dispatch D, whose
// derivative results get verified. The index is in pixels, not floats.
//
// A dispatch with height 1 is treated as 1D; anything else is treated as 2D
// and indexed row-major using D.width as the row length.
UINT DerivativesTest_GetCenterIndex(Dispatch &D) {
  if (D.height != 1) {
    // 2D: halve the height and width to land roughly in the center, round
    // each down to a multiple of 4 (the start of the repeating pattern), then
    // step 2 rows down and 2 columns across to reach pixel (2,2) of the
    // pattern.
    const UINT patternRow = ((D.height / 2UL) & ~0x3) + 2;
    const UINT patternCol = ((D.width / 2UL) & ~0x3) + 2;
    return patternRow * D.width + patternCol;
  }

  // 1D quads: halve the width, round down to a multiple of 16 (the start of
  // the repeating pattern), then add 12 to reach the middle (2,2) pixel of the
  // pattern. The values are stored in Z-order.
  return ((static_cast<UINT64>(D.width) / 2) & ~0xF) + 12;
}
3964+
3965+
// Debugging aid for the derivatives test. When DERIVATIVES_TEST_DEBUG is
// defined, logs one line per (x, y) position of dispatch D containing the four
// values read back from resource "U3" and the four derivative values from
// pPixels, followed by the chosen center index. Without the define the
// function does nothing.
//
// D           - dispatch dimensions being tested.
// Test        - shader op test to read the "U3" data from.
// pPixels     - derivative results, four floats per entry.
// centerIndex - index of the center pixel, logged for reference only.
void DerivativesTest_DebugOutput(Dispatch &D,
                                 std::shared_ptr<st::ShaderOpTest> &Test,
                                 const float *pPixels, UINT centerIndex) {
#ifdef DERIVATIVES_TEST_DEBUG
  LogCommentFmt(L"------------------------------------");

  MappedData dataDbg;
  Test->GetReadBackData("U3", &dataDbg);
  UINT *pCoords = (UINT *)dataDbg.data();

  LogCommentFmt(L"DISPATCH %d x %d x %d", D.width, D.height, D.depth);
  for (int row = 0; row < D.height; row++) {
    for (int col = 0; col < D.width; col++) {
      // Each entry is four values wide in both buffers.
      UINT index = (row * 4) * D.width + col * 4;
      const UINT *pCoord = pCoords + index;
      const float *pDeriv = pPixels + index;
      LogCommentFmt(L"%3d (%2d, %2d, %2d)\t ddx_fine: %8f, ddy_fine: %8f, "
                    L"ddx_coarse: %8f, ddy_coarse: %8f",
                    pCoord[0], pCoord[1], pCoord[2], pCoord[3], pDeriv[0],
                    pDeriv[1], pDeriv[2], pDeriv[3]);
    }
  }

  LogCommentFmt(L"CENTER %d", centerIndex);
  LogCommentFmt(L"------------------------------------");
#else
  // Nothing to log in regular builds; mark every parameter as intentionally
  // unused.
  UNREFERENCED_PARAMETER(D);
  UNREFERENCED_PARAMETER(Test);
  UNREFERENCED_PARAMETER(pPixels);
  UNREFERENCED_PARAMETER(centerIndex);
#endif
}
3994+
39083995
TEST_F(ExecutionTest, DerivativesTest) {
39093996
const UINT pixelSize = 4; // always float4
39103997

@@ -3925,12 +4012,12 @@ TEST_F(ExecutionTest, DerivativesTest) {
39254012

39264013
std::vector<Dispatch> dispatches = {{40, 1, 1}, {1000, 1, 1}, {32, 32, 1},
39274014
{16, 64, 1}, {4, 12, 4}, {4, 64, 1},
3928-
{16, 16, 3}, {32, 8, 2}};
4015+
{16, 16, 3}, {32, 8, 2}, {8, 8, 1}};
39294016

3930-
std::vector<Dispatch> meshDispatches = {
3931-
{60, 1, 1}, {128, 1, 1}, {8, 8, 1}, {32, 8, 1},
3932-
{8, 16, 4}, {8, 64, 1}, {8, 8, 3},
3933-
};
4017+
std::vector<Dispatch> meshDispatches = {// (X * Y * Z) must be <= 128
4018+
{60, 1, 1}, {128, 1, 1}, {8, 8, 1},
4019+
{16, 8, 1}, {8, 4, 2}, {10, 10, 1},
4020+
{4, 16, 2}, {4, 16, 2}};
39344021

39354022
std::vector<Dispatch> badDispatches = {{16, 3, 1}, {2, 16, 1}, {33, 1, 1}};
39364023

@@ -3945,25 +4032,15 @@ TEST_F(ExecutionTest, DerivativesTest) {
39454032
RunDispatch(pDevice, m_support, pShaderOp, D);
39464033

39474034
test->GetReadBackData("U0", &data);
3948-
39494035
float *pPixels = (float *)data.data();
3950-
;
39514036

3952-
UINT centerIndex = 0;
3953-
if (D.height == 1) {
3954-
centerIndex = (((UINT64)(D.width * D.height * D.depth) / 2) & ~0xF) + 10;
3955-
} else {
3956-
// To find roughly the center for compute, divide the height and width in
3957-
// half, truncate to the previous multiple of 4 to get to the start of the
3958-
// repeating pattern and then add 2 rows to get to the second row of quads
3959-
// and 2 to get to the first texel of the second row of that quad row
3960-
UINT centerRow = ((D.height / 2UL) & ~0x3) + 2;
3961-
UINT centerCol = ((D.width / 2UL) & ~0x3) + 2;
3962-
centerIndex = centerRow * D.width + centerCol;
3963-
}
4037+
UINT centerIndex = DerivativesTest_GetCenterIndex(D);
4038+
4039+
DerivativesTest_DebugOutput(D, test, pPixels, centerIndex);
4040+
39644041
UINT offsetCenter = centerIndex * pixelSize;
39654042
LogCommentFmt(L"Verifying derivatives in compute shader results");
3966-
VerifyDerivResults(pPixels, offsetCenter);
4043+
VerifyDerivResults_CS_AS_MS_66(pPixels, offsetCenter);
39674044
}
39684045

39694046
if (DoesDeviceSupportMeshAmpDerivatives(pDevice)) {
@@ -3976,16 +4053,18 @@ TEST_F(ExecutionTest, DerivativesTest) {
39764053

39774054
test->GetReadBackData("U1", &data);
39784055
const float *pPixels = (float *)data.data();
3979-
UINT centerIndex =
3980-
(((UINT64)(D.width * D.height * D.depth) / 2) & ~0xF) + 10;
4056+
UINT centerIndex = DerivativesTest_GetCenterIndex(D);
4057+
4058+
DerivativesTest_DebugOutput(D, test, pPixels, centerIndex);
4059+
39814060
UINT offsetCenter = centerIndex * pixelSize;
39824061
LogCommentFmt(L"Verifying derivatives in mesh shader results");
3983-
VerifyDerivResults(pPixels, offsetCenter);
4062+
VerifyDerivResults_CS_AS_MS_66(pPixels, offsetCenter);
39844063

39854064
test->GetReadBackData("U2", &data);
39864065
pPixels = (float *)data.data();
39874066
LogCommentFmt(L"Verifying derivatives in amplification shader results");
3988-
VerifyDerivResults(pPixels, offsetCenter);
4067+
VerifyDerivResults_CS_AS_MS_66(pPixels, offsetCenter);
39894068
}
39904069
}
39914070

tools/clang/unittests/HLSLExec/ShaderOpArith.xml

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
<ShaderOp Name="Derivatives" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
9292
<RootSignature>
9393
RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
94-
DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)),
94+
DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2), UAV(u3)),
9595
StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT)
9696
</RootSignature>
9797
<Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
@@ -119,6 +119,9 @@
119119
<Resource Name="U2" Dimension="BUFFER" Width="16384"
120120
Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
121121
Init="Zero" ReadBack="true" TransitionTo="UNORDERED_ACCESS" />
122+
<Resource Name="U3" Dimension="BUFFER" Width="16384"
123+
Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
124+
Init="Zero" ReadBack="true" TransitionTo="UNORDERED_ACCESS" />
122125

123126
<RootValues>
124127
<RootValue HeapName="ResHeap" />
@@ -131,6 +134,8 @@
131134
NumElements="1024" StructureByteStride="16" />
132135
<Descriptor Name='U2' Kind='UAV' ResName='U2'
133136
NumElements="1024" StructureByteStride="16" />
137+
<Descriptor Name='U3' Kind='UAV' ResName='U3'
138+
NumElements="1024" StructureByteStride="16" />
134139
</DescriptorHeap>
135140
<DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
136141
<Descriptor Name="RTarget" Kind="RTV"/>
@@ -157,6 +162,7 @@
157162
RWStructuredBuffer<float4> g_bufMain : register(u0);
158163
RWStructuredBuffer<float4> g_bufMesh : register(u1);
159164
RWStructuredBuffer<float4> g_bufAmp : register(u2);
165+
RWStructuredBuffer<uint4> g_bufDbg : register(u3);
160166
161167
float4 DerivTest(int2 uv) {
162168
int3 offset = int3(uv%4, 0);
@@ -204,14 +210,7 @@
204210
{ 1.0f, 1.0f }};
205211
206212
uint convert2Dto1D(uint x, uint y, uint width) {
207-
// Convert 2D coords to 1D for testing
208-
// All completed rows of quads
209-
uint prevRows = (y/2)*2*width;
210-
// All previous full quads on this quad row
211-
uint prevQuads = (x/2)*4;
212-
// index into current quad
213-
uint quadIx = (y&1)*2 + (x&1);
214-
return prevRows + prevQuads + quadIx;
213+
return (y * width) + x;
215214
}
216215
217216
float4 PSMain(PSInput input) : SV_TARGET {
@@ -232,10 +231,14 @@
232231
233232
[NumThreads(DISPATCHX, DISPATCHY, DISPATCHZ)]
234233
void CSMain(uint3 id : SV_GroupThreadID, uint ix : SV_GroupIndex) {
235-
if (DISPATCHY == 1 && DISPATCHZ == 1)
234+
if (DISPATCHY == 1 && DISPATCHZ == 1) {
236235
g_bufMain[ix] = DerivTest(ix);
237-
else
236+
g_bufDbg[ix] = uint4(ix, ConvertGroupIdx(id), 0);
237+
}
238+
else {
238239
g_bufMain[convert2Dto1D(id.x, id.y, DISPATCHX)] = DerivTest(id.xy);
240+
g_bufDbg[convert2Dto1D(id.x, id.y, DISPATCHX)] = uint4(ix, id);
241+
}
239242
}
240243
241244
#if DISPATCHX * DISPATCHY * DISPATCHZ > 128
@@ -273,10 +276,14 @@
273276
verts[ix%6].uv = g_UV[ix%6];
274277
tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
275278
g_bufMesh[ix] = DerivTest(ix);
276-
if (DISPATCHY == 1 && DISPATCHZ == 1)
279+
if (DISPATCHY == 1 && DISPATCHZ == 1) {
277280
g_bufMesh[ix] = DerivTest(ix);
278-
else
281+
g_bufDbg[ix] = uint4(ix, id);
282+
}
283+
else {
279284
g_bufMesh[convert2Dto1D(id.x, id.y, DISPATCHX)] = DerivTest(id.xy);
285+
g_bufDbg[convert2Dto1D(id.x, id.y, DISPATCHX)] = uint4(ix, id);
286+
}
280287
}
281288
282289
]]>

0 commit comments

Comments
 (0)