// Source: Vulkan-Samples/shaders/gpu_dispatch/classify_material_map_gpu_enqueue_cs.hlsl (GPUOpen-LibrariesAndSDKs/Vulkan-Samples, branch main)

//  Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved.
//
//  Permission is hereby granted, free of charge, to any person obtaining a copy
//  of this software and associated documentation files (the "Software"), to deal
//  in the Software without restriction, including without limitation the rights
//  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//  copies of the Software, and to permit persons to whom the Software is
//  furnished to do so, subject to the following conditions:
//
//  The above copyright notice and this permission notice shall be included in all
//  copies or substantial portions of the Software.
//
//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
//  SOFTWARE.

// Edge length of the square tile processed by one thread group; used both for
// the thread-group dimensions and for converting group IDs to pixel coordinates.
#define TILE_SIZE 16

// Upper bound on the number of output records one thread group may request
// (passed to [MaxRecords] on the node output).
#if defined(NODE_AGGREGATION)
#define MAX_ALLOCATIONS 256 // TILE_SIZE * TILE_SIZE: one record per invocation
#else
#define MAX_ALLOCATIONS 1   // a single record describing the whole tile
#endif

// Two-component vector of 16-bit unsigned integers; used for the coordinate
// stored in OutputPayload.
typedef vector<uint16_t, 2> uint2_16;

// Input record of the "classify" node. Its only field carries the
// SV_DispatchGrid semantic, i.e. the record itself supplies the dispatch grid
// size for the broadcasting launch.
struct InputPayload
{
    uint3 grid_size : SV_DispatchGrid;
};

// Record written by the "classify" node to the "compose" node array.
struct OutputPayload
{
#ifdef NODE_DYNAMIC_EXPANSION
    // Only present in NODE_DYNAMIC_EXPANSION builds; the classify node writes (1, 1, 1).
    // NOTE(review): presumably read by the receiving node as its dispatch grid --
    // no SV_DispatchGrid semantic is attached here; confirm against the compose shaders.
    uint3       grid_size;
#endif
    // Pixel coordinate when NODE_AGGREGATION is defined (one record per invocation),
    // tile (thread-group) coordinate otherwise (one record per group).
    uint2_16    coord;		// tile or pixel coordinates
};

// All shaders must use the same resource bindings (they belong to the same pipeline).

// Material map: one uint per texel, read by the classify node with an integer
// Load() at mip level 0. Bound at binding 2 of descriptor set 0.
[[vk::binding(2, 0)]] Texture2D<uint> inMaterial;

// Scratch storage for the per-tile reduction (only used when NODE_AGGREGATION is
// not defined).
//
// sharedPerSubgroupMask holds one entry per wave: the wave-wide bitwise OR of the
// material values. The number of waves in a thread group is not a compile-time
// constant, so the array is sized for the largest count expected, which assumes a
// wave is at least 32 invocations wide (256 / 32 = 8 entries). A narrower wave
// would produce wave indices beyond the end of this array.
groupshared uint sharedPerSubgroupMask[(TILE_SIZE*TILE_SIZE)/32];
// Combined value for the whole tile, written by thread 0 and then read by every
// thread in the group to select the output node.
groupshared uint sharedShaderIndex;

// "classify" node: entry point of the work graph.
//
// Each thread group covers one TILE_SIZE x TILE_SIZE tile of the material map,
// one invocation per pixel, and emits records to the 3-element "compose" node array.
//
//  * NODE_AGGREGATION defined: every invocation allocates its own record, stores
//    its pixel coordinate in it and sends it to the array element selected by the
//    material value it loaded.
//  * otherwise: the group ORs together the material values of all its pixels,
//    allocates a single group-wide record, and thread 0 stores the tile coordinate
//    in it. The record goes to the array element selected by the combined value.
//
// Parameters:
//  svGroupIndex    - flattened invocation index inside the thread group.
//  svGroupId       - thread-group (tile) coordinate inside the dispatch grid.
//  svGroupThreadId - invocation coordinate inside the tile.
//  in_payload      - input record; not read in the body, its grid_size field
//                    carries the SV_DispatchGrid semantic.
//  node_out        - output node array ("compose", 3 elements, at most
//                    MAX_ALLOCATIONS records per thread group).
[Shader("node")]
[NodeID("classify")]
[NodeLaunch("broadcasting")]
[NodeIsProgramEntry]
[NodeMaxDispatchGrid(512, 512, 1)]
[NumThreads(TILE_SIZE, TILE_SIZE, 1)]
void main(
    const uint  svGroupIndex    : SV_GroupIndex,
    const uint3 svGroupId       : SV_GroupID,
    const uint3 svGroupThreadId : SV_GroupThreadID,

    DispatchNodeInputRecord<InputPayload> in_payload,

    [NodeID("compose", 0)]
    [NodeArraySize(3)]
    [MaxRecords(MAX_ALLOCATIONS)]
    NodeOutputArray<OutputPayload> node_out)
{
    // Pixel handled by this invocation; the Z component passed to Load() is the mip level.
    const int3 coord    = int3(TILE_SIZE * svGroupId.xy + svGroupThreadId.xy, 0);
    const uint material = inMaterial.Load(coord).r;		// zero-based material index

#ifdef NODE_AGGREGATION
    const uint recordCount  = 1;		// one record per invocation
    const uint shaderIndex  = material;	// use the shader corresponding to that material ID

    ThreadNodeOutputRecords<OutputPayload> out_payload = node_out[shaderIndex].GetThreadNodeOutputRecords(recordCount);

    out_payload[0].coord = uint2_16(coord.xy);

    out_payload.OutputComplete();

#else // not NODE_AGGREGATION

    // NOTE(review): assumes consecutive group indices are packed into the same
    // wave, so that svGroupIndex / laneCount identifies the wave -- confirm for
    // the target hardware.
    const uint laneCount = WaveGetLaneCount();
    const uint waveId    = svGroupIndex / laneCount;
    const uint numWaves  = (TILE_SIZE*TILE_SIZE) / laneCount;

    // Step 1: reduce inside each wave. Every lane stores the same value into the
    // wave's slot.
    sharedPerSubgroupMask[waveId] = WaveActiveBitOr(material);

    // All per-wave results must be visible before thread 0 combines them.
    GroupMemoryBarrierWithGroupSync();

    // Step 2: thread 0 combines the per-wave results. The OR is accumulated in a
    // local and stored to group-shared memory once; the counter is unsigned to
    // match numWaves.
    if (svGroupIndex == 0)
    {
        uint tileMask = 0;

        for (uint i = 0; i < numWaves; ++i)
        {
            tileMask |= sharedPerSubgroupMask[i];
        }

        // tileMask is the common mask of all materials in this tile
        sharedShaderIndex = tileMask;
    }

    // Publish the combined value to the whole group before it selects the node.
    GroupMemoryBarrierWithGroupSync();

    const uint recordCount = 1;	// only one payload

    // NOTE(review): the combined OR is used directly as an index into the
    // 3-element node array; this presumes the values stored in inMaterial never
    // OR together to more than 2 -- confirm against the material map contents.
    GroupNodeOutputRecords<OutputPayload> out_payload = node_out[sharedShaderIndex].GetGroupNodeOutputRecords(recordCount);

    // The record is shared by the group; a single thread fills it in.
    if (svGroupIndex == 0)
    {
#ifdef NODE_DYNAMIC_EXPANSION
        out_payload[0].grid_size = uint3(1, 1, 1);
#endif
        out_payload[0].coord = uint2_16(svGroupId.xy);
    }

    out_payload.OutputComplete();

#endif // not NODE_AGGREGATION
}