Skip to content

Commit 9f86c09

Browse files
committed
Refactor to use the workgroup2 arithmetic configuration in the C++ host code and the average-luma metering shaders
1 parent e1d1cb1 commit 9f86c09

5 files changed

Lines changed: 42 additions & 17 deletions

File tree

26_Autoexposure/app_resources/avg_luma_meter.comp.hlsl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,15 @@ groupshared float32_t sdata[WORKGROUP_SIZE];
2020
struct SharedAccessor
2121
{
2222
using type = float32_t;
23-
void get(const uint32_t index, NBL_REF_ARG(uint32_t) value)
23+
template<typename AccessType, typename IndexType>
24+
void get(const uint32_t ix, NBL_REF_ARG(AccessType) value)
2425
{
25-
value = sdata[index];
26+
value = sdata[ix];
2627
}
27-
28-
void set(const uint32_t index, const uint32_t value)
28+
template<typename AccessType, typename IndexType>
29+
void set(const uint32_t ix, const AccessType value)
2930
{
30-
sdata[index] = value;
31+
sdata[ix] = value;
3132
}
3233

3334
void workgroupExecutionAndMemoryBarrier()
@@ -62,7 +63,7 @@ void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID)
6263
SharedAccessor sdata;
6364
TexAccessor tex;
6465

65-
using LumaMeter = luma_meter::geom_meter< WORKGROUP_SIZE, SUBGROUP_SIZE, PtrAccessor, SharedAccessor, TexAccessor>;
66+
using LumaMeter = luma_meter::geom_meter<wg_config_t, PtrAccessor, SharedAccessor, TexAccessor, device_capabilities>;
6667
LumaMeter meter = LumaMeter::create(pushData.lumaMin, pushData.lumaMax, pushData.sampleCount, pushData.rcpFirstPassWGCount);
6768

6869
uint32_t texWidth, texHeight;

26_Autoexposure/app_resources/avg_luma_tonemap.comp.hlsl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,15 @@ groupshared float32_t sdata[WORKGROUP_SIZE];
2525
struct SharedAccessor
2626
{
2727
using type = float32_t;
28-
void get(const uint32_t index, NBL_REF_ARG(uint32_t) value)
28+
template<typename AccessType, typename IndexType>
29+
void get(const uint32_t ix, NBL_REF_ARG(AccessType) value)
2930
{
30-
value = sdata[index];
31+
value = sdata[ix];
3132
}
32-
33-
void set(const uint32_t index, const uint32_t value)
33+
template<typename AccessType, typename IndexType>
34+
void set(const uint32_t ix, const AccessType value)
3435
{
35-
sdata[index] = value;
36+
sdata[ix] = value;
3637
}
3738

3839
void workgroupExecutionAndMemoryBarrier()
@@ -67,7 +68,7 @@ void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID)
6768
SharedAccessor sdata;
6869
TexAccessor tex;
6970

70-
using LumaMeter = luma_meter::geom_meter< WORKGROUP_SIZE, SUBGROUP_SIZE, PtrAccessor, SharedAccessor, TexAccessor>;
71+
using LumaMeter = luma_meter::geom_meter<wg_config_t, PtrAccessor, SharedAccessor, TexAccessor, device_capabilities>;
7172
LumaMeter meter = LumaMeter::create(pushData.lumaMin, pushData.lumaMax, pushData.sampleCount, pushData.rcpFirstPassWGCount);
7273

7374
float32_t EV = meter.gatherLuma(val_accessor);

26_Autoexposure/app_resources/common.hlsl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "nbl/builtin/hlsl/cpp_compat.hlsl"
99
#include "nbl/builtin/hlsl/luma_meter/common.hlsl"
10+
#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl"
1011

1112
namespace nbl
1213
{
@@ -42,6 +43,21 @@ struct AutoexposurePushData
4243
#error "Define SUBGROUP_SIZE!"
4344
#endif
4445

46+
#ifndef WG_CONFIG_T
47+
#error "Define WG_CONFIG_T!"
48+
#endif
49+
50+
using wg_config_t = WG_CONFIG_T;
51+
52+
struct device_capabilities
53+
{
54+
#ifdef NATIVE_SUBGROUP_ARITHMETIC
55+
NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true;
56+
#else
57+
NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = false;
58+
#endif
59+
};
60+
4561
#endif
4662

4763
}

26_Autoexposure/app_resources/present.frag.hlsl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
#pragma wave shader_stage(fragment)
66

7-
#include "app_resources/common.hlsl"
8-
97
// vertex shader is provided by the fullScreenTriangle extension
108
#include <nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl>
119
using namespace nbl::hlsl;

26_Autoexposure/main.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h"
1212

1313
#include "nbl/builtin/hlsl/luma_meter/common.hlsl"
14+
#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl"
1415
#include "app_resources/common.hlsl"
1516

1617
using namespace nbl;
@@ -317,18 +318,26 @@ class AutoexposureApp final : public SimpleWindowedApplication, public BuiltinRe
317318
const uint32_t workgroupSize = m_physicalDevice->getLimits().maxComputeWorkGroupInvocations;
318319
const uint32_t subgroupSize = m_physicalDevice->getLimits().maxSubgroupSize;
319320

321+
const uint32_t configItemsPerInvoc = MeterMode == MeteringMode::AVERAGE ? 1 : workgroupSize / BinCount;
322+
workgroup2::SArithmeticConfiguration wgConfig;
323+
wgConfig.init(hlsl::findMSB(workgroupSize), hlsl::log2(float(subgroupSize)), configItemsPerInvoc);
324+
320325
struct MacroDefines
321326
{
322327
std::string identifier;
323328
std::string definition;
324329
};
325-
const MacroDefines definesBuf[2] = {
330+
constexpr uint32_t NumDefines = 4;
331+
const MacroDefines definesBuf[NumDefines] = {
326332
{ "WORKGROUP_SIZE", std::to_string(workgroupSize) },
327-
{ "SUBGROUP_SIZE", std::to_string(subgroupSize) }
333+
{ "SUBGROUP_SIZE", std::to_string(subgroupSize) },
334+
{"WG_CONFIG_T", wgConfig.getConfigTemplateStructString()},
335+
{"NATIVE_SUBGROUP_ARITHMETIC", "1"}
328336
};
329337

338+
const uint32_t defineCount = m_physicalDevice->getLimits().shaderSubgroupArithmetic ? NumDefines : NumDefines - 1;
330339
std::vector<IShaderCompiler::SMacroDefinition> defines;
331-
for (uint32_t i = 0; i < 2; i++)
340+
for (uint32_t i = 0; i < defineCount; i++)
332341
defines.emplace_back(definesBuf[i].identifier, definesBuf[i].definition);
333342
options.preprocessorOptions.extraDefines = defines;
334343

0 commit comments

Comments
 (0)