Skip to content

Commit 37cb817

Browse files
committed
Eytzinger CDF, alias table is packed, 2 versions, CDF sampler now has 3 enumed types (tracking, YOLO, Eytzinger)
1 parent 2291b7d commit 37cb817

4 files changed

Lines changed: 342 additions & 114 deletions

File tree

examples_tests

Submodule examples_tests updated 41 files

include/nbl/builtin/hlsl/sampling/alias_table.hlsl

Lines changed: 152 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include <nbl/builtin/hlsl/cpp_compat.hlsl>
99
#include <nbl/builtin/hlsl/bit.hlsl>
10+
#include <nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl>
1011
#include <nbl/builtin/hlsl/concepts/core.hlsl>
1112
#include <nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl>
1213

@@ -17,84 +18,187 @@ namespace hlsl
1718
namespace sampling
1819
{
1920

20-
// Alias Method (Vose/Walker) discrete sampler.
21-
//
22-
// Samples a discrete index in [0, N) with probability proportional to
23-
// precomputed weights in O(1) time per sample, using a prebuilt alias table.
24-
//
25-
// Accessor template parameters must satisfy GenericReadAccessor:
26-
// accessor.template get<V, I>(index, outVal) // void, writes to outVal
27-
//
28-
// - ProbabilityAccessor: reads scalar_type threshold in [0, 1] for bin i
29-
// - AliasIndexAccessor: reads uint32_t redirect index for bin i
30-
// - PdfAccessor: reads scalar_type weight[i] / totalWeight
31-
//
32-
// Satisfies TractableSampler (not BackwardTractableSampler: the mapping is discrete).
33-
// The cache stores the PDF value looked up during generate, avoiding redundant
34-
// storage of the codomain (sampled index) which is already the return value.
35-
template<typename T, typename Domain, typename Codomain, typename ProbabilityAccessor, typename AliasIndexAccessor, typename PdfAccessor
21+
// Packed alias-entry bit layout shared by every packed variant. One 32-bit
22+
// word holds the redirect index in the low Log2N bits and the stay-
23+
// probability quantized as an unorm in the high (32 - Log2N) bits.
24+
// u * N = scaled; bin = floor(scaled); remainder = scaled - bin
25+
// if (remainder < getStayProb(word)) -> result = bin
26+
// else -> result = getTarget(word)
27+
// Quantizing the threshold to (32 - Log2N) bits is precision-neutral: `u`
28+
// already consumed Log2N bits of randomness producing `bin`, so `remainder`
29+
// carries at most the remaining (32 - Log2N) bits of discriminatory power.
30+
namespace impl
31+
{
32+
// Decodes the two fields of one packed 32-bit alias word. Log2N is the
// number of low bits that hold the redirect index; the remaining high
// (32 - Log2N) bits hold the quantized stay-probability.
// NOTE(review): both helpers shift by Log2N, so this presumably requires
// Log2N < 32 — confirm no caller instantiates it with 32.
template<uint32_t Log2N>
33+
struct AliasBitDecoder
34+
{
35+
// Returns the redirect index: `word` masked down to its low Log2N bits.
static uint32_t getTarget(uint32_t word)
36+
{
37+
return word & ((1u << Log2N) - 1u);
38+
}
39+
// Returns the stay-probability as T: the high (32 - Log2N) bits of `word`
// divided by the largest value those bits can hold.
template<typename T>
40+
static T getStayProb(uint32_t word)
41+
{
42+
// All-ones value of width (32 - Log2N); a field equal to it decodes to 1.
const uint32_t unormMax = (~0u) >> Log2N;
43+
return T(word >> Log2N) / T(unormMax);
44+
}
45+
};
46+
} // namespace impl
47+
48+
// 8 B entry used by the NBig == true variant. Embeds the bin's own pdf
49+
// alongside the packed word so the common stay-case needs no extra tap.
50+
// Table entry pairing a packed alias word with a per-bin pdf value.
// PackedAliasTableB reads one of these per sample through its EntryAccessor.
template<typename T>
51+
struct PackedAliasEntryB
52+
{
53+
uint32_t packedWord; // low Log2N bits: redirect target; high (32 - Log2N) bits: stay-probability unorm (decoded by impl::AliasBitDecoder<Log2N>)
54+
T ownPdf; // pdf of this bin; PackedAliasTableB copies it into cache.pdf before checking for a redirect
55+
};
56+
57+
58+
// NBig == false: 4 B packed word per bin + separate pdf[] array. Per sample
59+
// = one 4 B word load + one unconditional 4 B pdf[] tap indexed by the
60+
// selected bin (either the current bin or its redirect). Total 8 B whether
61+
// the sample stays or aliases. Favours small N.
62+
// Alias-table sampler that reads one packed uint32_t word per bin and looks
// pdfs up in a separate accessor.
//   T                  - scalar type used for probabilities and pdfs
//   Domain             - type of the random input `u`
//   Codomain           - unsigned integral index type returned by generate
//   PackedWordAccessor - read accessor yielding a uint32_t per Codomain index
//   PdfAccessor        - read accessor yielding a T per Codomain index
//   Log2N              - number of low bits of each word holding the redirect index
template<typename T, typename Domain, typename Codomain, typename PackedWordAccessor, typename PdfAccessor, uint32_t Log2N
3663
NBL_PRIMARY_REQUIRES(
3764
concepts::UnsignedIntegralScalar<Codomain> &&
38-
concepts::accessors::GenericReadAccessor<ProbabilityAccessor, T, Codomain> &&
39-
concepts::accessors::GenericReadAccessor<AliasIndexAccessor, Codomain, Codomain> &&
65+
concepts::accessors::GenericReadAccessor<PackedWordAccessor, uint32_t, Codomain> &&
4066
concepts::accessors::GenericReadAccessor<PdfAccessor, T, Codomain>)
41-
struct AliasTable
67+
struct PackedAliasTableA
4268
{
4369
using scalar_type = T;
44-
4570
using domain_type = Domain;
4671
using codomain_type = Codomain;
4772
using density_type = scalar_type;
4873
using weight_type = density_type;
74+
using decoder = impl::AliasBitDecoder<Log2N>;
75+
// Compile-time tag for this variant; PackedAliasTableB sets NBig = true.
NBL_CONSTEXPR_STATIC_INLINE bool NBig = false;
4976

5077
// Per-sample state: written by generate(u, cache), read back by
// forwardPdf / forwardWeight.
struct cache_type
5178
{
5279
density_type pdf;
5380
};
5481

55-
static AliasTable create(NBL_CONST_REF_ARG(ProbabilityAccessor) _probAccessor, NBL_CONST_REF_ARG(AliasIndexAccessor) _aliasAccessor, NBL_CONST_REF_ARG(PdfAccessor) _pdfAccessor, codomain_type _size)
82+
// Builds a sampler from the packed-word accessor, the pdf accessor and the
// table size `_size`. Stores scalar_type(_size) with its bit pattern
// decremented by one in tableSizeMinusUlp.
// NOTE(review): the round trip through uint32_t presumably requires
// scalar_type to be a 32-bit float, and `_size == 0` would wrap the bit
// pattern to 0xFFFFFFFF — confirm callers never pass an empty table.
static PackedAliasTableA create(NBL_CONST_REF_ARG(PackedWordAccessor) _entryAcc, NBL_CONST_REF_ARG(PdfAccessor) _pdfAcc, codomain_type _size)
5683
{
57-
AliasTable retval;
58-
retval.probAccessor = _probAccessor;
59-
retval.aliasAccessor = _aliasAccessor;
60-
retval.pdfAccessor = _pdfAccessor;
61-
// Precompute tableSize as float minus 1 ULP so that u=1.0 maps to bin N-1
84+
PackedAliasTableA retval;
85+
retval.entryAcc = _entryAcc;
86+
retval.pdfAcc = _pdfAcc;
6287
const scalar_type exact = scalar_type(_size);
6388
// One bit-pattern step below `exact`: for a positive `exact` this keeps
// u * tableSizeMinusUlp below _size when u <= 1, so the bin index computed
// in generate never reaches _size.
retval.tableSizeMinusUlp = nbl::hlsl::bit_cast<scalar_type>(nbl::hlsl::bit_cast<uint32_t>(exact) - 1u);
6489
return retval;
6590
}
6691

67-
// BasicSampler interface
6892
// Picks an index for the random input `u`. The integer part of
// u * tableSizeMinusUlp selects a bin and the fractional part is compared
// against that bin's stay-probability: below it the bin itself is returned,
// otherwise the redirect target stored in the bin's packed word.
// NOTE(review): presumably expects u in [0, 1] — confirm against callers.
codomain_type generate(const domain_type u) NBL_CONST_MEMBER_FUNC
6993
{
7094
const scalar_type scaled = u * tableSizeMinusUlp;
7195
const codomain_type bin = _static_cast<codomain_type>(scaled);
7296
const scalar_type remainder = scaled - scalar_type(bin);
7397

74-
scalar_type prob;
75-
probAccessor.template get<scalar_type, codomain_type>(bin, prob);
76-
77-
// Use if-statement to avoid select: aliasIndex is a dependent read
78-
codomain_type result;
79-
if (remainder < prob)
80-
{
81-
result = bin;
82-
}
83-
else
84-
{
85-
codomain_type alias;
86-
aliasAccessor.template get<codomain_type, codomain_type>(bin, alias);
87-
result = alias;
88-
}
98+
uint32_t packedWord;
99+
entryAcc.template get<uint32_t, codomain_type>(bin, packedWord);
100+
// select is fine here: both candidates (bin and the word's own target)
// come from data already loaded, so neither outcome needs a further read.
return hlsl::select(remainder < decoder::template getStayProb<scalar_type>(packedWord), bin, codomain_type(decoder::getTarget(packedWord)));
101+
}
89102

103+
// Same selection as generate(u); additionally reads the pdf of the returned
// index from pdfAcc into cache.pdf.
codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
104+
{
105+
const codomain_type result = generate(u);
106+
pdfAcc.template get<scalar_type, codomain_type>(result, cache.pdf);
90107
return result;
91108
}
92109

93-
// TractableSampler interface
110+
// Returns the pdf stored in `cache`; `u` is not used.
density_type forwardPdf(const domain_type u, NBL_CONST_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
111+
{
112+
return cache.pdf;
113+
}
114+
115+
// Returns the same cached pdf as forwardPdf; `u` is not used.
weight_type forwardWeight(const domain_type u, NBL_CONST_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
116+
{
117+
return cache.pdf;
118+
}
119+
120+
// Reads the pdf of index `v` from pdfAcc.
density_type backwardPdf(const codomain_type v) NBL_CONST_MEMBER_FUNC
121+
{
122+
scalar_type pdf;
123+
pdfAcc.template get<scalar_type, codomain_type>(v, pdf);
124+
return pdf;
125+
}
126+
127+
// Forwards to backwardPdf(v).
weight_type backwardWeight(const codomain_type v) NBL_CONST_MEMBER_FUNC
128+
{
129+
return backwardPdf(v);
130+
}
131+
132+
// Source of one packed uint32_t word per bin.
PackedWordAccessor entryAcc;
133+
// Source of one scalar_type pdf per index.
PdfAccessor pdfAcc;
134+
// scalar_type(size) lowered by one bit-pattern step; set by create().
scalar_type tableSizeMinusUlp;
135+
};
136+
137+
// NBig == true: 8 B entry {packedWord, ownPdf} + separate pdf[] array. Per
138+
// sample = one 8 B entry load (covers the common stay case where cache
139+
// already has ownPdf). If the sample aliases, a conditional 4 B pdf[target]
140+
// tap fills the cache. Total 8 B stay, 12 B aliased. Favours large N.
141+
// Alias-table sampler that reads one PackedAliasEntryB<T> per bin (packed
// word plus the bin's own pdf) and consults a separate pdf accessor only
// when a sample is redirected.
//   T             - scalar type used for probabilities and pdfs
//   Domain        - type of the random input `u`
//   Codomain      - unsigned integral index type returned by generate
//   EntryAccessor - read accessor yielding a PackedAliasEntryB<T> per Codomain index
//   PdfAccessor   - read accessor yielding a T per Codomain index
//   Log2N         - number of low bits of each packed word holding the redirect index
template<typename T, typename Domain, typename Codomain, typename EntryAccessor, typename PdfAccessor, uint32_t Log2N
142+
NBL_PRIMARY_REQUIRES(
143+
concepts::UnsignedIntegralScalar<Codomain> &&
144+
concepts::accessors::GenericReadAccessor<EntryAccessor, PackedAliasEntryB<T>, Codomain> &&
145+
concepts::accessors::GenericReadAccessor<PdfAccessor, T, Codomain>)
146+
struct PackedAliasTableB
147+
{
148+
using scalar_type = T;
149+
using domain_type = Domain;
150+
using codomain_type = Codomain;
151+
using density_type = scalar_type;
152+
using weight_type = density_type;
153+
using entry_type = PackedAliasEntryB<scalar_type>;
154+
using decoder = impl::AliasBitDecoder<Log2N>;
155+
// Compile-time tag for this variant; PackedAliasTableA sets NBig = false.
NBL_CONSTEXPR_STATIC_INLINE bool NBig = true;
156+
157+
// Per-sample state written by generate(u, cache).
struct cache_type
158+
{
159+
density_type pdf;
160+
};
161+
162+
// Builds a sampler from the entry accessor, the pdf accessor and the table
// size `_size`. Stores scalar_type(_size) with its bit pattern decremented
// by one in tableSizeMinusUlp.
// NOTE(review): the round trip through uint32_t presumably requires
// scalar_type to be a 32-bit float, and `_size == 0` would wrap the bit
// pattern to 0xFFFFFFFF — confirm callers never pass an empty table.
static PackedAliasTableB create(NBL_CONST_REF_ARG(EntryAccessor) _entryAcc, NBL_CONST_REF_ARG(PdfAccessor) _pdfAcc, codomain_type _size)
163+
{
164+
PackedAliasTableB retval;
165+
retval.entryAcc = _entryAcc;
166+
retval.pdfAcc = _pdfAcc;
167+
const scalar_type exact = scalar_type(_size);
168+
// One bit-pattern step below `exact`: for a positive `exact` this keeps
// u * tableSizeMinusUlp below _size when u <= 1, so the bin index computed
// in generate never reaches _size.
retval.tableSizeMinusUlp = nbl::hlsl::bit_cast<scalar_type>(nbl::hlsl::bit_cast<uint32_t>(exact) - 1u);
169+
return retval;
170+
}
171+
172+
// Picks an index for the random input `u` without touching any pdf. The
// integer part of u * tableSizeMinusUlp selects a bin and the fractional
// part is compared against the stay-probability in that bin's packed word:
// below it the bin itself is returned, otherwise the word's redirect target.
// NOTE(review): presumably expects u in [0, 1] — confirm against callers.
codomain_type generate(const domain_type u) NBL_CONST_MEMBER_FUNC
173+
{
174+
const scalar_type scaled = u * tableSizeMinusUlp;
175+
const codomain_type bin = _static_cast<codomain_type>(scaled);
176+
const scalar_type remainder = scaled - scalar_type(bin);
177+
178+
entry_type entry;
179+
entryAcc.template get<entry_type, codomain_type>(bin, entry);
180+
return hlsl::select(remainder < decoder::template getStayProb<scalar_type>(entry.packedWord), bin, codomain_type(decoder::getTarget(entry.packedWord)));
181+
}
182+
94183
// Same selection as generate(u), repeated inline so the loaded entry can
// also supply the pdf: cache.pdf receives entry.ownPdf when the sample stays
// in its bin, or the value read from pdfAcc at the redirect target otherwise.
codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
95184
{
96-
const codomain_type result = generate(u);
97-
pdfAccessor.template get<scalar_type, codomain_type>(result, cache.pdf);
185+
const scalar_type scaled = u * tableSizeMinusUlp;
186+
const codomain_type bin = _static_cast<codomain_type>(scaled);
187+
const scalar_type remainder = scaled - scalar_type(bin);
188+
189+
entry_type entry;
190+
entryAcc.template get<entry_type, codomain_type>(bin, entry);
191+
192+
const bool stay = remainder < decoder::template getStayProb<scalar_type>(entry.packedWord);
193+
194+
// Start from the bin's own pdf carried in the entry; it is overwritten
// below only when the sample is redirected.
cache.pdf = entry.ownPdf;
195+
codomain_type result = bin;
196+
if (!stay)
197+
{
198+
const codomain_type target = codomain_type(decoder::getTarget(entry.packedWord));
199+
// The only pdfAcc read on this path; skipped entirely when `stay` is true.
pdfAcc.template get<scalar_type, codomain_type>(target, cache.pdf);
200+
result = target;
201+
}
98202
return result;
99203
}
100204

@@ -111,7 +215,7 @@ struct AliasTable
111215
// Reads the pdf of index `v` from pdfAcc.
density_type backwardPdf(const codomain_type v) NBL_CONST_MEMBER_FUNC
112216
{
113217
scalar_type pdf;
114-
pdfAccessor.template get<scalar_type, codomain_type>(v, pdf);
218+
pdfAcc.template get<scalar_type, codomain_type>(v, pdf);
115219
return pdf;
116220
}
117221

@@ -120,9 +224,8 @@ struct AliasTable
120224
return backwardPdf(v);
121225
}
122226

123-
ProbabilityAccessor probAccessor;
124-
AliasIndexAccessor aliasAccessor;
125-
PdfAccessor pdfAccessor;
227+
// Source of one PackedAliasEntryB<T> per bin.
EntryAccessor entryAcc;
228+
// Source of one scalar_type pdf per index; read for redirected samples and by backwardPdf.
PdfAccessor pdfAcc;
126229
// scalar_type(size) lowered by one bit-pattern step; set by create().
scalar_type tableSizeMinusUlp;
127230
};
128231

0 commit comments

Comments
 (0)