77
88#include <nbl/builtin/hlsl/cpp_compat.hlsl>
99#include <nbl/builtin/hlsl/bit.hlsl>
10+ #include <nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl>
1011#include <nbl/builtin/hlsl/concepts/core.hlsl>
1112#include <nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl>
1213
@@ -17,84 +18,187 @@ namespace hlsl
1718namespace sampling
1819{
1920
20- // Alias Method (Vose/Walker) discrete sampler.
21- //
22- // Samples a discrete index in [0, N) with probability proportional to
23- // precomputed weights in O(1) time per sample, using a prebuilt alias table.
24- //
25- // Accessor template parameters must satisfy GenericReadAccessor:
26- // accessor.template get<V, I>(index, outVal) // void, writes to outVal
27- //
28- // - ProbabilityAccessor: reads scalar_type threshold in [0, 1] for bin i
29- // - AliasIndexAccessor: reads uint32_t redirect index for bin i
30- // - PdfAccessor: reads scalar_type weight[i] / totalWeight
31- //
32- // Satisfies TractableSampler (not BackwardTractableSampler: the mapping is discrete).
33- // The cache stores the PDF value looked up during generate, avoiding redundant
34- // storage of the codomain (sampled index) which is already the return value.
35- template<typename T, typename Domain, typename Codomain, typename ProbabilityAccessor, typename AliasIndexAccessor, typename PdfAccessor
// Packed alias-entry bit layout shared by every packed variant. One 32-bit
// word holds the redirect index in the low Log2N bits and the stay-
// probability quantized as an unorm in the high (32 - Log2N) bits.
//   u * N = scaled;  bin = floor(scaled);  remainder = scaled - bin
//   if (remainder < getStayProb(word))  ->  result = bin
//   else                                ->  result = getTarget(word)
// Quantizing the threshold to (32 - Log2N) bits is precision-neutral: `u`
// already consumed Log2N bits of randomness producing `bin`, so `remainder`
// carries exactly that many bits of discriminatory power.
namespace impl
{
// Stateless decoder for one packed alias word.
// Log2N is the number of low bits reserved for the redirect index.
template<uint32_t Log2N>
struct AliasBitDecoder
{
    // Shifting a 32-bit value by 32 or more is undefined, and Log2N == 32 would
    // also leave no bits for the probability (unormMax == 0 -> division by zero).
    static_assert(Log2N < 32u, "AliasBitDecoder: Log2N must leave at least one bit for the stay-probability");

    // Returns the redirect index stored in the low Log2N bits of `word`.
    static uint32_t getTarget(uint32_t word)
    {
        return word & ((1u << Log2N) - 1u);
    }

    // Returns the stay-probability stored in the high (32 - Log2N) bits of
    // `word`, normalised by the largest value that field can hold so the
    // result lies in [0, 1].
    template<typename T>
    static T getStayProb(uint32_t word)
    {
        const uint32_t unormMax = (~0u) >> Log2N;
        return T(word >> Log2N) / T(unormMax);
    }
};
} // namespace impl
47+
// 8 B entry used by the NBig == true variant. Embeds the bin's own pdf
// alongside the packed word so the common stay-case needs no extra tap.
template<typename T>
struct PackedAliasEntryB
{
    // Low Log2N bits: redirect target; high (32 - Log2N) bits: stayProb unorm.
    uint32_t packedWord;
    // pdf of this bin.
    T ownPdf;
};
56+
57+
// NBig == false: 4 B packed word per bin + separate pdf[] array. Per sample
// = one 4 B word load + one unconditional 4 B pdf[] tap indexed by the
// selected bin (either the current bin or its redirect). Total 8 B whether
// the sample stays or aliases. Favours small N.
//
// Accessors are read through `accessor.template get<V, I>(index, outVal)`:
//  - PackedWordAccessor yields the uint32_t packed word of a bin
//  - PdfAccessor yields the scalar_type pdf of a bin
template<typename T, typename Domain, typename Codomain, typename PackedWordAccessor, typename PdfAccessor, uint32_t Log2N
    NBL_PRIMARY_REQUIRES(
        concepts::UnsignedIntegralScalar<Codomain> &&
        concepts::accessors::GenericReadAccessor<PackedWordAccessor, uint32_t, Codomain> &&
        concepts::accessors::GenericReadAccessor<PdfAccessor, T, Codomain>)
struct PackedAliasTableA
{
    using scalar_type = T;
    using domain_type = Domain;
    using codomain_type = Codomain;
    using density_type = scalar_type;
    using weight_type = density_type;
    using decoder = impl::AliasBitDecoder<Log2N>;
    NBL_CONSTEXPR_STATIC_INLINE bool NBig = false;

    // Holds the pdf fetched while generating, so forwardPdf/forwardWeight
    // need no further table access.
    struct cache_type
    {
        density_type pdf;
    };

    // Builds a sampler over `_size` bins from the two accessors.
    // The table size is stored as a float whose bit pattern is decremented by
    // one, so that u = 1.0 still maps to bin N-1 instead of N.
    // NOTE(review): the uint32_t bit_cast presumes scalar_type is 32 bits wide
    // and `_size` is non-zero (0 would wrap the bit pattern) -- confirm with callers.
    static PackedAliasTableA create(NBL_CONST_REF_ARG(PackedWordAccessor) _entryAcc, NBL_CONST_REF_ARG(PdfAccessor) _pdfAcc, codomain_type _size)
    {
        PackedAliasTableA retval;
        retval.entryAcc = _entryAcc;
        retval.pdfAcc = _pdfAcc;
        const scalar_type exact = scalar_type(_size);
        retval.tableSizeMinusUlp = nbl::hlsl::bit_cast<scalar_type>(nbl::hlsl::bit_cast<uint32_t>(exact) - 1u);
        return retval;
    }

    // Maps `u` to a bin index: picks bin = floor(u * size), then either keeps
    // it or follows the redirect stored in that bin's packed word.
    codomain_type generate(const domain_type u) NBL_CONST_MEMBER_FUNC
    {
        const scalar_type scaled = u * tableSizeMinusUlp;
        const codomain_type bin = _static_cast<codomain_type>(scaled);
        const scalar_type remainder = scaled - scalar_type(bin);

        uint32_t packedWord;
        entryAcc.template get<uint32_t, codomain_type>(bin, packedWord);
        // Both outcomes come from the single word already loaded, so a select suffices.
        return hlsl::select(remainder < decoder::template getStayProb<scalar_type>(packedWord), bin, codomain_type(decoder::getTarget(packedWord)));
    }

    // Same as generate(u), additionally storing the pdf of the chosen bin in `cache`.
    codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
    {
        const codomain_type result = generate(u);
        pdfAcc.template get<scalar_type, codomain_type>(result, cache.pdf);
        return result;
    }

    // Returns the pdf recorded in `cache`; `u` is not consulted.
    density_type forwardPdf(const domain_type u, NBL_CONST_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
    {
        return cache.pdf;
    }

    // Returns the pdf recorded in `cache`; `u` is not consulted.
    weight_type forwardWeight(const domain_type u, NBL_CONST_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
    {
        return cache.pdf;
    }

    // Reads the pdf of bin `v` through the pdf accessor.
    density_type backwardPdf(const codomain_type v) NBL_CONST_MEMBER_FUNC
    {
        scalar_type pdf;
        pdfAcc.template get<scalar_type, codomain_type>(v, pdf);
        return pdf;
    }

    // Identical to backwardPdf(v).
    weight_type backwardWeight(const codomain_type v) NBL_CONST_MEMBER_FUNC
    {
        return backwardPdf(v);
    }

    PackedWordAccessor entryAcc;
    PdfAccessor pdfAcc;
    scalar_type tableSizeMinusUlp;
};
136+
137+ // NBig == true: 8 B entry {packedWord, ownPdf} + separate pdf[] array. Per
138+ // sample = one 8 B entry load (covers the common stay case where cache
139+ // already has ownPdf). If the sample aliases, a conditional 4 B pdf[target]
140+ // tap fills the cache. Total 8 B stay, 12 B aliased. Favours large N.
141+ template<typename T, typename Domain, typename Codomain, typename EntryAccessor, typename PdfAccessor, uint32_t Log2N
142+ NBL_PRIMARY_REQUIRES (
143+ concepts::UnsignedIntegralScalar<Codomain> &&
144+ concepts::accessors::GenericReadAccessor<EntryAccessor, PackedAliasEntryB<T>, Codomain> &&
145+ concepts::accessors::GenericReadAccessor<PdfAccessor, T, Codomain>)
146+ struct PackedAliasTableB
147+ {
148+ using scalar_type = T;
149+ using domain_type = Domain;
150+ using codomain_type = Codomain;
151+ using density_type = scalar_type;
152+ using weight_type = density_type;
153+ using entry_type = PackedAliasEntryB<scalar_type>;
154+ using decoder = impl::AliasBitDecoder<Log2N>;
155+ NBL_CONSTEXPR_STATIC_INLINE bool NBig = true ;
156+
157+ struct cache_type
158+ {
159+ density_type pdf;
160+ };
161+
// Builds a sampler over `_size` bins from the entry and pdf accessors.
// The table size is stored as a float whose bit pattern is decremented by one.
// NOTE(review): the uint32_t bit_cast presumes scalar_type is 32 bits wide
// and `_size` is non-zero (0 would wrap the bit pattern) -- confirm with callers.
static PackedAliasTableB create(NBL_CONST_REF_ARG(EntryAccessor) _entryAcc, NBL_CONST_REF_ARG(PdfAccessor) _pdfAcc, codomain_type _size)
{
    PackedAliasTableB retval;
    retval.entryAcc = _entryAcc;
    retval.pdfAcc = _pdfAcc;
    const scalar_type exact = scalar_type(_size);
    retval.tableSizeMinusUlp = nbl::hlsl::bit_cast<scalar_type>(nbl::hlsl::bit_cast<uint32_t>(exact) - 1u);
    return retval;
}
171+
// Maps `u` to a bin index without touching the pdf: picks
// bin = floor(u * size), loads that bin's entry, then either keeps the bin
// or follows the redirect encoded in the entry's packed word.
codomain_type generate(const domain_type u) NBL_CONST_MEMBER_FUNC
{
    const scalar_type scaled = u * tableSizeMinusUlp;
    const codomain_type bin = _static_cast<codomain_type>(scaled);
    const scalar_type remainder = scaled - scalar_type(bin);

    entry_type entry;
    entryAcc.template get<entry_type, codomain_type>(bin, entry);
    // Both outcomes come from the entry already loaded, so a select suffices.
    return hlsl::select(remainder < decoder::template getStayProb<scalar_type>(entry.packedWord), bin, codomain_type(decoder::getTarget(entry.packedWord)));
}
182+
// Maps `u` to a bin index and records the chosen bin's pdf in `cache`.
// The stay case is served entirely by the loaded entry (its ownPdf); only
// when the sample is redirected is the pdf accessor read for the target bin.
codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
{
    const scalar_type scaled = u * tableSizeMinusUlp;
    const codomain_type bin = _static_cast<codomain_type>(scaled);
    const scalar_type remainder = scaled - scalar_type(bin);

    entry_type entry;
    entryAcc.template get<entry_type, codomain_type>(bin, entry);

    const bool stay = remainder < decoder::template getStayProb<scalar_type>(entry.packedWord);

    // Start from the stay outcome; overwritten below if the sample aliases.
    cache.pdf = entry.ownPdf;
    codomain_type result = bin;
    if (!stay)
    {
        // Dependent read: kept behind a branch so it only happens on redirect.
        const codomain_type target = codomain_type(decoder::getTarget(entry.packedWord));
        pdfAcc.template get<scalar_type, codomain_type>(target, cache.pdf);
        result = target;
    }
    return result;
}
100204
@@ -111,7 +215,7 @@ struct AliasTable
// Reads the pdf of bin `v` through the pdf accessor.
density_type backwardPdf(const codomain_type v) NBL_CONST_MEMBER_FUNC
{
    scalar_type pdf;
    pdfAcc.template get<scalar_type, codomain_type>(v, pdf);
    return pdf;
}
117221
@@ -120,9 +224,8 @@ struct AliasTable
120224 return backwardPdf (v);
121225 }
122226
123- ProbabilityAccessor probAccessor;
124- AliasIndexAccessor aliasAccessor;
125- PdfAccessor pdfAccessor;
227+ EntryAccessor entryAcc;
228+ PdfAccessor pdfAcc;
126229 scalar_type tableSizeMinusUlp;
127230};
128231
0 commit comments