Skip to content

Commit c31617f

Browse files
Fix a bug in `decode_before_scramble_helper`
Also make `OwenSampler` able to generate dimensions in parallel
1 parent e11b118 commit c31617f

3 files changed

Lines changed: 82 additions & 82 deletions

File tree

include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ struct decode_before_scramble_helper
7979
{
8080
uvec_type seqVal;
8181
NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
82-
seqVal[i] = val.get(i);
82+
seqVal[i] = val.get(i) << Q::DiscardBits; // restore high bits
8383
seqVal ^= scrambleKey;
8484
return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(storage_scalar_type)> >(UNormConstant);
8585
}
@@ -122,6 +122,7 @@ struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T
122122
using store_type = T;
123123
using scalar_type = typename vector_traits<T>::scalar_type;
124124
NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<store_type>;
125+
NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = uint16_t(0u);
125126
NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = uint16_t(1u);
126127

127128
static this_t create(const store_type value)
@@ -223,6 +224,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
223224
using store_type = T;
224225
using scalar_type = typename vector_traits<T>::scalar_type;
225226
NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<scalar_type>;
227+
NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = uint16_t(0u);
226228
NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
227229

228230
static this_t create(const store_type value)
@@ -288,11 +290,11 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Di
288290
else if (idx == 1) // y
289291
{
290292
scalar_type y = glsl::bitfieldExtract(data[0], BitsPerComponent, DiscardBits);
291-
y |= glsl::bitfieldExtract(data[1], 0u, DiscardBits - 1u) << DiscardBits;
293+
y |= glsl::bitfieldExtract(data[1], 0u, BitsPerComponent - DiscardBits) << DiscardBits;
292294
return y;
293295
}
294296
else // z
295-
return glsl::bitfieldExtract(data[1], DiscardBits - 1u, BitsPerComponent);
297+
return glsl::bitfieldExtract(data[1], BitsPerComponent - DiscardBits, BitsPerComponent);
296298
}
297299

298300
void set(const uint16_t idx, const scalar_type value)
@@ -304,10 +306,10 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Di
304306
else if (idx == 1) // y
305307
{
306308
data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent, DiscardBits);
307-
data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, DiscardBits - 1u);
309+
data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, BitsPerComponent - DiscardBits);
308310
}
309311
else // z
310-
data[1] = glsl::bitfieldInsert(data[1], trunc_val, DiscardBits - 1u, BitsPerComponent);
312+
data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent - DiscardBits, BitsPerComponent);
311313
}
312314

313315
template<typename F, bool FullWidth>
@@ -317,14 +319,14 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Di
317319
}
318320

319321
template<typename F>
320-
vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
322+
vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey) NBL_CONST_MEMBER_FUNC
321323
{
322324
impl::decode_before_scramble_helper<this_t,F> helper;
323325
helper.val.data = data;
324326
return helper(scrambleKey);
325327
}
326328
template<typename F>
327-
vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
329+
vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey) NBL_CONST_MEMBER_FUNC
328330
{
329331
impl::decode_after_scramble_helper<this_t,F> helper;
330332
helper.val.data = data;

include/nbl/core/sampling/OwenSampler.h

Lines changed: 72 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -12,92 +12,90 @@ namespace nbl::core
1212

1313
//! TODO: make the tree sampler/generator configurable and let RandomSampler be default
1414
template<class SequenceSampler=SobolSampler>
15-
class OwenSampler : protected SequenceSampler
15+
class OwenSampler final : protected SequenceSampler
1616
{
17+
// if we don't limit the sample count, then due to IEEE754 precision, we'll get duplicate sample coordinate values, ruining the net property
18+
constexpr static inline uint32_t OUT_BITS = sizeof(uint32_t)*8u;
19+
constexpr static inline uint32_t MAX_SAMPLES_LOG2 = 24u;
20+
constexpr static inline uint32_t MAX_SAMPLES = 0x1u<<MAX_SAMPLES_LOG2;
21+
1722
public:
18-
OwenSampler(uint32_t _dimensions, uint32_t _seed) : SequenceSampler(_dimensions)
19-
{
20-
mersenneTwister.seed(_seed);
21-
cachedFlip.resize(MAX_SAMPLES-1u);
22-
resetDimensionCounter(0u);
23-
}
24-
~OwenSampler()
25-
{
26-
}
23+
inline OwenSampler(uint32_t _dimensions, uint32_t _seed) : SequenceSampler(_dimensions), seed(_seed) {}
24+
inline ~OwenSampler() = default;
2725

28-
//
29-
inline uint32_t sample(uint32_t dim, uint32_t sampleNum)
26+
struct SDimensionSampler final : public core::Unmovable
3027
{
31-
if (dim>lastDim)
32-
resetDimensionCounter(dim);
33-
else if (dim<lastDim)
34-
assert(false);
35-
36-
uint32_t oldsample = SequenceSampler::sample(dim,sampleNum);
37-
#ifdef _NBL_DEBUG
38-
assert(sampleNum<MAX_SAMPLES);
39-
if (sampleNum)
40-
assert((oldsample&(0x7fffffffu>>hlsl::findMSB(sampleNum))) == 0u);
41-
else
42-
assert(oldsample == 0u);
43-
#endif
44-
constexpr uint32_t lastLevelStart = MAX_SAMPLES/2u-1u;
45-
uint32_t index = oldsample>>(OUT_BITS+1u - MAX_SAMPLES_LOG2);
46-
index += lastLevelStart;
28+
inline uint32_t sample(uint32_t sampleNum) const
29+
{
30+
const uint32_t oldsample = sampler.sample(dimension,sampleNum);
31+
#ifdef _NBL_DEBUG
32+
assert(sampleNum<MAX_SAMPLES);
33+
if (sampleNum)
34+
assert((oldsample&(0x7fffffffu>>hlsl::findMSB(sampleNum))) == 0u);
35+
else
36+
assert(oldsample == 0u);
37+
#endif
38+
constexpr uint32_t lastLevelStart = MAX_SAMPLES/2u-1u;
39+
uint32_t index = oldsample>>(OUT_BITS+1u - MAX_SAMPLES_LOG2);
40+
index += lastLevelStart;
4741

48-
return oldsample^cachedFlip[index];
49-
}
42+
return oldsample^cachedFlip[index];
43+
}
5044

51-
//!
52-
inline void resetDimensionCounter(uint32_t dimension)
53-
{
54-
/** NOTES:
55-
- For 64k samples, we can store their positions in uint16_t
56-
- The last levels of Owen Tree can be collapsed to a single node (because trailing bits are always 00000.....)
57-
- The above can be stored in 1x array of sample count uint16_t/uint32_t per Dimension
58-
- We should store samples as uint32_t always because the total amount of memory to fetch is always the same
59-
**/
60-
for (uint32_t i=0u; i<MAX_SAMPLES-1u; i++)
61-
{
62-
uint32_t randMask = (i<(MAX_SAMPLES/2u-1u)) ? 0x80000000u:0xffffffffu;
63-
cachedFlip[i] = mersenneTwister()&(randMask>>getTreeDepth(i));
64-
}
65-
for (uint32_t i=1u; i<MAX_SAMPLES_LOG2; i++)
66-
{
67-
uint32_t previousLevelStart = (0x1u<<(i-1u))-1u;
68-
uint32_t currentLevelStart = (0x1u<<i)-1u;
69-
uint32_t currentLevelSize = 0x1u<<i;
70-
for (uint32_t j=0u; j<currentLevelSize; j++)
71-
cachedFlip[currentLevelStart+j] |= cachedFlip[previousLevelStart+(j>>1u)];
72-
#ifdef _NBL_DEBUG
73-
for (uint32_t j=0u; j<currentLevelSize; j+=2)
45+
private:
46+
friend class OwenSampler;
47+
inline SDimensionSampler(const SequenceSampler& _sampler, const uint32_t seed, const uint32_t _dimension) : sampler(_sampler), dimension(_dimension),
48+
mersenneTwister(std::hash<uint64_t>()((uint64_t(_dimension)<<32)|seed))
7449
{
75-
const uint32_t highBitMask = 0xffffffffu<<(OUT_BITS-i);
76-
uint32_t left = cachedFlip[currentLevelStart+j];
77-
uint32_t right = cachedFlip[currentLevelStart+j+1];
78-
assert(((left^right)&highBitMask)==0u);
79-
assert((left&right&highBitMask)==cachedFlip[previousLevelStart+(j>>1u)]);
50+
cachedFlip.resize(MAX_SAMPLES-1u);
51+
/** NOTES:
52+
- For 64k samples, we can store their positions in uint16_t
53+
- The last levels of Owen Tree can be collapsed to a single node (because trailing bits are always 00000.....)
54+
- The above can be stored in 1x array of sample count uint16_t/uint32_t per Dimension
55+
- We should store samples as uint32_t always because the total amount of memory to fetch is always the same
56+
**/
57+
for (uint32_t i=0u; i<MAX_SAMPLES-1u; i++)
58+
{
59+
uint32_t randMask = (i<(MAX_SAMPLES/2u-1u)) ? 0x80000000u:0xffffffffu;
60+
cachedFlip[i] = mersenneTwister()&(randMask>>getTreeDepth(i));
61+
}
62+
for (uint32_t i=1u; i<MAX_SAMPLES_LOG2; i++)
63+
{
64+
uint32_t previousLevelStart = (0x1u<<(i-1u))-1u;
65+
uint32_t currentLevelStart = (0x1u<<i)-1u;
66+
uint32_t currentLevelSize = 0x1u<<i;
67+
for (uint32_t j=0u; j<currentLevelSize; j++)
68+
cachedFlip[currentLevelStart+j] |= cachedFlip[previousLevelStart+(j>>1u)];
69+
#ifdef _NBL_DEBUG
70+
for (uint32_t j=0u; j<currentLevelSize; j+=2)
71+
{
72+
const uint32_t highBitMask = 0xffffffffu<<(OUT_BITS-i);
73+
uint32_t left = cachedFlip[currentLevelStart+j];
74+
uint32_t right = cachedFlip[currentLevelStart+j+1];
75+
assert(((left^right)&highBitMask)==0u);
76+
assert((left&right&highBitMask)==cachedFlip[previousLevelStart+(j>>1u)]);
77+
}
78+
#endif
79+
}
80+
}
81+
inline uint32_t getTreeDepth(uint32_t sampleNum)
82+
{
83+
return hlsl::findMSB(sampleNum+1u);
8084
}
81-
#endif
82-
}
83-
lastDim = dimension;
84-
}
85-
86-
protected:
87-
// if we don't limit the sample count, then due to IEEE754 precision, we'll get duplicate sample coordinate values, ruining the net property
88-
_NBL_STATIC_INLINE_CONSTEXPR uint32_t OUT_BITS = sizeof(uint32_t)*8u;
89-
_NBL_STATIC_INLINE_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 24u;
90-
_NBL_STATIC_INLINE_CONSTEXPR uint32_t MAX_SAMPLES = 0x1u<<MAX_SAMPLES_LOG2;
9185

92-
inline uint32_t getTreeDepth(uint32_t sampleNum)
86+
const SequenceSampler& sampler;
87+
std::mt19937 mersenneTwister;
88+
core::vector<uint32_t> cachedFlip;
89+
const uint32_t dimension;
90+
};
91+
inline SDimensionSampler prepareDimension(const uint64_t dim) const
9392
{
94-
return hlsl::findMSB(sampleNum+1u);
93+
return SDimensionSampler(*this,seed,dim);
9594
}
9695

97-
std::mt19937 mersenneTwister;
98-
uint32_t lastDim;
99-
core::vector<uint32_t> cachedFlip;
100-
};
96+
private:
97+
uint32_t seed;
98+
};
10199

102100

103101
}

include/nbl/core/sampling/SobolSampler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class SobolSampler
7777
}
7878

7979
// Idea for optimization, do PoT samples per pass, then can precompute most of the `retval`
80-
inline uint32_t sample(uint32_t dim, uint32_t sampleNum)
80+
inline uint32_t sample(uint32_t dim, uint32_t sampleNum) const
8181
{
8282
#ifdef _DEBUG
8383
assert(dim<dimensions);

0 commit comments

Comments
 (0)