diff --git a/cmake/common.cmake b/cmake/common.cmake index e10facab0c..b004e528f3 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1191,7 +1191,7 @@ option(NSC_DEBUG_EDIF_LINE_BIT "Add \"-fspv-debug=line\" to NSC Debug CLI" OFF) option(NSC_DEBUG_EDIF_TOOL_BIT "Add \"-fspv-debug=tool\" to NSC Debug CLI" ON) option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" to NSC Debug CLI" OFF) option(NSC_USE_DEPFILE "Generate depfiles for NSC custom commands" ON) -option(NBL_DISABLE_EXPERIMENTAL_OPTS "Disable -O1experimental for all NSC compile rules globally" OFF) +option(NBL_DISABLE_EXPERIMENTAL_OPTS "Disable -O1experimental for all NSC compile rules globally" ON) # Until Arek can make sure all Unit Tests still work for samplers and BxDFs function(NBL_CREATE_NSC_COMPILE_RULES) set(COMMENT "this code has been autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility") diff --git a/examples_tests b/examples_tests index 5481e4e50a..895aeab82d 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5481e4e50afa85ee384a35afbf89a294d5b55d60 +Subproject commit 895aeab82dc3fa6dfa823b916efa0f286ffeca8a diff --git a/include/nbl/builtin/hlsl/path_tracing/gaussian_filter.hlsl b/include/nbl/builtin/hlsl/path_tracing/gaussian_filter.hlsl index 6e27749405..22f344b81e 100644 --- a/include/nbl/builtin/hlsl/path_tracing/gaussian_filter.hlsl +++ b/include/nbl/builtin/hlsl/path_tracing/gaussian_filter.hlsl @@ -21,7 +21,7 @@ struct GaussianFilter static this_t create(const scalar_type gaussianFilterCutoff, const scalar_type stddev) { this_t retval; - retval.truncation = hlsl::exp(-0.5 * gaussianFilterCutoff * gaussianFilterCutoff); + retval.truncation = hlsl::exp(-scalar_type(0.5) * gaussianFilterCutoff * gaussianFilterCutoff); retval.boxMuller.stddev = stddev; return retval; } @@ -29,7 +29,7 @@ struct GaussianFilter vector2_type sample(const vector2_type randVec) { vector2_type remappedRand = randVec; - remappedRand.x *= 1.0 - truncation; + remappedRand.x *= scalar_type(1) - truncation; remappedRand.x += truncation; return boxMuller(remappedRand); } diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index cdd87ee4dc..2ef431ef62 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -24,8 +24,8 @@ struct BoxMullerTransform vector2_type operator()(const vector2_type xi) { scalar_type sinPhi, cosPhi; - math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); - return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev; + math::sincos(scalar_type(2) * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); + return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(scalar_type(-2) * nbl::hlsl::log(xi.x)) * stddev; } vector2_type backwardPdf(const vector2_type outPos) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 64573ac85f..db575e0250 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -79,7 +79,7 @@ struct decode_before_scramble_helper { uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) - seqVal[i] = val.get(i); + seqVal[i] = val.get(i) << Q::DiscardBits; // restore high bits seqVal ^= scrambleKey; return return_type(seqVal) * bit_cast >(UNormConstant); } @@ -122,6 +122,7 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = uint16_t(0u); NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = uint16_t(1u); static this_t create(const store_type value) @@ -223,6 +224,7 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = uint16_t(0u); NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; static this_t create(const store_type value) @@ -288,11 +290,11 @@ struct QuantizedSequence && Di else if (idx == 1) // y { scalar_type y = glsl::bitfieldExtract(data[0], BitsPerComponent, DiscardBits); - y |= glsl::bitfieldExtract(data[1], 0u, DiscardBits - 1u) << DiscardBits; + y |= glsl::bitfieldExtract(data[1], 0u, BitsPerComponent - DiscardBits) << DiscardBits; return y; } else // z - return glsl::bitfieldExtract(data[1], DiscardBits - 1u, BitsPerComponent); + return glsl::bitfieldExtract(data[1], BitsPerComponent - DiscardBits, BitsPerComponent); } void set(const uint16_t idx, const scalar_type value) @@ -304,10 +306,10 @@ struct QuantizedSequence && Di else if (idx == 1) // y { data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent, DiscardBits); - data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, DiscardBits - 1u); + data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, BitsPerComponent - DiscardBits); } else // z - data[1] = glsl::bitfieldInsert(data[1], trunc_val, DiscardBits - 1u, BitsPerComponent); + data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent - DiscardBits, BitsPerComponent); } template @@ -317,14 +319,14 @@ struct QuantizedSequence && Di } template - vector decode(const vector,Dimension> scrambleKey) + vector decode(const vector,Dimension> scrambleKey) NBL_CONST_MEMBER_FUNC { impl::decode_before_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } template - vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) NBL_CONST_MEMBER_FUNC { impl::decode_after_scramble_helper helper; helper.val.data = data; diff --git a/include/nbl/core/sampling/OwenSampler.h b/include/nbl/core/sampling/OwenSampler.h index b218647314..7f35b015e3 100644 --- a/include/nbl/core/sampling/OwenSampler.h +++ b/include/nbl/core/sampling/OwenSampler.h @@ -12,92 +12,90 @@ namespace nbl::core //! TODO: make the tree sampler/generator configurable and let RandomSampler be default template -class OwenSampler : protected SequenceSampler +class OwenSampler final : protected SequenceSampler { + // if we don't limit the sample count, then due to IEEE754 precision, we'll get duplicate sample coordinate values, ruining the net property + constexpr static inline uint32_t OUT_BITS = sizeof(uint32_t)*8u; + constexpr static inline uint32_t MAX_SAMPLES_LOG2 = 24u; + constexpr static inline uint32_t MAX_SAMPLES = 0x1u<lastDim) - resetDimensionCounter(dim); - else if (dim>hlsl::findMSB(sampleNum))) == 0u); - else - assert(oldsample == 0u); - #endif - constexpr uint32_t lastLevelStart = MAX_SAMPLES/2u-1u; - uint32_t index = oldsample>>(OUT_BITS+1u - MAX_SAMPLES_LOG2); - index += lastLevelStart; + inline uint32_t sample(uint32_t sampleNum) const + { + const uint32_t oldsample = sampler.sample(dimension,sampleNum); + #ifdef _NBL_DEBUG + assert(sampleNum>hlsl::findMSB(sampleNum))) == 0u); + else + assert(oldsample == 0u); + #endif + constexpr uint32_t lastLevelStart = MAX_SAMPLES/2u-1u; + uint32_t index = oldsample>>(OUT_BITS+1u - MAX_SAMPLES_LOG2); + index += lastLevelStart; - return oldsample^cachedFlip[index]; - } + return oldsample^cachedFlip[index]; + } - //! - inline void resetDimensionCounter(uint32_t dimension) - { - /** NOTES: - - For 64k samples, we can store their positions in uint16_t - - The last leves of Owen Tree can be collapsed to a single node (because trailing bits are always 00000.....) - - The above can be stored in 1x array of sample count uint16_t/uint32_t per Dimension - - We should store samples as uint32_t always because the total amount of memory to fetch is always the same - **/ - for (uint32_t i=0u; i>getTreeDepth(i)); - } - for (uint32_t i=1u; i>1u)]; - #ifdef _NBL_DEBUG - for (uint32_t j=0u; j()((uint64_t(_dimension)<<32)|seed)) { - const uint32_t highBitMask = 0xffffffffu<<(OUT_BITS-i); - uint32_t left = cachedFlip[currentLevelStart+j]; - uint32_t right = cachedFlip[currentLevelStart+j+1]; - assert(((left^right)&highBitMask)==0u); - assert((left&right&highBitMask)==cachedFlip[previousLevelStart+(j>>1u)]); + cachedFlip.resize(MAX_SAMPLES-1u); + /** NOTES: + - For 64k samples, we can store their positions in uint16_t + - The last leves of Owen Tree can be collapsed to a single node (because trailing bits are always 00000.....) + - The above can be stored in 1x array of sample count uint16_t/uint32_t per Dimension + - We should store samples as uint32_t always because the total amount of memory to fetch is always the same + **/ + for (uint32_t i=0u; i>getTreeDepth(i)); + } + for (uint32_t i=1u; i>1u)]; + #ifdef _NBL_DEBUG + for (uint32_t j=0u; j>1u)]); + } + #endif + } + } + inline uint32_t getTreeDepth(uint32_t sampleNum) + { + return hlsl::findMSB(sampleNum+1u); } - #endif - } - lastDim = dimension; - } - - protected: - // if we don't limit the sample count, then due to IEEE754 precision, we'll get duplicate sample coordinate values, ruining the net property - _NBL_STATIC_INLINE_CONSTEXPR uint32_t OUT_BITS = sizeof(uint32_t)*8u; - _NBL_STATIC_INLINE_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 24u; - _NBL_STATIC_INLINE_CONSTEXPR uint32_t MAX_SAMPLES = 0x1u< cachedFlip; + const uint32_t dimension; + }; + inline SDimensionSampler prepareDimension(const uint64_t dim) const { - return hlsl::findMSB(sampleNum+1u); + return SDimensionSampler(*this,seed,dim); } - std::mt19937 mersenneTwister; - uint32_t lastDim; - core::vector cachedFlip; - }; + private: + uint32_t seed; +}; } diff --git a/include/nbl/core/sampling/SobolSampler.h b/include/nbl/core/sampling/SobolSampler.h index 8d0c65e431..31d397168b 100644 --- a/include/nbl/core/sampling/SobolSampler.h +++ b/include/nbl/core/sampling/SobolSampler.h @@ -77,7 +77,7 @@ class SobolSampler } // Idea for optimization, do PoT samples per pass, then can precompute most of the `retval` - inline uint32_t sample(uint32_t dim, uint32_t sampleNum) + inline uint32_t sample(uint32_t dim, uint32_t sampleNum) const { #ifdef _DEBUG assert(dim(m_imContextBackPointer); + ImGuiContext* const previousContext = ImGui::GetCurrentContext(); + if (context && previousContext != context) + ImGui::SetCurrentContext(context); // we must call it to unlock atlas from potential "render" state before we kill it (obvsly if its ours!) - if(m_imFontAtlasBackPointer) + if (m_imFontAtlasBackPointer && context && context->WithinFrameScope) ImGui::EndFrame(); // context belongs to the instance, we must free it - if(m_imContextBackPointer) - ImGui::DestroyContext(reinterpret_cast(m_imContextBackPointer)); + if (context) + ImGui::DestroyContext(context); + if (previousContext && previousContext != context) + ImGui::SetCurrentContext(previousContext); // and if we own the atlas we must free it as well, if user passed its own at creation time then its "shared" - at this point m_imFontAtlasBackPointer is nullptr and we don't free anything if (m_imFontAtlasBackPointer)