Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,7 @@ option(NSC_DEBUG_EDIF_LINE_BIT "Add \"-fspv-debug=line\" to NSC Debug CLI" OFF)
option(NSC_DEBUG_EDIF_TOOL_BIT "Add \"-fspv-debug=tool\" to NSC Debug CLI" ON)
option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" to NSC Debug CLI" OFF)
option(NSC_USE_DEPFILE "Generate depfiles for NSC custom commands" ON)
option(NBL_DISABLE_EXPERIMENTAL_OPTS "Disable -O1experimental for all NSC compile rules globally" OFF)
option(NBL_DISABLE_EXPERIMENTAL_OPTS "Disable -O1experimental for all NSC compile rules globally" ON) # Until Arek can make sure all Unit Tests still work for samplers and BxDFs

function(NBL_CREATE_NSC_COMPILE_RULES)
set(COMMENT "this code has been autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility")
Expand Down
2 changes: 1 addition & 1 deletion examples_tests
Submodule examples_tests updated 44 files
+1 −1 .gitignore
+0 −38 30_ComputeShaderPathTracer/CMakeLists.txt
+0 −822 30_ComputeShaderPathTracer/app_resources/common.glsl
+0 −182 30_ComputeShaderPathTracer/app_resources/litByRectangle.comp
+0 −60 30_ComputeShaderPathTracer/app_resources/litBySphere.comp
+0 −105 30_ComputeShaderPathTracer/app_resources/litByTriangle.comp
+0 −19 30_ComputeShaderPathTracer/app_resources/present.frag.hlsl
+0 −28 30_ComputeShaderPathTracer/config.json.template
+0 −11 30_ComputeShaderPathTracer/include/nbl/this_example/common.hpp
+0 −1,304 30_ComputeShaderPathTracer/main.cpp
+0 −50 30_ComputeShaderPathTracer/pipeline.groovy
+6 −8 31_HLSLPathTracer/CMakeLists.txt
+7 −3 31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl
+5 −3 31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl
+0 −40 31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl
+12 −6 31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl
+0 −1 31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.linear.proxy.hlsl
+0 −1 31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.linear.proxy.hlsl
+0 −1 31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.linear.proxy.hlsl
+4 −21 31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.variant.shared.hlsl
+14 −5 31_HLSLPathTracer/include/nbl/this_example/render_variant_info.hpp
+417 −100 31_HLSLPathTracer/main.cpp
+0 −2 31_HLSLPathTracer/pt.variant_ids.cmake
+2 −0 40_PathTracer/CMakeLists.txt
+74 −7 40_PathTracer/app_resources/pathtrace/beauty.hlsl
+123 −0 40_PathTracer/app_resources/pathtrace/common.hlsl
+48 −82 40_PathTracer/app_resources/pathtrace/debug.hlsl
+20 −42 40_PathTracer/include/renderer/CRenderer.h
+3 −4 40_PathTracer/include/renderer/CSession.h
+70 −0 40_PathTracer/include/renderer/shaders/common.hlsl
+11 −4 40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl
+0 −45 40_PathTracer/include/renderer/shaders/pathtrace/rand_gen.hlsl
+10 −4 40_PathTracer/include/renderer/shaders/scene.hlsl
+40 −29 40_PathTracer/include/renderer/shaders/session.hlsl
+27 −27 40_PathTracer/main.cpp
+24 −120 40_PathTracer/src/renderer/CRenderer.cpp
+4 −4 40_PathTracer/src/renderer/CScene.cpp
+73 −55 40_PathTracer/src/renderer/CSession.cpp
+1 −1 40_PathTracer/src/renderer/present/CWindowPresenter.cpp
+0 −3 CMakeLists.txt
+1 −1 common/CMakeLists.txt
+198 −0 common/include/nbl/examples/common/CCachedOwenScrambledSequence.hpp
+45 −0 common/include/nbl/examples/common/KeyedQuantizedSequence.hlsl
+0 −130 common/include/nbl/examples/common/ScrambleSequence.hpp
4 changes: 2 additions & 2 deletions include/nbl/builtin/hlsl/path_tracing/gaussian_filter.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ struct GaussianFilter
static this_t create(const scalar_type gaussianFilterCutoff, const scalar_type stddev)
{
this_t retval;
retval.truncation = hlsl::exp(-0.5 * gaussianFilterCutoff * gaussianFilterCutoff);
retval.truncation = hlsl::exp(-scalar_type(0.5) * gaussianFilterCutoff * gaussianFilterCutoff);
retval.boxMuller.stddev = stddev;
return retval;
}

vector2_type sample(const vector2_type randVec)
{
vector2_type remappedRand = randVec;
remappedRand.x *= 1.0 - truncation;
remappedRand.x *= scalar_type(1) - truncation;
remappedRand.x += truncation;
return boxMuller(remappedRand);
}
Expand Down
4 changes: 2 additions & 2 deletions include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ struct BoxMullerTransform
vector2_type operator()(const vector2_type xi)
{
scalar_type sinPhi, cosPhi;
math::sincos<scalar_type>(2.0 * numbers::pi<scalar_type> * xi.y - numbers::pi<scalar_type>, sinPhi, cosPhi);
return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev;
math::sincos<scalar_type>(scalar_type(2) * numbers::pi<scalar_type> * xi.y - numbers::pi<scalar_type>, sinPhi, cosPhi);
return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(scalar_type(-2) * nbl::hlsl::log(xi.x)) * stddev;
}

vector2_type backwardPdf(const vector2_type outPos)
Expand Down
16 changes: 9 additions & 7 deletions include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ struct decode_before_scramble_helper
{
uvec_type seqVal;
NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
seqVal[i] = val.get(i);
seqVal[i] = val.get(i) << Q::DiscardBits; // restore high bits
seqVal ^= scrambleKey;
return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(storage_scalar_type)> >(UNormConstant);
}
Expand Down Expand Up @@ -122,6 +122,7 @@ struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T
using store_type = T;
using scalar_type = typename vector_traits<T>::scalar_type;
NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<store_type>;
NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = uint16_t(0u);
NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = uint16_t(1u);

static this_t create(const store_type value)
Expand Down Expand Up @@ -223,6 +224,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
using store_type = T;
using scalar_type = typename vector_traits<T>::scalar_type;
NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<scalar_type>;
NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = uint16_t(0u);
NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;

static this_t create(const store_type value)
Expand Down Expand Up @@ -288,11 +290,11 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Di
else if (idx == 1) // y
{
scalar_type y = glsl::bitfieldExtract(data[0], BitsPerComponent, DiscardBits);
y |= glsl::bitfieldExtract(data[1], 0u, DiscardBits - 1u) << DiscardBits;
y |= glsl::bitfieldExtract(data[1], 0u, BitsPerComponent - DiscardBits) << DiscardBits;
return y;
}
else // z
return glsl::bitfieldExtract(data[1], DiscardBits - 1u, BitsPerComponent);
return glsl::bitfieldExtract(data[1], BitsPerComponent - DiscardBits, BitsPerComponent);
}

void set(const uint16_t idx, const scalar_type value)
Expand All @@ -304,10 +306,10 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Di
else if (idx == 1) // y
{
data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent, DiscardBits);
data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, DiscardBits - 1u);
data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, BitsPerComponent - DiscardBits);
}
else // z
data[1] = glsl::bitfieldInsert(data[1], trunc_val, DiscardBits - 1u, BitsPerComponent);
data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent - DiscardBits, BitsPerComponent);
}

template<typename F, bool FullWidth>
Expand All @@ -317,14 +319,14 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Di
}

template<typename F>
vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey) NBL_CONST_MEMBER_FUNC
{
impl::decode_before_scramble_helper<this_t,F> helper;
helper.val.data = data;
return helper(scrambleKey);
}
template<typename F>
vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey) NBL_CONST_MEMBER_FUNC
{
impl::decode_after_scramble_helper<this_t,F> helper;
helper.val.data = data;
Expand Down
146 changes: 72 additions & 74 deletions include/nbl/core/sampling/OwenSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,92 +12,90 @@ namespace nbl::core

//! TODO: make the tree sampler/generator configurable and let RandomSampler be default
template<class SequenceSampler=SobolSampler>
class OwenSampler : protected SequenceSampler
class OwenSampler final : protected SequenceSampler
{
// if we don't limit the sample count, then due to IEEE754 precision, we'll get duplicate sample coordinate values, ruining the net property
constexpr static inline uint32_t OUT_BITS = sizeof(uint32_t)*8u;
constexpr static inline uint32_t MAX_SAMPLES_LOG2 = 24u;
constexpr static inline uint32_t MAX_SAMPLES = 0x1u<<MAX_SAMPLES_LOG2;

public:
OwenSampler(uint32_t _dimensions, uint32_t _seed) : SequenceSampler(_dimensions)
{
mersenneTwister.seed(_seed);
cachedFlip.resize(MAX_SAMPLES-1u);
resetDimensionCounter(0u);
}
~OwenSampler()
{
}
inline OwenSampler(uint32_t _dimensions, uint32_t _seed) : SequenceSampler(_dimensions), seed(_seed) {}
inline ~OwenSampler() = default;

//
inline uint32_t sample(uint32_t dim, uint32_t sampleNum)
struct SDimensionSampler final : public core::Unmovable
{
if (dim>lastDim)
resetDimensionCounter(dim);
else if (dim<lastDim)
assert(false);

uint32_t oldsample = SequenceSampler::sample(dim,sampleNum);
#ifdef _NBL_DEBUG
assert(sampleNum<MAX_SAMPLES);
if (sampleNum)
assert((oldsample&(0x7fffffffu>>hlsl::findMSB(sampleNum))) == 0u);
else
assert(oldsample == 0u);
#endif
constexpr uint32_t lastLevelStart = MAX_SAMPLES/2u-1u;
uint32_t index = oldsample>>(OUT_BITS+1u - MAX_SAMPLES_LOG2);
index += lastLevelStart;
inline uint32_t sample(uint32_t sampleNum) const
{
const uint32_t oldsample = sampler.sample(dimension,sampleNum);
#ifdef _NBL_DEBUG
assert(sampleNum<MAX_SAMPLES);
if (sampleNum)
assert((oldsample&(0x7fffffffu>>hlsl::findMSB(sampleNum))) == 0u);
else
assert(oldsample == 0u);
#endif
constexpr uint32_t lastLevelStart = MAX_SAMPLES/2u-1u;
uint32_t index = oldsample>>(OUT_BITS+1u - MAX_SAMPLES_LOG2);
index += lastLevelStart;

return oldsample^cachedFlip[index];
}
return oldsample^cachedFlip[index];
}

//!
inline void resetDimensionCounter(uint32_t dimension)
{
/** NOTES:
- For 64k samples, we can store their positions in uint16_t
- The last levels of the Owen tree can be collapsed to a single node (because trailing bits are always 00000.....)
- The above can be stored in 1x array of sample count uint16_t/uint32_t per Dimension
- We should store samples as uint32_t always because the total amount of memory to fetch is always the same
**/
for (uint32_t i=0u; i<MAX_SAMPLES-1u; i++)
{
uint32_t randMask = (i<(MAX_SAMPLES/2u-1u)) ? 0x80000000u:0xffffffffu;
cachedFlip[i] = mersenneTwister()&(randMask>>getTreeDepth(i));
}
for (uint32_t i=1u; i<MAX_SAMPLES_LOG2; i++)
{
uint32_t previousLevelStart = (0x1u<<(i-1u))-1u;
uint32_t currentLevelStart = (0x1u<<i)-1u;
uint32_t currentLevelSize = 0x1u<<i;
for (uint32_t j=0u; j<currentLevelSize; j++)
cachedFlip[currentLevelStart+j] |= cachedFlip[previousLevelStart+(j>>1u)];
#ifdef _NBL_DEBUG
for (uint32_t j=0u; j<currentLevelSize; j+=2)
private:
friend class OwenSampler;
inline SDimensionSampler(const SequenceSampler& _sampler, const uint32_t seed, const uint32_t _dimension) : sampler(_sampler), dimension(_dimension),
mersenneTwister(std::hash<uint64_t>()((uint64_t(_dimension)<<32)|seed))
{
const uint32_t highBitMask = 0xffffffffu<<(OUT_BITS-i);
uint32_t left = cachedFlip[currentLevelStart+j];
uint32_t right = cachedFlip[currentLevelStart+j+1];
assert(((left^right)&highBitMask)==0u);
assert((left&right&highBitMask)==cachedFlip[previousLevelStart+(j>>1u)]);
cachedFlip.resize(MAX_SAMPLES-1u);
/** NOTES:
- For 64k samples, we can store their positions in uint16_t
- The last levels of the Owen tree can be collapsed to a single node (because trailing bits are always 00000.....)
- The above can be stored in 1x array of sample count uint16_t/uint32_t per Dimension
- We should store samples as uint32_t always because the total amount of memory to fetch is always the same
**/
for (uint32_t i=0u; i<MAX_SAMPLES-1u; i++)
{
uint32_t randMask = (i<(MAX_SAMPLES/2u-1u)) ? 0x80000000u:0xffffffffu;
cachedFlip[i] = mersenneTwister()&(randMask>>getTreeDepth(i));
}
for (uint32_t i=1u; i<MAX_SAMPLES_LOG2; i++)
{
uint32_t previousLevelStart = (0x1u<<(i-1u))-1u;
uint32_t currentLevelStart = (0x1u<<i)-1u;
uint32_t currentLevelSize = 0x1u<<i;
for (uint32_t j=0u; j<currentLevelSize; j++)
cachedFlip[currentLevelStart+j] |= cachedFlip[previousLevelStart+(j>>1u)];
#ifdef _NBL_DEBUG
for (uint32_t j=0u; j<currentLevelSize; j+=2)
{
const uint32_t highBitMask = 0xffffffffu<<(OUT_BITS-i);
uint32_t left = cachedFlip[currentLevelStart+j];
uint32_t right = cachedFlip[currentLevelStart+j+1];
assert(((left^right)&highBitMask)==0u);
assert((left&right&highBitMask)==cachedFlip[previousLevelStart+(j>>1u)]);
}
#endif
}
}
inline uint32_t getTreeDepth(uint32_t sampleNum)
{
return hlsl::findMSB(sampleNum+1u);
}
#endif
}
lastDim = dimension;
}

protected:
// if we don't limit the sample count, then due to IEEE754 precision, we'll get duplicate sample coordinate values, ruining the net property
_NBL_STATIC_INLINE_CONSTEXPR uint32_t OUT_BITS = sizeof(uint32_t)*8u;
_NBL_STATIC_INLINE_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 24u;
_NBL_STATIC_INLINE_CONSTEXPR uint32_t MAX_SAMPLES = 0x1u<<MAX_SAMPLES_LOG2;

inline uint32_t getTreeDepth(uint32_t sampleNum)
const SequenceSampler& sampler;
std::mt19937 mersenneTwister;
core::vector<uint32_t> cachedFlip;
const uint32_t dimension;
};
inline SDimensionSampler prepareDimension(const uint64_t dim) const
{
return hlsl::findMSB(sampleNum+1u);
return SDimensionSampler(*this,seed,dim);
}

std::mt19937 mersenneTwister;
uint32_t lastDim;
core::vector<uint32_t> cachedFlip;
};
private:
uint32_t seed;
};


}
Expand Down
2 changes: 1 addition & 1 deletion include/nbl/core/sampling/SobolSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class SobolSampler
}

// Idea for optimization, do PoT samples per pass, then can precompute most of the `retval`
inline uint32_t sample(uint32_t dim, uint32_t sampleNum)
inline uint32_t sample(uint32_t dim, uint32_t sampleNum) const
{
#ifdef _DEBUG
assert(dim<dimensions);
Expand Down
13 changes: 10 additions & 3 deletions src/nbl/ext/ImGui/ImGui.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "nbl/ext/ImGui/ImGui.h"
#include "nbl/ext/ImGui/builtin/hlsl/common.hlsl"
#include "imgui/imgui.h"
#include "imgui/imgui_internal.h"
#include "imgui/misc/cpp/imgui_stdlib.h"

#ifdef NBL_EMBED_BUILTIN_RESOURCES
Expand Down Expand Up @@ -948,14 +949,20 @@ UI::UI(SCreationParameters&& creationParams, core::smart_refctd_ptr<video::IGPUG
UI::~UI()
{
// I assume nobody has already killed the ImGui context & atlas, but if they have then we do nothing
auto* const context = reinterpret_cast<ImGuiContext*>(m_imContextBackPointer);
ImGuiContext* const previousContext = ImGui::GetCurrentContext();
if (context && previousContext != context)
ImGui::SetCurrentContext(context);

// we must call it to unlock the atlas from a potential "render" state before we kill it (obviously only if it's ours!)
if(m_imFontAtlasBackPointer)
if (m_imFontAtlasBackPointer && context && context->WithinFrameScope)
ImGui::EndFrame();

// context belongs to the instance, we must free it
if(m_imContextBackPointer)
ImGui::DestroyContext(reinterpret_cast<ImGuiContext*>(m_imContextBackPointer));
if (context)
ImGui::DestroyContext(context);
if (previousContext && previousContext != context)
ImGui::SetCurrentContext(previousContext);

// and if we own the atlas we must free it as well; if the user passed their own at creation time then it's "shared" - at this point m_imFontAtlasBackPointer is nullptr and we don't free anything
if (m_imFontAtlasBackPointer)
Expand Down
Loading