Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples_tests
Submodule examples_tests updated 67 files
+61 −56 31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl
+0 −1 31_HLSLPathTracer/app_resources/hlsl/scene_rectangle_light.hlsl
+169 −34 37_HLSLSamplingTests/CMakeLists.txt
+77 −27 37_HLSLSamplingTests/app_resources/common/alias_table.hlsl
+0 −1 37_HLSLSamplingTests/app_resources/common/array_accessor.hlsl
+6 −0 37_HLSLSamplingTests/app_resources/common/bilinear.hlsl
+3 −0 37_HLSLSamplingTests/app_resources/common/box_muller_transform.hlsl
+11 −1 37_HLSLSamplingTests/app_resources/common/concentric_mapping.hlsl
+2 −0 37_HLSLSamplingTests/app_resources/common/cumulative_probability.hlsl
+8 −9 37_HLSLSamplingTests/app_resources/common/discrete_sampler_bench.hlsl
+264 −0 37_HLSLSamplingTests/app_resources/common/jacobian_test.hlsl
+3 −0 37_HLSLSamplingTests/app_resources/common/linear.hlsl
+19 −1 37_HLSLSamplingTests/app_resources/common/polar_mapping.hlsl
+7 −1 37_HLSLSamplingTests/app_resources/common/projected_hemisphere.hlsl
+3 −0 37_HLSLSamplingTests/app_resources/common/projected_sphere.hlsl
+21 −23 37_HLSLSamplingTests/app_resources/common/projected_spherical_rectangle.hlsl
+13 −8 37_HLSLSamplingTests/app_resources/common/projected_spherical_triangle.hlsl
+12 −4 37_HLSLSamplingTests/app_resources/common/spherical_rectangle.hlsl
+11 −4 37_HLSLSamplingTests/app_resources/common/spherical_triangle.hlsl
+7 −1 37_HLSLSamplingTests/app_resources/common/uniform_hemisphere.hlsl
+8 −1 37_HLSLSamplingTests/app_resources/common/uniform_sphere.hlsl
+0 −77 37_HLSLSamplingTests/app_resources/shaders/alias_table_test.comp.hlsl
+17 −13 37_HLSLSamplingTests/app_resources/shaders/bilinear_test.comp.hlsl
+17 −13 37_HLSLSamplingTests/app_resources/shaders/box_muller_transform_test.comp.hlsl
+13 −9 37_HLSLSamplingTests/app_resources/shaders/concentric_mapping_test.comp.hlsl
+11 −10 37_HLSLSamplingTests/app_resources/shaders/cumulative_probability_test.comp.hlsl
+17 −13 37_HLSLSamplingTests/app_resources/shaders/linear_test.comp.hlsl
+114 −0 37_HLSLSamplingTests/app_resources/shaders/packed_alias_test.comp.hlsl
+13 −9 37_HLSLSamplingTests/app_resources/shaders/polar_mapping_test.comp.hlsl
+13 −9 37_HLSLSamplingTests/app_resources/shaders/projected_hemisphere_test.comp.hlsl
+13 −9 37_HLSLSamplingTests/app_resources/shaders/projected_sphere_test.comp.hlsl
+42 −16 37_HLSLSamplingTests/app_resources/shaders/projected_spherical_rectangle_test.comp.hlsl
+30 −13 37_HLSLSamplingTests/app_resources/shaders/projected_spherical_triangle_test.comp.hlsl
+89 −17 37_HLSLSamplingTests/app_resources/shaders/spherical_rectangle_test.comp.hlsl
+29 −13 37_HLSLSamplingTests/app_resources/shaders/spherical_triangle.comp.hlsl
+63 −5 37_HLSLSamplingTests/app_resources/shaders/test_compile.comp.hlsl
+13 −9 37_HLSLSamplingTests/app_resources/shaders/uniform_hemisphere_test.comp.hlsl
+13 −9 37_HLSLSamplingTests/app_resources/shaders/uniform_sphere_test.comp.hlsl
+277 −254 37_HLSLSamplingTests/benchmarks/CDiscreteSamplerBenchmark.h
+52 −74 37_HLSLSamplingTests/benchmarks/CSamplerBenchmark.h
+148 −75 37_HLSLSamplingTests/main.cpp
+36 −15 37_HLSLSamplingTests/tests/CAliasTableGPUTester.h
+4 −3 37_HLSLSamplingTests/tests/CBilinearTester.h
+2 −1 37_HLSLSamplingTests/tests/CBoxMullerTransformTester.h
+3 −2 37_HLSLSamplingTests/tests/CConcentricMappingTester.h
+2 −1 37_HLSLSamplingTests/tests/CCumulativeProbabilityGPUTester.h
+371 −237 37_HLSLSamplingTests/tests/CDiscreteTableTester.h
+6 −5 37_HLSLSamplingTests/tests/CLinearTester.h
+3 −2 37_HLSLSamplingTests/tests/CPolarMappingTester.h
+5 −4 37_HLSLSamplingTests/tests/CProjectedHemisphereTester.h
+4 −3 37_HLSLSamplingTests/tests/CProjectedSphereTester.h
+36 −47 37_HLSLSamplingTests/tests/CProjectedSphericalRectangleTester.h
+18 −14 37_HLSLSamplingTests/tests/CProjectedSphericalTriangleTester.h
+20 −15 37_HLSLSamplingTests/tests/CSphericalRectangleTester.h
+9 −6 37_HLSLSamplingTests/tests/CSphericalTriangleTester.h
+3 −2 37_HLSLSamplingTests/tests/CUniformHemisphereTester.h
+3 −2 37_HLSLSamplingTests/tests/CUniformSphereTester.h
+328 −154 37_HLSLSamplingTests/tests/SamplerTestHelpers.h
+148 −72 37_HLSLSamplingTests/tests/property/CSamplerPropertyTester.h
+8 −10 40_PathTracer/app_resources/pathtrace/beauty.hlsl
+13 −4 40_PathTracer/app_resources/pathtrace/common.hlsl
+32 −91 64_EmulatedFloatTest/main.cpp
+95 −0 75_SobolBenchmark/CMakeLists.txt
+199 −0 75_SobolBenchmark/app_resources/shaders/sobol_bench.comp.hlsl
+264 −0 75_SobolBenchmark/benchmarks/CSobolBenchmark.h
+274 −0 75_SobolBenchmark/main.cpp
+1 −0 CMakeLists.txt
4 changes: 3 additions & 1 deletion include/nbl/builtin/hlsl/algorithm.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ struct bound_t

void comp_step(NBL_REF_ARG(Accessor) accessor, const uint32_t testPoint, const uint32_t rightBegin)
{
if (compare(accessor[testPoint],value))
typename Accessor::value_type val;
accessor.get(testPoint, val);
if (compare(val,value))
it = rightBegin;
}
void comp_step(NBL_REF_ARG(Accessor) accessor, const uint32_t testPoint)
Expand Down
24 changes: 18 additions & 6 deletions include/nbl/builtin/hlsl/functional.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,23 @@ struct reference_wrapper : enable_if_t<
return lhs OP rhs; \
}

// Non-C++ branch of the comparison functors: opens `template<typename T> struct NAME`
// exposing `type_t` and an `operator()` that evaluates `lhs OP rhs` and returns it as a scalar `bool`.
// The struct is intentionally left unclosed; every use site supplies the closing `};`.
#define ALIAS_STD_CMP(NAME,OP) template<typename T NBL_STRUCT_CONSTRAINABLE > struct NAME { \
using type_t = T; \
\
bool operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \
{ \
return lhs OP rhs; \
}


#else // CPP

#define ALIAS_STD(NAME,OP) template<typename T> struct NAME : std::NAME<T> { \
using type_t = T;

// C++ branch of the comparison functors: NAME derives from `std::NAME<T>`, so the call
// operator is inherited from the standard library and the OP argument is unused here.
// The struct is left open; every use site supplies the closing `};`.
#define ALIAS_STD_CMP(NAME,OP) template<typename T> struct NAME : std::NAME<T> { \
using type_t = T;

#endif

ALIAS_STD(bit_and,&)
Expand Down Expand Up @@ -136,14 +147,15 @@ ALIAS_STD(divides,/)
};


ALIAS_STD(equal_to, ==) };
ALIAS_STD(not_equal_to, !=) };
ALIAS_STD(greater, >) };
ALIAS_STD(less, <) };
ALIAS_STD(greater_equal, >=) };
ALIAS_STD(less_equal, <=) };
ALIAS_STD_CMP(equal_to, ==) };
ALIAS_STD_CMP(not_equal_to, !=) };
ALIAS_STD_CMP(greater, >) };
ALIAS_STD_CMP(less, <) };
ALIAS_STD_CMP(greater_equal, >=) };
ALIAS_STD_CMP(less_equal, <=) };

#undef ALIAS_STD
#undef ALIAS_STD_CMP

// The above comparison operators return bool on STD, but in HLSL they're supposed to yield bool vectors, so here's a specialization so that they return `vector<bool, N>` for vectorial types

Expand Down
2 changes: 1 addition & 1 deletion include/nbl/builtin/hlsl/ies/sampler.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct CandelaSampler
const angle_t vAngle = degrees(polar.theta);
const angle_t hAngle = degrees(__wrapPhi(polar.phi, symmetry));

#define NBL_IES_DEF_ANGLE_ACC(T, EXPR) struct T { using value_type = angle_t; accessor_t acc; value_type operator[](uint32_t idx) NBL_CONST_MEMBER_FUNC { return EXPR; } };
#define NBL_IES_DEF_ANGLE_ACC(T, EXPR) struct T { using value_type = angle_t; accessor_t acc; value_type operator[](uint32_t idx) NBL_CONST_MEMBER_FUNC { return EXPR; } void get(uint32_t idx, NBL_REF_ARG(value_type) val) NBL_CONST_MEMBER_FUNC { val = EXPR; } };

NBL_IES_DEF_ANGLE_ACC(VAcc, acc.vAngle(idx))
NBL_IES_DEF_ANGLE_ACC(HAcc, acc.hAngle(idx))
Expand Down
19 changes: 14 additions & 5 deletions include/nbl/builtin/hlsl/math/functions.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,22 @@ scalar_type_t<T> lpNorm(NBL_CONST_REF_ARG(T) v)


// valid only for `theta` in [-PI,PI]
template <typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T>)
// UseRealSinCos=true -> back-to-back sin + cos. Saturates the special-function pipeline, enables vendor sincos fusion, full precision near multiples of pi.
// UseRealSinCos=false -> cos + sqrt(1-c*c) with sign recovered from theta. Saves one special-function op when cos alone is cheaper than sin+cos, but suffers catastrophic cancellation as |c| -> 1.
template <typename T, bool UseRealSinCos = true NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T>)
void sincos(T theta, NBL_REF_ARG(T) s, NBL_REF_ARG(T) c)
{
s = sin<T>(theta);
c = cos<T>(theta);
// s = sqrt<T>(T(NBL_FP64_LITERAL(1.0))-c*c);
// s = ieee754::flipSign(s, theta < T(NBL_FP64_LITERAL(0.0)));
if (UseRealSinCos)
{
s = sin<T>(theta);
c = cos<T>(theta);
}
else
{
c = cos<T>(theta);
s = sqrt<T>(T(NBL_FP64_LITERAL(1.0))-c*c);
s = ieee754::flipSign(s, theta < T(NBL_FP64_LITERAL(0.0)));
}
}

template <typename T NBL_FUNC_REQUIRES(vector_traits<T>::Dimension == 3)
Expand Down
201 changes: 152 additions & 49 deletions include/nbl/builtin/hlsl/sampling/alias_table.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <nbl/builtin/hlsl/cpp_compat.hlsl>
#include <nbl/builtin/hlsl/bit.hlsl>
#include <nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl>
#include <nbl/builtin/hlsl/concepts/core.hlsl>
#include <nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl>

Expand All @@ -17,84 +18,187 @@ namespace hlsl
namespace sampling
{

// Alias Method (Vose/Walker) discrete sampler.
//
// Samples a discrete index in [0, N) with probability proportional to
// precomputed weights in O(1) time per sample, using a prebuilt alias table.
//
// Accessor template parameters must satisfy GenericReadAccessor:
// accessor.template get<V, I>(index, outVal) // void, writes to outVal
//
// - ProbabilityAccessor: reads scalar_type threshold in [0, 1] for bin i
// - AliasIndexAccessor: reads uint32_t redirect index for bin i
// - PdfAccessor: reads scalar_type weight[i] / totalWeight
//
// Satisfies TractableSampler (not BackwardTractableSampler: the mapping is discrete).
// The cache stores the PDF value looked up during generate, avoiding redundant
// storage of the codomain (sampled index) which is already the return value.
template<typename T, typename Domain, typename Codomain, typename ProbabilityAccessor, typename AliasIndexAccessor, typename PdfAccessor
// Packed alias-entry bit layout shared by every packed variant. One 32-bit
// word holds the redirect index in the low Log2N bits and the stay-
// probability quantized as an unorm in the high (32 - Log2N) bits.
// u * N = scaled; bin = floor(scaled); remainder = scaled - bin
// if (remainder < getStayProb(word)) -> result = bin
// else -> result = getTarget(word)
// Quantizing the threshold to (32 - Log2N) bits is precision-neutral: `u`
// already spent Log2N of its random bits on selecting `bin`, so `remainder`
// carries at most the remaining (32 - Log2N) bits of discriminatory power.
namespace impl
{
// Stateless decoder for a packed 32-bit alias word, parameterised on the
// number of bits (Log2N) reserved for the redirect index.
template<uint32_t Log2N>
struct AliasBitDecoder
{
    // Redirect index: the low Log2N bits of `word`.
    static uint32_t getTarget(uint32_t word)
    {
        const uint32_t targetMask = (1u << Log2N) - 1u;
        return word & targetMask;
    }

    // Stay probability: the high (32 - Log2N) bits of `word` read as an
    // unorm, i.e. divided by the largest value those bits can hold.
    template<typename T>
    static T getStayProb(uint32_t word)
    {
        const uint32_t quantized = word >> Log2N;
        const uint32_t largestQuantized = (~0u) >> Log2N;
        return T(quantized) / T(largestQuantized);
    }
};
} // namespace impl

// 8 B entry used by the NBig == true variant. Embeds the bin's own pdf
// alongside the packed word so the common stay-case needs no extra tap.
// T is the scalar type used for the stored pdf.
// `packedWord` follows the shared packed layout and is meant to be decoded
// with impl::AliasBitDecoder<Log2N>; Log2N itself is not stored in the entry.
template<typename T>
struct PackedAliasEntryB
{
uint32_t packedWord; // low Log2N bits: redirect target; high (32 - Log2N) bits: stay probability as an unorm
T ownPdf; // pdf of the bin this entry belongs to (not of the redirect target)
};


// NBig == false: 4 B packed word per bin + separate pdf[] array. Per sample
// = one 4 B word load + one unconditional 4 B pdf[] tap indexed by the
// selected bin (either the current bin or its redirect). Total 8 B whether
// the sample stays or aliases. Favours small N.
template<typename T, typename Domain, typename Codomain, typename PackedWordAccessor, typename PdfAccessor, uint32_t Log2N
NBL_PRIMARY_REQUIRES(
concepts::UnsignedIntegralScalar<Codomain> &&
concepts::accessors::GenericReadAccessor<ProbabilityAccessor, T, Codomain> &&
concepts::accessors::GenericReadAccessor<AliasIndexAccessor, Codomain, Codomain> &&
concepts::accessors::GenericReadAccessor<PackedWordAccessor, uint32_t, Codomain> &&
concepts::accessors::GenericReadAccessor<PdfAccessor, T, Codomain>)
struct AliasTable
struct PackedAliasTableA
{
using scalar_type = T;

using domain_type = Domain;
using codomain_type = Codomain;
using density_type = scalar_type;
using weight_type = density_type;
using decoder = impl::AliasBitDecoder<Log2N>;
NBL_CONSTEXPR_STATIC_INLINE bool NBig = false;

struct cache_type
{
density_type pdf;
};

static AliasTable create(NBL_CONST_REF_ARG(ProbabilityAccessor) _probAccessor, NBL_CONST_REF_ARG(AliasIndexAccessor) _aliasAccessor, NBL_CONST_REF_ARG(PdfAccessor) _pdfAccessor, codomain_type _size)
static PackedAliasTableA create(NBL_CONST_REF_ARG(PackedWordAccessor) _entryAcc, NBL_CONST_REF_ARG(PdfAccessor) _pdfAcc, codomain_type _size)
{
AliasTable retval;
retval.probAccessor = _probAccessor;
retval.aliasAccessor = _aliasAccessor;
retval.pdfAccessor = _pdfAccessor;
// Precompute tableSize as float minus 1 ULP so that u=1.0 maps to bin N-1
PackedAliasTableA retval;
retval.entryAcc = _entryAcc;
retval.pdfAcc = _pdfAcc;
const scalar_type exact = scalar_type(_size);
retval.tableSizeMinusUlp = nbl::hlsl::bit_cast<scalar_type>(nbl::hlsl::bit_cast<uint32_t>(exact) - 1u);
return retval;
}

// BasicSampler interface
codomain_type generate(const domain_type u) NBL_CONST_MEMBER_FUNC
{
const scalar_type scaled = u * tableSizeMinusUlp;
const codomain_type bin = _static_cast<codomain_type>(scaled);
const scalar_type remainder = scaled - scalar_type(bin);

scalar_type prob;
probAccessor.template get<scalar_type, codomain_type>(bin, prob);

// Use if-statement to avoid select: aliasIndex is a dependent read
codomain_type result;
if (remainder < prob)
{
result = bin;
}
else
{
codomain_type alias;
aliasAccessor.template get<codomain_type, codomain_type>(bin, alias);
result = alias;
}
uint32_t packedWord;
entryAcc.template get<uint32_t, codomain_type>(bin, packedWord);
return hlsl::select(remainder < decoder::template getStayProb<scalar_type>(packedWord), bin, codomain_type(decoder::getTarget(packedWord)));
}

// Cached overload: picks a bin exactly like generate(u), then looks up the
// pdf of the bin that was finally chosen and stores it in `cache.pdf`.
codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
{
    const codomain_type chosenBin = generate(u);

    // the pdf tap is unconditional and indexed by the selected bin
    scalar_type chosenPdf;
    pdfAcc.template get<scalar_type, codomain_type>(chosenBin, chosenPdf);
    cache.pdf = chosenPdf;

    return chosenBin;
}

// TractableSampler interface
// Forward pdf of the sample: simply the value the cached generate() stored.
// `u` is not consulted.
density_type forwardPdf(const domain_type u, NBL_CONST_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
{
    const density_type cachedPdf = cache.pdf;
    return cachedPdf;
}

// Forward weight of the sample: returns the pdf held in `cache`, the same
// value forwardPdf reports. `u` is not consulted.
weight_type forwardWeight(const domain_type u, NBL_CONST_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
{
    const weight_type cachedWeight = cache.pdf;
    return cachedWeight;
}

// Pdf of an already-known outcome `v`: read straight from the pdf accessor.
density_type backwardPdf(const codomain_type v) NBL_CONST_MEMBER_FUNC
{
    scalar_type binPdf;
    pdfAcc.template get<scalar_type, codomain_type>(v, binPdf);
    return binPdf;
}

// Weight of an already-known outcome `v`; identical to its backward pdf.
weight_type backwardWeight(const codomain_type v) NBL_CONST_MEMBER_FUNC
{
    const weight_type weight = backwardPdf(v);
    return weight;
}

PackedWordAccessor entryAcc;
PdfAccessor pdfAcc;
scalar_type tableSizeMinusUlp;
};

// NBig == true: 8 B entry {packedWord, ownPdf} + separate pdf[] array. Per
// sample = one 8 B entry load (covers the common stay case where cache
// already has ownPdf). If the sample aliases, a conditional 4 B pdf[target]
// tap fills the cache. Total 8 B stay, 12 B aliased. Favours large N.
template<typename T, typename Domain, typename Codomain, typename EntryAccessor, typename PdfAccessor, uint32_t Log2N
NBL_PRIMARY_REQUIRES(
concepts::UnsignedIntegralScalar<Codomain> &&
concepts::accessors::GenericReadAccessor<EntryAccessor, PackedAliasEntryB<T>, Codomain> &&
concepts::accessors::GenericReadAccessor<PdfAccessor, T, Codomain>)
struct PackedAliasTableB
{
using scalar_type = T;
using domain_type = Domain;
using codomain_type = Codomain;
using density_type = scalar_type;
using weight_type = density_type;
using entry_type = PackedAliasEntryB<scalar_type>;
using decoder = impl::AliasBitDecoder<Log2N>;
NBL_CONSTEXPR_STATIC_INLINE bool NBig = true;

struct cache_type
{
density_type pdf;
};

// Factory: copies both accessors into a new sampler and precomputes the
// bin-scaling factor from the table size.
static PackedAliasTableB create(NBL_CONST_REF_ARG(EntryAccessor) _entryAcc, NBL_CONST_REF_ARG(PdfAccessor) _pdfAcc, codomain_type _size)
{
    PackedAliasTableB table;
    table.entryAcc = _entryAcc;
    table.pdfAcc = _pdfAcc;

    // Table size minus one ULP, obtained by decrementing the float's bit
    // pattern, so that u = 1.0 still maps to the last bin (N-1).
    // NOTE(review): the round-trip through uint32_t presumes a 32-bit scalar_type - confirm.
    const uint32_t sizeBits = nbl::hlsl::bit_cast<uint32_t>(scalar_type(_size));
    table.tableSizeMinusUlp = nbl::hlsl::bit_cast<scalar_type>(sizeBits - 1u);

    return table;
}

// Cache-less sampling: scale `u` into the table, split it into a bin index
// and a fractional remainder, then either keep the bin or follow its
// redirect depending on how the remainder compares to the stay probability.
codomain_type generate(const domain_type u) NBL_CONST_MEMBER_FUNC
{
    const scalar_type scaledU = u * tableSizeMinusUlp;
    const codomain_type binIdx = _static_cast<codomain_type>(scaledU);
    const scalar_type frac = scaledU - scalar_type(binIdx);

    // one entry load yields both the packed word and the bin's own pdf (unused here)
    entry_type binEntry;
    entryAcc.template get<entry_type, codomain_type>(binIdx, binEntry);

    const scalar_type stayProb = decoder::template getStayProb<scalar_type>(binEntry.packedWord);
    const codomain_type redirect = codomain_type(decoder::getTarget(binEntry.packedWord));
    return hlsl::select(frac < stayProb, binIdx, redirect);
}

codomain_type generate(const domain_type u, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC
{
const codomain_type result = generate(u);
pdfAccessor.template get<scalar_type, codomain_type>(result, cache.pdf);
const scalar_type scaled = u * tableSizeMinusUlp;
const codomain_type bin = _static_cast<codomain_type>(scaled);
const scalar_type remainder = scaled - scalar_type(bin);

entry_type entry;
entryAcc.template get<entry_type, codomain_type>(bin, entry);

const bool stay = remainder < decoder::template getStayProb<scalar_type>(entry.packedWord);

cache.pdf = entry.ownPdf;
codomain_type result = bin;
if (!stay)
{
const codomain_type target = codomain_type(decoder::getTarget(entry.packedWord));
pdfAcc.template get<scalar_type, codomain_type>(target, cache.pdf);
result = target;
}
return result;
}

Expand All @@ -111,7 +215,7 @@ struct AliasTable
density_type backwardPdf(const codomain_type v) NBL_CONST_MEMBER_FUNC
{
scalar_type pdf;
pdfAccessor.template get<scalar_type, codomain_type>(v, pdf);
pdfAcc.template get<scalar_type, codomain_type>(v, pdf);
return pdf;
}

Expand All @@ -120,9 +224,8 @@ struct AliasTable
return backwardPdf(v);
}

ProbabilityAccessor probAccessor;
AliasIndexAccessor aliasAccessor;
PdfAccessor pdfAccessor;
EntryAccessor entryAcc;
PdfAccessor pdfAcc;
scalar_type tableSizeMinusUlp;
};

Expand Down
Loading
Loading