Devsh-Graphics-Programming
diff --git a/‎examples_tests‎ b/‎examples_tests‎
diff --git a/‎include/nbl/builtin/hlsl/algorithm.hlsl‎
Lines changed: 3 additions & 1 deletion b/‎include/nbl/builtin/hlsl/algorithm.hlsl‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎include/nbl/builtin/hlsl/functional.hlsl‎
Lines changed: 18 additions & 6 deletions b/‎include/nbl/builtin/hlsl/functional.hlsl‎
Lines changed: 18 additions & 6 deletions
diff --git a/‎include/nbl/builtin/hlsl/ies/sampler.hlsl‎
Lines changed: 1 addition & 1 deletion b/‎include/nbl/builtin/hlsl/ies/sampler.hlsl‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/nbl/builtin/hlsl/math/functions.hlsl‎
Lines changed: 14 additions & 5 deletions b/‎include/nbl/builtin/hlsl/math/functions.hlsl‎
Lines changed: 14 additions & 5 deletions
diff --git a/‎include/nbl/builtin/hlsl/sampling/bilinear.hlsl‎
Lines changed: 1 addition & 1 deletion b/‎include/nbl/builtin/hlsl/sampling/bilinear.hlsl‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl‎
Lines changed: 1 addition & 1 deletion b/‎include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/nbl/builtin/hlsl/sampling/linear.hlsl‎
Lines changed: 9 additions & 9 deletions b/‎include/nbl/builtin/hlsl/sampling/linear.hlsl‎
Lines changed: 9 additions & 9 deletions
@@ -142,7 +142,9 @@ struct bound_t
 
     void comp_step(NBL_REF_ARG(Accessor) accessor, const uint32_t testPoint, const uint32_t rightBegin)
     {
-        if (compare(accessor[testPoint],value))
+        typename Accessor::value_type val;
+        accessor.get(testPoint, val);
+        if (compare(val,value))
             it = rightBegin;
     }
     void comp_step(NBL_REF_ARG(Accessor) accessor, const uint32_t testPoint)
 
@@ -89,12 +89,23 @@ struct reference_wrapper : enable_if_t<
         return lhs OP rhs; \
     }
 
+#define ALIAS_STD_CMP(NAME,OP) template<typename T NBL_STRUCT_CONSTRAINABLE > struct NAME { \
+    using type_t = T; \
+    \
+    bool operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \
+    { \
+        return lhs OP rhs; \
+    }
+
 
 #else // CPP
 
 #define ALIAS_STD(NAME,OP) template<typename T> struct NAME : std::NAME<T> { \
     using type_t = T;
 
+#define ALIAS_STD_CMP(NAME,OP) template<typename T> struct NAME : std::NAME<T> { \
+    using type_t = T;
+
 #endif
 
 ALIAS_STD(bit_and,&)
@@ -136,14 +147,15 @@ ALIAS_STD(divides,/)
 };
 
 
-ALIAS_STD(equal_to, ==) };
-ALIAS_STD(not_equal_to, !=) };
-ALIAS_STD(greater, >) };
-ALIAS_STD(less, <) };
-ALIAS_STD(greater_equal, >=) };
-ALIAS_STD(less_equal, <=) };
+ALIAS_STD_CMP(equal_to, ==) };
+ALIAS_STD_CMP(not_equal_to, !=) };
+ALIAS_STD_CMP(greater, >) };
+ALIAS_STD_CMP(less, <) };
+ALIAS_STD_CMP(greater_equal, >=) };
+ALIAS_STD_CMP(less_equal, <=) };
 
 #undef ALIAS_STD
+#undef ALIAS_STD_CMP
 
 // The above comparison operators return bool on STD, but in HLSL they're supposed to yield bool vectors, so here's a specialization so that they return `vector<bool, N>` for vectorial types
 
 
@@ -85,7 +85,7 @@ struct CandelaSampler
         const angle_t vAngle = degrees(polar.theta);
         const angle_t hAngle = degrees(__wrapPhi(polar.phi, symmetry));
 
-#define NBL_IES_DEF_ANGLE_ACC(T, EXPR) struct T { using value_type = angle_t; accessor_t acc; value_type operator[](uint32_t idx) NBL_CONST_MEMBER_FUNC { return EXPR; } };
+#define NBL_IES_DEF_ANGLE_ACC(T, EXPR) struct T { using value_type = angle_t; accessor_t acc; value_type operator[](uint32_t idx) NBL_CONST_MEMBER_FUNC { return EXPR; } void get(uint32_t idx, NBL_REF_ARG(value_type) val) NBL_CONST_MEMBER_FUNC { val = EXPR; } };
 
         NBL_IES_DEF_ANGLE_ACC(VAcc, acc.vAngle(idx))
         NBL_IES_DEF_ANGLE_ACC(HAcc, acc.hAngle(idx))
 
@@ -93,13 +93,22 @@ scalar_type_t<T> lpNorm(NBL_CONST_REF_ARG(T) v)
 
 
 // valid only for `theta` in [-PI,PI]
-template <typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T>)
+// UseRealSinCos=true  -> back-to-back sin + cos. Saturates the special-function pipeline, enables vendor sincos fusion, full precision near multiples of pi.
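+// UseRealSinCos=false -> cos + sqrt(1-c*c) with sign recovered from theta. Saves one special-function op when cos alone is cheaper than sin+cos, but suffers catastrophic cancellation as |c| -> 1. This is the path that relies on `theta` being in [-PI,PI], because the sign of `s` is taken from the sign of `theta`.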
+template <typename T, bool UseRealSinCos = true NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T>)
 void sincos(T theta, NBL_REF_ARG(T) s, NBL_REF_ARG(T) c)
 {
-    s = sin<T>(theta);
-    c = cos<T>(theta);
-    // s = sqrt<T>(T(NBL_FP64_LITERAL(1.0))-c*c);
-    // s = ieee754::flipSign(s, theta < T(NBL_FP64_LITERAL(0.0)));
+    if (UseRealSinCos)
+    {
+        s = sin<T>(theta);
+        c = cos<T>(theta);
+    }
+    else
+    {
+        c = cos<T>(theta);
+        s = sqrt<T>(T(NBL_FP64_LITERAL(1.0))-c*c);
+        s = ieee754::flipSign(s, theta < T(NBL_FP64_LITERAL(0.0)));
+    }
 }
 
 template <typename T NBL_FUNC_REQUIRES(vector_traits<T>::Dimension == 3)
 
@@ -65,7 +65,7 @@ struct Bilinear
 
         // bilinear PDF = marginal_y_pdf * conditional_x_pdf; reuse both linear caches
         const scalar_type yPdf = lineary.forwardPdf(u.y, linearYCache);
-        cache.normalizedStart = yPdf * linearx.linearCoeffStart;
+        cache.normalizedStart = yPdf * linearx.normalizedCoeffStart;
         cache.linearXCache.diffTimesX *= yPdf;
         return p;
     }
 
@@ -93,7 +93,7 @@ struct ProjectedSphere
 	static codomain_type __generate(NBL_REF_ARG(domain_type) u)
 	{
 		vector_t3 retval = hemisphere_t::__generate(u.xy);
-		const bool chooseLower = u.z > T(0.5);
+		const bool chooseLower = u.z > scalar_type(0.5);
 		retval.z = chooseLower ? (-retval.z) : retval.z;
 		if (chooseLower)
 			u.z -= T(0.5);
 
@@ -39,11 +39,11 @@ struct Linear
         // add min to both coefficients so (0,0) input produces a valid uniform sampler
         // instead of inf normalization (2/0) leading to NaN; negligible for normal inputs
         const vector2_type safeCoeffs = linearCoeffs + vector2_type(hlsl::numeric_limits<scalar_type>::min, hlsl::numeric_limits<scalar_type>::min);
-        // normalize coefficients so that the PDF is simply linearCoeffStart + linearCoeffDiff * x
+        // normalize coefficients so that the PDF is simply normalizedCoeffStart + (normalizedCoeffEnd - normalizedCoeffStart) * x
         const scalar_type normFactor = scalar_type(2.0) / (safeCoeffs[0] + safeCoeffs[1]);
         const vector2_type normalized = safeCoeffs * normFactor;
-        retval.linearCoeffStart = normalized[0];
-        retval.linearCoeffEnd = normalized[1];
+        retval.normalizedCoeffStart = normalized[0];
+        retval.normalizedCoeffEnd = normalized[1];
         // precompute for the stable quadratic in generate()
         retval.squaredCoeffStart = normalized[0] * normalized[0];
         retval.twoTimesDiff = scalar_type(2.0) * (normalized[1] - normalized[0]);
@@ -57,18 +57,18 @@ struct Linear
         // Quadratic (1-start)*x^2 + start*x - u = 0; since start >= 0 the stable root is
         // x = 2u / (start + sqrt(start^2 + 2*diff*u)), which never cancels.
         const scalar_type sqrtTerm = sqrt(squaredCoeffStart + twoTimesDiff * u);
-        const scalar_type denom = linearCoeffStart + sqrtTerm;
+        const scalar_type denom = normalizedCoeffStart + sqrtTerm;
         // NOTE: floating point can make x slightly > 1 when u~1 and diff < 0; callers needing
         // non-negative PDF at the boundary should clamp with min(x, 1).
         const codomain_type x = (u + u) / denom;
         // diff*x == sqrtTerm - start algebraically (conjugate identity), saves 1 mul
-        cache.diffTimesX = sqrtTerm - linearCoeffStart;
+        cache.diffTimesX = sqrtTerm - normalizedCoeffStart;
         return x;
     }
 
     density_type forwardPdf(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC
     {
-        return linearCoeffStart + cache.diffTimesX;
+        return normalizedCoeffStart + cache.diffTimesX;
     }
 
     weight_type forwardWeight(const domain_type u, const cache_type cache) NBL_CONST_MEMBER_FUNC
@@ -83,16 +83,16 @@ struct Linear
     density_type backwardPdf(const codomain_type x) NBL_CONST_MEMBER_FUNC
     {
         assert(x >= scalar_type(0.0) && x <= scalar_type(1.0));
-        return hlsl::mix(linearCoeffStart, linearCoeffEnd, x);
+        return hlsl::mix(normalizedCoeffStart, normalizedCoeffEnd, x);
     }
 
     weight_type backwardWeight(const codomain_type x) NBL_CONST_MEMBER_FUNC
     {
         return backwardPdf(x);
     }
 
-    scalar_type linearCoeffStart;
-    scalar_type linearCoeffEnd;
+    scalar_type normalizedCoeffStart;
+    scalar_type normalizedCoeffEnd;
     scalar_type squaredCoeffStart;
     scalar_type twoTimesDiff;
 };
Original file line number	Diff line number	Diff line change
`@@ -142,7 +142,9 @@ struct bound_t`
`142`	`142`
`143`	`143`	`void comp_step(NBL_REF_ARG(Accessor) accessor, const uint32_t testPoint, const uint32_t rightBegin)`
`144`	`144`	`{`
`145`		`- if (compare(accessor[testPoint],value))`
	`145`	`+ typename Accessor::value_type val;`
	`146`	`+ accessor.get(testPoint, val);`
	`147`	`+ if (compare(val,value))`
`146`	`148`	`it = rightBegin;`
`147`	`149`	`}`
`148`	`150`	`void comp_step(NBL_REF_ARG(Accessor) accessor, const uint32_t testPoint)`
Original file line number	Diff line number	Diff line change
`@@ -65,7 +65,7 @@ struct Bilinear`
`65`	`65`
`66`	`66`	`// bilinear PDF = marginal_y_pdf * conditional_x_pdf; reuse both linear caches`
`67`	`67`	`const scalar_type yPdf = lineary.forwardPdf(u.y, linearYCache);`
`68`		`- cache.normalizedStart = yPdf * linearx.linearCoeffStart;`
	`68`	`+ cache.normalizedStart = yPdf * linearx.normalizedCoeffStart;`
`69`	`69`	`cache.linearXCache.diffTimesX *= yPdf;`
`70`	`70`	`return p;`
`71`	`71`	`}`
Original file line number	Diff line number	Diff line change
`@@ -93,7 +93,7 @@ struct ProjectedSphere`
`93`	`93`	`static codomain_type __generate(NBL_REF_ARG(domain_type) u)`
`94`	`94`	`{`
`95`	`95`	`vector_t3 retval = hemisphere_t::__generate(u.xy);`
`96`		`- const bool chooseLower = u.z > T(0.5);`
	`96`	`+ const bool chooseLower = u.z > scalar_type(0.5);`
`97`	`97`	`retval.z = chooseLower ? (-retval.z) : retval.z;`
`98`	`98`	`if (chooseLower)`
`99`	`99`	`u.z -= T(0.5);`