Code review. Fix float comparisons. Pass clamp args in a buffer

alsepkow · alsepkow · commit abd5649764cd · 2025-04-17T18:07:26.000-07:00
diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h
@@ -470,15 +470,19 @@ inline bool GetTestParamUseWARP(bool defaultVal) {
 
 #ifdef FP_SUBNORMAL
 
-inline bool isdenorm(float f) { return FP_SUBNORMAL == std::fpclassify(f); }
+// Reports whether f is a subnormal (denormal) value, as judged by fpclassify.
+template <typename T = float> inline bool isdenorm(T f) {
+  return std::fpclassify(f) == FP_SUBNORMAL;
+}
 
 #else
 
-inline bool isdenorm(float f) {
-  return (std::numeric_limits<float>::denorm_min() <= f &&
-          f < std::numeric_limits<float>::min()) ||
-         (-std::numeric_limits<float>::min() < f &&
-          f <= -std::numeric_limits<float>::denorm_min());
+// Fallback without FP_SUBNORMAL: true when |f| lies in [denorm_min, min).
+template <typename T = float> inline bool isdenorm(T f) {
+  using Limits = std::numeric_limits<T>;
+  const bool PosDenorm = Limits::denorm_min() <= f && f < Limits::min();
+  const bool NegDenorm = -Limits::min() < f && f <= -Limits::denorm_min();
+  return PosDenorm || NegDenorm;
 }
 
 #endif // FP_SUBNORMAL
@@ -526,6 +530,31 @@ inline bool isnanFloat16(uint16_t val) {
 uint16_t ConvertFloat32ToFloat16(float val) throw();
 float ConvertFloat16ToFloat32(uint16_t val) throw();
 
+// Returns true when the bit patterns of fsrc and fref are at most ULPTolerance
+// apart. A NaN fsrc matches only a NaN fref; Any mode also accepts a zero fsrc.
+inline bool CompareDoubleULP(
+    const double &fsrc, const double &fref, int64_t ULPTolerance,
+    hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) {
+  if (fsrc == fref)
+    return true;
+  if (std::isnan(fsrc))
+    return std::isnan(fref);
+  if (mode == hlsl::DXIL::Float32DenormMode::Any) {
+    // If denorm expected, output can be sign preserved zero. Otherwise output
+    // should pass the regular ulp testing.
+    if (isdenorm(fref) && fsrc == 0 && std::signbit(fsrc) == std::signbit(fref))
+      return true;
+  }
+  // For FTZ or Preserve mode, we should get the expected number within
+  // ULPTolerance for any operations. Subtract the smaller bit pattern from the
+  // larger so the distance stays unsigned and is never truncated or negated.
+  const DWORD64 srcBits = *((const DWORD64 *)&fsrc);
+  const DWORD64 refBits = *((const DWORD64 *)&fref);
+  const DWORD64 uDiff =
+      srcBits > refBits ? srcBits - refBits : refBits - srcBits;
+  return uDiff <= static_cast<DWORD64>(ULPTolerance);
+}
+
 inline bool CompareFloatULP(
     const float &fsrc, const float &fref, int ULPTolerance,
     hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) {
diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp
@@ -628,7 +628,6 @@ class ExecutionTest {
   bool m_D3DInitCompleted = false;
   bool m_ExperimentalModeEnabled = false;
   bool m_AgilitySDKEnabled = false;
-  bool m_HLKModeEnabled = false; // Prevent skip logic when running HLK tests.
 
   const float ClearColor[4] = {0.0f, 0.2f, 0.4f, 1.0f};
 
@@ -672,13 +671,6 @@ class ExecutionTest {
       } else {
         LogCommentFmt(L"Debug layer enabled.");
       }
-
-      hr = WEX::TestExecution::RuntimeParameters::TryGetValue(L"HLKModeEnabled",
-                                                              m_HLKModeEnabled);
-
-      if (SUCCEEDED(hr) && m_HLKModeEnabled) {
-        LogCommentFmt(L"HLK mode enabled.");
-      }
     }
 
     return true;
@@ -11433,6 +11425,9 @@ template <typename T> struct LongVectorOpTestConfig {
   LongVectorOpTestConfig(LongVectorOpType OpType) : OpType(OpType) {
     IntrinsicString = "";
 
+    if (IsFloatingPointType())
+      Tolerance = 1;
+
     switch (OpType) {
     case LongVectorOpType_ScalarAdd:
       OperatorString = "+";
@@ -11457,6 +11452,7 @@ template <typename T> struct LongVectorOpTestConfig {
       IntrinsicString = "max";
       break;
     case LongVectorOpType_Clamp:
+      OperatorString = ",";
       IntrinsicString = "TestClamp";
       IsBinaryOp = false;
       break;
@@ -11469,6 +11465,12 @@ template <typename T> struct LongVectorOpTestConfig {
     }
   }
 
+  // True only when T is float, double, or HLSLHalf_t.
+  bool IsFloatingPointType() const {
+    return std::is_same_v<T, float> || std::is_same_v<T, double> ||
+           std::is_same_v<T, HLSLHalf_t>;
+  }
+
   // A helper to get the hlsl type as a string for a given C++ type.
   // Used in the long vector tests.
   std::string GetHLSLTypeString() {
@@ -11565,7 +11567,7 @@ template <typename T> class DeterministicNumberGenerator {
     if constexpr (std::is_same_v<T, int64_t>)
       return Int64Dist(generator);
     if constexpr (std::is_same_v<T, float>)
-    return FloatDist(generator);
+      return FloatDist(generator);
     if constexpr (std::is_same_v<T, double>)
       return DoubleDist(generator);
     if constexpr (std::is_same_v<T, uint16_t>)
@@ -11618,20 +11620,19 @@ bool DoArraysMatch(const std::array<T, N> &ActualValues,
     } else if constexpr (std::is_same_v<T, HLSLHalf_t>) {
       const DirectX::PackedVector::HALF a = ActualValues[Index].val;
       const DirectX::PackedVector::HALF b = ExpectedValues[Index].val;
-      if(!CompareHalfULP(a, b, Tolerance))
-      {
+      if (!CompareHalfULP(a, b, Tolerance)) {
         MismatchedIndexes.push_back(Index);
       }
     } else if constexpr (std::is_same_v<T, float>) {
       const int IntTolerance = static_cast<int>(Tolerance);
-      if(!CompareFloatULP(ActualValues[Index], ExpectedValues[Index], IntTolerance))
-      {
+      if (!CompareFloatULP(ActualValues[Index], ExpectedValues[Index], IntTolerance)) {
         MismatchedIndexes.push_back(Index);
       }
     } else if constexpr (std::is_same_v<T, double>) {
-      WEX::Logging::Log::Warning(L"Double comparison not implemented yet. Defaulting to simple comparison for now.");
-      if(ActualValues[Index] != ExpectedValues[Index])
+      const int64_t IntTolerance = static_cast<int64_t>(Tolerance);
+      if (!CompareDoubleULP(ActualValues[Index], ExpectedValues[Index], IntTolerance)) {
         MismatchedIndexes.push_back(Index);
+      }
     } else if (Tolerance == 0 && ActualValues[Index] != ExpectedValues[Index]) {
       MismatchedIndexes.push_back(Index);
     } else {
@@ -12164,15 +12165,16 @@ void ExecutionTest::LongVectorOpTestBase(
   if (!CreateDevice(&D3DDevice, D3D_SHADER_MODEL_6_9) &&
       !m_ExperimentalModeEnabled) {
 
-    if (m_HLKModeEnabled) {
-      LogErrorFmtThrow(
-          L"Device does not support SM 6.9. Can't run these tests.");
-    }
-
+    // With _HLK_CONF defined the test fails here; otherwise it is skipped.
+    #ifdef _HLK_CONF
+    LogErrorFmtThrow(
+      L"Device does not support SM 6.9. Can't run these tests.");
+    #else
     WEX::Logging::Log::Comment(
         "Device does not support SM 6.9. Can't run these tests.");
     WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
     return;
+    #endif
   }
 
   DeterministicNumberGenerator<T> NumberGenerator(1337);
@@ -12260,25 +12262,30 @@ void ExecutionTest::LongVectorOpTestBase(
   CompilerOptions << (Is16BitType ? " -enable-16bit-types" : "");
   CompilerOptions << " -DOPERATOR=";
   CompilerOptions << TestConfig.OperatorString;
-  CompilerOptions << " -DOPERAND2=";
   if (TestConfig.IsBinaryOp) {
+    CompilerOptions << " -DOPERAND2=";
     CompilerOptions << (TestConfig.IsScalarOp ? "InputScalar" : "InputVector2");
-  }
-  CompilerOptions << " -DFUNC=";
-  CompilerOptions << TestConfig.IntrinsicString;
-  switch (TestConfig.OpType) {
-  case LongVectorOpType_Clamp:
-    CompilerOptions << " -DFUNC_CLAMP=1";
-    CompilerOptions << " -DCLAMP_ARGMIN=";
-    // We need to set the precision for the float values.
-    CompilerOptions << std::setprecision(16);
-    CompilerOptions << ClampArgMin;
-    CompilerOptions << " -DCLAMP_ARGMAX=";
-    CompilerOptions << ClampArgMax;
-    break;
-  case LongVectorOpType_Initialize:
-    CompilerOptions << " -DFUNC_INITIALIZE=1";
-    break;
+
+    if(TestConfig.IsScalarOp) {
+      CompilerOptions << " -DIS_SCALAR_OP=1";
+    } else {
+      CompilerOptions << " -DIS_BINARY_VECTOR_OP=1";
+    }
+    CompilerOptions << " -DFUNC=";
+    CompilerOptions << TestConfig.IntrinsicString;
+  } else {
+    CompilerOptions << " -DFUNC=";
+    CompilerOptions << TestConfig.IntrinsicString;
+    CompilerOptions << " -DOPERAND2=";
+    switch (TestConfig.OpType) {
+    case LongVectorOpType_Clamp:
+      CompilerOptions << "ClampArgMinMax";
+      CompilerOptions << " -DFUNC_CLAMP=1";
+      break;
+    case LongVectorOpType_Initialize:
+      CompilerOptions << " -DFUNC_INITIALIZE=1";
+      break;
+    }
   }
 
   // We have to construct the string outside of the lambda. Otherwise it's
@@ -12312,10 +12319,13 @@ void ExecutionTest::LongVectorOpTestBase(
           return;
         }
 
-        // Process the callback for the InputScalar resource.
-        if (0 == _stricmp(Name, "InputScalar")) {
+        // Process the callback for the InputFuncArgs resource.
+        if (0 == _stricmp(Name, "InputFuncArgs")) {
           if (TestConfig.IsScalarOp) {
             FillShaderBufferFromLongVectorData<T, 1>(ShaderData, ScalarInput);
+          } else if (TestConfig.OpType == LongVectorOpType_Clamp) {
+            std::array<T, 2> ClampArgs ={ClampArgMin, ClampArgMax};
+            FillShaderBufferFromLongVectorData<T, 2>(ShaderData, ClampArgs);
           }
 
           return;
diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
@@ -3753,8 +3753,8 @@ void MSMain(uint GID : SV_GroupIndex,
   <ShaderOp Name="LongVectorOp" CS="CS">
     <RootSignature>RootFlags(0), UAV(u0), UAV(u1), UAV(u2),
     UAV(u3)</RootSignature>
-    <!-- Width="8" BYTES to account for largest scalar type (64 bits)-->
-    <Resource Name="InputScalar" Dimension="BUFFER" Width="8"
+    <!-- Width="16" BYTES to hold two values of the largest scalar type (64 bits each) -->
+    <Resource Name="InputFuncArgs" Dimension="BUFFER" Width="16"
     Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
     TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
     <!-- Width="8192" BYTES to account for largest type (64 bits) and vector
@@ -3763,7 +3763,7 @@ void MSMain(uint GID : SV_GroupIndex,
     <Resource Name="InputVector2" Dimension="BUFFER" Width="8192" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
     <Resource Name="OutputVector" Dimension="BUFFER" Width="8192" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
     <RootValues>
-      <RootValue Index="0" ResName="InputScalar" />
+      <RootValue Index="0" ResName="InputFuncArgs" />
       <RootValue Index="1" ResName="InputVector1" />
       <RootValue Index="2" ResName="InputVector2" />
       <RootValue Index="3" ResName="OutputVector" />
@@ -3786,21 +3786,38 @@ void MSMain(uint GID : SV_GroupIndex,
         #endif
 
         #ifdef FUNC_CLAMP
-        vector<TYPE, NUM> TestClamp(vector<TYPE, NUM> Vector)
+        vector<TYPE, NUM> TestClamp(vector<TYPE, NUM> Vector, vector<TYPE, 2> ClampArgMinMax)
         {
-          return clamp(Vector, TYPE(CLAMP_ARGMIN), TYPE(CLAMP_ARGMAX));
+          TYPE ClampArgMin = ClampArgMinMax[0];
+          TYPE ClampArgMax = ClampArgMinMax[1];
+          return clamp(Vector, ClampArgMin, ClampArgMax);
         }
         #endif
 
-        RWByteAddressBuffer g_InputScalar : register(u0);
+        RWByteAddressBuffer g_InputFuncArgs : register(u0);
         RWByteAddressBuffer g_InputVector1 : register(u1);
         RWByteAddressBuffer g_InputVector2 : register(u2);
         RWByteAddressBuffer g_OutputVector : register(u3);
         [numthreads(1,1,1)]
         void main(uint GI : SV_GroupIndex) {
-          vector<TYPE, NUM> InputVector1 = g_InputVector1.Load< vector<TYPE, NUM> >(0);
-          vector<TYPE, NUM> InputVector2 = g_InputVector2.Load< vector<TYPE, NUM> >(0);
-          TYPE InputScalar = g_InputScalar.Load<TYPE>(0);
+
+          vector<TYPE, NUM> InputVector1 = g_InputVector1.Load< vector<TYPE,
+          NUM> >(0);
+
+          #ifdef IS_BINARY_VECTOR_OP
+          vector<TYPE, NUM> InputVector2 = g_InputVector2.Load< vector<TYPE,
+          NUM> >(0);
+          #endif
+
+          #ifdef IS_SCALAR_OP
+          TYPE InputScalar = g_InputFuncArgs.Load<TYPE>(0);
+          #endif
+
+          #ifdef FUNC_CLAMP
+          TYPE Clamp_ArgMin = g_InputFuncArgs.Load<TYPE>(0);
+          TYPE Clamp_ArgMax = g_InputFuncArgs.Load<TYPE>(sizeof(TYPE));
+          vector<TYPE, 2> ClampArgMinMax = {Clamp_ArgMin, Clamp_ArgMax};
+          #endif
 
           vector<TYPE, NUM> OutputVector = FUNC(InputVector1 OPERATOR OPERAND2);