Add bitwise and shift ops for min precision types

alsepkow · Copilot · alsepkow · commit e8d9169d4433 · 2026-03-18T18:33:26.000-07:00
Add LeftShift and RightShift test entries for min16int and min16uint.
Both produce valid min-precision DXIL (shl/ashr/lshr i16 with 4-bit
shift masking).

ReverseBits, CountBits, FirstBitHigh, FirstBitLow are excluded — DXC
promotes min precision to i32 before calling these DXIL intrinsics,
so they don't actually test min precision behavior.

Infrastructure changes:
- LongVectorTestData.h: Add Bitwise and BitShiftRhs input sets for
  HLSLMin16Int_t and HLSLMin16Uint_t matching int16_t/uint16_t names.
  Values constrained to 16-bit safe range.
- LongVectorTestData.h: Add compound assignment operators (<<=, >>=,
  |=, &=, ^=) and unary ~ to both wrapper types to resolve ambiguity
  with integer promotion in template functions.
- LongVectorTestData.h: Specialize std::is_signed for wrapper types
  so FirstBitHigh SFINAE selects the correct signed/unsigned variant.
- LongVectors.cpp: Fix ReverseBits, ScanFromMSB, FirstBitLow to use
  explicit static_cast<T> for integer literals, avoiding ambiguous
  operator overload resolution with wrapper types.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
@@ -334,6 +334,27 @@ struct HLSLMin16Int_t {
   HLSLMin16Int_t operator>>(const HLSLMin16Int_t &O) const {
     return HLSLMin16Int_t(Val >> O.Val);
   }
+  HLSLMin16Int_t operator~() const { return HLSLMin16Int_t(~Val); }
+  HLSLMin16Int_t &operator<<=(const HLSLMin16Int_t &O) {
+    Val <<= O.Val;
+    return *this;
+  }
+  HLSLMin16Int_t &operator>>=(const HLSLMin16Int_t &O) {
+    Val >>= O.Val;
+    return *this;
+  }
+  HLSLMin16Int_t &operator|=(const HLSLMin16Int_t &O) {
+    Val |= O.Val;
+    return *this;
+  }
+  HLSLMin16Int_t &operator&=(const HLSLMin16Int_t &O) {
+    Val &= O.Val;
+    return *this;
+  }
+  HLSLMin16Int_t &operator^=(const HLSLMin16Int_t &O) {
+    Val ^= O.Val;
+    return *this;
+  }
   HLSLMin16Int_t operator&&(const HLSLMin16Int_t &O) const {
     return HLSLMin16Int_t(Val && O.Val);
   }
@@ -399,6 +420,27 @@ struct HLSLMin16Uint_t {
   HLSLMin16Uint_t operator>>(const HLSLMin16Uint_t &O) const {
     return HLSLMin16Uint_t(Val >> O.Val);
   }
+  HLSLMin16Uint_t operator~() const { return HLSLMin16Uint_t(~Val); }
+  HLSLMin16Uint_t &operator<<=(const HLSLMin16Uint_t &O) {
+    Val <<= O.Val;
+    return *this;
+  }
+  HLSLMin16Uint_t &operator>>=(const HLSLMin16Uint_t &O) {
+    Val >>= O.Val;
+    return *this;
+  }
+  HLSLMin16Uint_t &operator|=(const HLSLMin16Uint_t &O) {
+    Val |= O.Val;
+    return *this;
+  }
+  HLSLMin16Uint_t &operator&=(const HLSLMin16Uint_t &O) {
+    Val &= O.Val;
+    return *this;
+  }
+  HLSLMin16Uint_t &operator^=(const HLSLMin16Uint_t &O) {
+    Val ^= O.Val;
+    return *this;
+  }
 
   bool operator&&(const HLSLMin16Uint_t &O) const { return Val && O.Val; }
   bool operator||(const HLSLMin16Uint_t &O) const { return Val || O.Val; }
@@ -415,6 +457,7 @@ struct HLSLMin16Uint_t {
 
   uint32_t Val;
 };
+
 enum class InputSet {
 #define INPUT_SET(SYMBOL) SYMBOL,
 #include "LongVectorOps.def"
@@ -656,6 +699,7 @@ BEGIN_INPUT_SETS(HLSLMin16Int_t)
 INPUT_SET(InputSet::Default1, -6, 1, 7, 3, 8, 4, -3, 8, 8, -2);
 INPUT_SET(InputSet::Default2, 5, -6, -3, -2, 9, 3, 1, -3, -7, 2);
 INPUT_SET(InputSet::Default3, -5, 6, 3, 2, -9, -3, -1, 3, 7, -2);
+INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 11, 11, 14);
 INPUT_SET(InputSet::Zero, 0);
 INPUT_SET(InputSet::NoZero, 1);
 INPUT_SET(InputSet::SelectCond, 0, 1);
@@ -671,6 +715,7 @@ INPUT_SET(InputSet::Default1, 3, 199, 3, 200, 5, 10, 22, 8, 9, 10);
 INPUT_SET(InputSet::Default2, 2, 111, 3, 4, 5, 9, 21, 8, 9, 10);
 INPUT_SET(InputSet::Default3, 4, 112, 4, 5, 3, 7, 21, 1, 11, 9);
 INPUT_SET(InputSet::Zero, 0);
+INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 11, 12, 12, 12);
 INPUT_SET(InputSet::SelectCond, 0, 1);
 INPUT_SET(InputSet::AllOnes, 1);
 INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF,
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -743,12 +743,12 @@ template <typename T> T Saturate(T A) {
 }
 
 template <typename T> T ReverseBits(T A) {
-  T Result = 0;
+  T Result = static_cast<T>(0);
   const size_t NumBits = sizeof(T) * 8;
   for (size_t I = 0; I < NumBits; I++) {
-    Result <<= 1;
-    Result |= (A & 1);
-    A >>= 1;
+    Result <<= static_cast<T>(1);
+    Result |= (A & static_cast<T>(1));
+    A >>= static_cast<T>(1);
   }
   return Result;
 }
@@ -760,12 +760,13 @@ template <typename T> uint32_t CountBits(T A) {
 // General purpose bit scan from the MSB. Based on the value of LookingForZero
 // returns the index of the first high/low bit found.
 template <typename T> uint32_t ScanFromMSB(T A, bool LookingForZero) {
-  if (A == 0)
+  if (A == static_cast<T>(0))
     return std::numeric_limits<uint32_t>::max();
 
   constexpr uint32_t NumBits = sizeof(T) * 8;
   for (int32_t I = NumBits - 1; I >= 0; --I) {
-    bool BitSet = (A & (static_cast<T>(1) << I)) != 0;
+    bool BitSet =
+        (A & (static_cast<T>(1) << static_cast<T>(I))) != static_cast<T>(0);
     if (BitSet != LookingForZero)
       return static_cast<uint32_t>(I);
   }
@@ -788,11 +789,11 @@ FirstBitHigh(T A) {
 template <typename T> uint32_t FirstBitLow(T A) {
   const uint32_t NumBits = sizeof(T) * 8;
 
-  if (A == 0)
+  if (A == static_cast<T>(0))
     return std::numeric_limits<uint32_t>::max();
 
   for (uint32_t I = 0; I < NumBits; ++I) {
-    if (A & (static_cast<T>(1) << I))
+    if (A & (static_cast<T>(1) << static_cast<T>(I)))
       return static_cast<T>(I);
   }
 
@@ -1888,8 +1889,8 @@ void dispatchMinPrecisionTest(ID3D12Device *D3DDevice, bool VerboseLogging,
   constexpr const Operation &Operation = getOperation(OP);
   Op<OP, T, Operation.Arity> Op;
 
-  // Min precision buffer storage width is implementation-defined, so we use
-  // full-precision types for Load/Store via BUFFER_TYPE/BUFFER_OUT_TYPE defines.
+  // Min precision buffer storage width is implementation-defined, so we
+  // use full-precision types for buffer I/O via BUFFER_TYPE/BUFFER_OUT_TYPE.
   for (size_t VectorSize : InputVectorSizes) {
     std::vector<std::vector<T>> Inputs =
         buildTestInputs<T>(VectorSize, Operation.InputSets, Operation.Arity);
@@ -1919,14 +1920,13 @@ void dispatchMinPrecisionWaveOpTest(ID3D12Device *D3DDevice,
   constexpr const Operation &Operation = getOperation(OP);
   Op<OP, T, Operation.Arity> Op;
 
-  // Min precision buffer storage width is implementation-defined, so we use
-  // full-precision types for Load/Store via BUFFER_TYPE/BUFFER_OUT_TYPE defines.
+  // Min precision buffer storage width is implementation-defined, so we
+  // use full-precision types for buffer I/O via BUFFER_TYPE/BUFFER_OUT_TYPE.
   for (size_t VectorSize : InputVectorSizes) {
     std::vector<std::vector<T>> Inputs =
         buildTestInputs<T>(VectorSize, Operation.InputSets, Operation.Arity);
 
-    auto Expected =
-        ExpectedBuilder<OP, T>::buildExpected(Op, Inputs, WaveSize);
+    auto Expected = ExpectedBuilder<OP, T>::buildExpected(Op, Inputs, WaveSize);
 
     using OutT = typename decltype(Expected)::value_type;
 
@@ -3015,11 +3015,15 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon {
   HLK_MIN_PRECISION_TEST(Min, HLSLMin16Int_t);
   HLK_MIN_PRECISION_TEST(Max, HLSLMin16Int_t);
 
-  // Bitwise (logical and shift — bit-manipulation excluded)
+  // Bitwise
   HLK_MIN_PRECISION_TEST(And, HLSLMin16Int_t);
   HLK_MIN_PRECISION_TEST(Or, HLSLMin16Int_t);
   HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Int_t);
-
+  HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Int_t);
+  HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Int_t);
+  // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded -
+  // DXC promotes min precision to i32 before these intrinsics, so they
+  // don't operate at min precision.
 
   // UnaryMath
   HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Int_t);
@@ -3111,11 +3115,15 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon {
   HLK_MIN_PRECISION_TEST(Min, HLSLMin16Uint_t);
   HLK_MIN_PRECISION_TEST(Max, HLSLMin16Uint_t);
 
-  // Bitwise (logical and shift — bit-manipulation excluded)
+  // Bitwise
   HLK_MIN_PRECISION_TEST(And, HLSLMin16Uint_t);
   HLK_MIN_PRECISION_TEST(Or, HLSLMin16Uint_t);
   HLK_MIN_PRECISION_TEST(Xor, HLSLMin16Uint_t);
-
+  HLK_MIN_PRECISION_TEST(LeftShift, HLSLMin16Uint_t);
+  HLK_MIN_PRECISION_TEST(RightShift, HLSLMin16Uint_t);
+  // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded -
+  // DXC promotes min precision to i32 before these intrinsics, so they
+  // don't operate at min precision.
 
   // UnaryMath
   HLK_MIN_PRECISION_TEST(Abs, HLSLMin16Uint_t);