Give Asdouble some love (i64 cast and folding) (#3666)

Greg Roth · web-flow · commit 0db66a3b671e · 2021-05-27T15:41:43.000-07:00
The type of the overload allowed i64 and refused casts. As a result, if
an i64 value was used, invalid code was produced, leading to asserts or
validation errors.

Added support for folding when the parameters of asdouble are constants.
diff --git a/lib/Analysis/DxilConstantFolding.cpp b/lib/Analysis/DxilConstantFolding.cpp
@@ -363,6 +363,20 @@ static Constant *ConstantFoldBinaryIntIntrinsic(OP::OpCode opcode, Type *Ty, Con
   return nullptr;
 }
 
+// Constant fold MakeDouble: operand 0 supplies the low 32 bits and operand 1
+// the high 32 bits of the result; returns nullptr unless both are ConstantInt.
+static Constant *ConstantFoldMakeDouble(Type *Ty, const DxilIntrinsicOperands &IntrinsicOperands) {
+  assert(IntrinsicOperands.Size() == 2);
+  ConstantInt *Op1 = IntrinsicOperands.GetConstantInt(0);
+  ConstantInt *Op2 = IntrinsicOperands.GetConstantInt(1);
+  if (!Op1 || !Op2)
+    return nullptr;
+  uint64_t LoBits = Op1->getZExtValue();
+  uint64_t HiBits = Op2->getZExtValue();
+  // BitsToDouble avoids the strict-aliasing violation of a *(double*)& cast.
+  return ConstantFP::get(Ty, BitsToDouble((HiBits << 32) | LoBits));
+}
+
 // Compute bit field extract for ibfe and ubfe.
 // The comptuation for ibfe and ubfe is the same except for the right shift,
 // which is an arithemetic shift for ibfe and logical shift for ubfe.
@@ -477,6 +491,8 @@ static Constant *ConstantFoldFPIntrinsic(OP::OpCode opcode, Type *Ty, const Dxil
   case OP::OpCodeClass::Dot3:
   case OP::OpCodeClass::Dot4:
     return ConstantFoldDot(opcode, Ty, IntrinsicOperands);
+  case OP::OpCodeClass::MakeDouble:
+    return ConstantFoldMakeDouble(Ty, IntrinsicOperands);
   }
 
   return nullptr;
@@ -589,6 +605,7 @@ bool hlsl::CanConstantFoldCallTo(const Function *F) {
     case OP::OpCodeClass::Dot2:
     case OP::OpCodeClass::Dot3:
     case OP::OpCodeClass::Dot4:
+    case OP::OpCodeClass::MakeDouble:
       return true;
     case OP::OpCodeClass::IsHelperLane: {
       const hlsl::ShaderModel *pSM =
diff --git a/tools/clang/lib/Sema/gen_intrin_main_tables_15.h b/tools/clang/lib/Sema/gen_intrin_main_tables_15.h
@@ -858,8 +858,8 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args124[] =
 static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args125[] =
 {
     {"asdouble", AR_QUAL_OUT, 0, LITEMPLATE_ANY, 0, LICOMPTYPE_DOUBLE, IA_R, IA_C},
-    {"x", AR_QUAL_IN, 0, LITEMPLATE_ANY, 1, LICOMPTYPE_UINT_ONLY, IA_R, IA_C},
-    {"y", AR_QUAL_IN, 0, LITEMPLATE_ANY, 2, LICOMPTYPE_UINT_ONLY, IA_R, IA_C},
+    {"x", AR_QUAL_IN, 0, LITEMPLATE_ANY, 1, LICOMPTYPE_UINT, IA_R, IA_C},
+    {"y", AR_QUAL_IN, 0, LITEMPLATE_ANY, 2, LICOMPTYPE_UINT, IA_R, IA_C},
 };
 
 static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args126[] =
diff --git a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/cast/asdoublefold.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/cast/asdoublefold.hlsl
@@ -0,0 +1,14 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// Verify that constants provided to asdouble can be folded
+// CHECK-NOT: makeDouble
+// CHECK: splitDouble.f64(i32 102, double -1.000000e+00
+
+
+uint2 main() : SV_Target // entry point: builds a double from two literal words
+{
+  double d = asdouble(0, 0xBFF00000); // low word 0, high word 0xBFF00000: bit pattern of -1.0
+  uint2 ret;
+  asuint(d, ret.x, ret.y); // split d again so the folded constant reaches splitDouble
+  return ret;
+}
diff --git a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/cast/asdoubleuint64.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/cast/asdoubleuint64.hlsl
@@ -0,0 +1,15 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// Verify that i64 params to asdouble don't produce an invalid MakeDouble that takes i64
+// CHECK: call double @dx.op.makeDouble.f64(i32 101, i32
+// CHECK: SplitDouble
+
+uint64_t i;
+
+uint2 main() : SV_Target // entry point: feeds 64-bit expressions to asdouble
+{
+  double d = asdouble(i&0xFFFFFFFF, i >> 32); // both arguments derive from the uint64_t global i
+  uint2 ret;
+  asuint(d, ret.x, ret.y); // split d into two uints so the result is used
+  return ret;
+}
diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt
@@ -95,7 +95,7 @@ bool [[rn]] all(in any<> x);
 void [[]] AllMemoryBarrier() : syncallmemory_ug;
 void [[]] AllMemoryBarrierWithGroupSync() : syncgroupandallmemory_ug;
 bool [[rn]] any(in any<> x);
-double<> [[rn]] asdouble(in $match<0, 1> uint_only<> x, in $match<0, 2> uint_only<> y) : reinterpret_fuse_double;
+double<> [[rn]] asdouble(in $match<0, 1> uint<> x, in $match<0, 2> uint<> y) : reinterpret_fuse_double;
 float<> [[rn]] asfloat(in $match<0, 1> numeric32_only<> x) : reinterpret_float;
 float16_t<> [[rn]] asfloat16(in $match<0,1> numeric16_only<> x) : reinterpret_float16;
 int16_t<> [[rn]] asint16(in $match<0,1> numeric16_only<> x) : reinterpret_int16;