[SM6.9] Lower vector dot calls to vector op (microsoft#7730)

V-FEXrt · web-flow · commit 1390b76d0581 · 2025-09-10T16:32:41.000-06:00
Fixes microsoft#7689. Lowers vector calls to `dot` under SM6.9 to a single DXIL operation (`FDot`). Relies on microsoft/hlsl-specs#622 being accepted.
diff --git a/docs/DXIL.rst b/docs/DXIL.rst
@@ -2423,6 +2423,9 @@ ID  Name                                                  Description
 306 MatVecMulAdd                                          multiplies a MxK dimension matrix and a K sized input vector and adds an M-sized bias vector
 307 OuterProductAccumulate                                Computes the outer product between column vectors and an MxN matrix is accumulated component-wise atomically (with device scope) in memory
 308 VectorAccumulate                                      Accumulates the components of a vector component-wise atomically (with device scope) to the corresponding elements of an array in memory
+309 ReservedD0                                            reserved
+310 ReservedD1                                            reserved
+311 FDot                                                  computes the n-dimensional vector dot-product
 === ===================================================== =======================================================================================================================================================================================================================
 
 
diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h
@@ -524,6 +524,8 @@ enum class OpCode : unsigned {
   ReservedC7 = 300,  // reserved
   ReservedC8 = 301,  // reserved
   ReservedC9 = 302,  // reserved
+  ReservedD0 = 309,  // reserved
+  ReservedD1 = 310,  // reserved
 
   // Amplification shader instructions
   DispatchMesh = 173, // Amplification shader intrinsic DispatchMesh
@@ -618,9 +620,10 @@ enum class OpCode : unsigned {
                          // i32, with accumulate to i32
 
   // Dot
-  Dot2 = 54, // Two-dimensional vector dot-product
-  Dot3 = 55, // Three-dimensional vector dot-product
-  Dot4 = 56, // Four-dimensional vector dot-product
+  Dot2 = 54,  // Two-dimensional vector dot-product
+  Dot3 = 55,  // Three-dimensional vector dot-product
+  Dot4 = 56,  // Four-dimensional vector dot-product
+  FDot = 311, // computes the n-dimensional vector dot-product
 
   // Double precision
   LegacyDoubleToFloat = 132,  // legacy fuction to convert double to float
@@ -1082,7 +1085,7 @@ enum class OpCode : unsigned {
   NumOpCodes_Dxil_1_7 = 226,
   NumOpCodes_Dxil_1_8 = 258,
 
-  NumOpCodes = 309 // exclusive last value of enumeration
+  NumOpCodes = 312 // exclusive last value of enumeration
 };
 // OPCODE-ENUM:END
 
@@ -1154,6 +1157,7 @@ enum class OpCodeClass : unsigned {
   Dot4AddPacked,
 
   // Dot
+  Dot,
   Dot2,
   Dot3,
   Dot4,
@@ -1413,7 +1417,7 @@ enum class OpCodeClass : unsigned {
   NumOpClasses_Dxil_1_7 = 153,
   NumOpClasses_Dxil_1_8 = 174,
 
-  NumOpClasses = 194 // exclusive last value of enumeration
+  NumOpClasses = 195 // exclusive last value of enumeration
 };
 // OPCODECLASS-ENUM:END
 
diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h
@@ -10148,5 +10148,34 @@ struct DxilInst_VectorAccumulate {
   llvm::Value *get_arrayOffset() const { return Instr->getOperand(3); }
   void set_arrayOffset(llvm::Value *val) { Instr->setOperand(3, val); }
 };
+
+/// This instruction computes the n-dimensional vector dot-product (DXIL opcode FDot)
+struct DxilInst_FDot {
+  llvm::Instruction *Instr; // wrapped instruction; stored as-is by the constructor
+  // Construction and identification: wrapping never fails, use operator bool to test for FDot
+  DxilInst_FDot(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const { // true only when Instr is a DXIL op call whose opcode is FDot
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::FDot);
+  }
+  // Validation support. NOTE(review): the dyn_cast result below is dereferenced unchecked, so Instr is assumed to be a CallInst - confirm callers test operator bool first
+  bool isAllowed() const { return true; } // unconditionally allowed by this helper
+  bool isArgumentListValid() const {
+    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) // exactly three call operands; a and b are operands 1 and 2
+      return false;
+    return true;
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes (these match the literal indexes used by the accessors below)
+  enum OperandIdx {
+    arg_a = 1, // first dot-product operand
+    arg_b = 2, // second dot-product operand
+  };
+  // Accessors: read or replace operands 1 (a) and 2 (b) of the wrapped instruction
+  llvm::Value *get_a() const { return Instr->getOperand(1); }
+  void set_a(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_b() const { return Instr->getOperand(2); }
+  void set_b(llvm::Value *val) { Instr->setOperand(2, val); }
+};
 // INSTR-HELPER:END
 } // namespace hlsl
diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp
@@ -2686,6 +2686,33 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
      1,
      {{0x400}},
      {{0x63}}}, // Overloads: <hfwi
+
+    {OC::ReservedD0,
+     "ReservedD0",
+     OCC::Reserved,
+     "reserved",
+     Attribute::None,
+     0,
+     {},
+     {}}, // Overloads: v
+    {OC::ReservedD1,
+     "ReservedD1",
+     OCC::Reserved,
+     "reserved",
+     Attribute::None,
+     0,
+     {},
+     {}}, // Overloads: v
+
+    // Dot
+    {OC::FDot,
+     "FDot",
+     OCC::Dot,
+     "dot",
+     Attribute::ReadNone,
+     1,
+     {{0x400}},
+     {{0x3}}}, // Overloads: <hf
 };
 // OPCODE-OLOADS:END
 
@@ -3792,10 +3819,12 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
   Type *pI16 = Type::getInt16Ty(m_Ctx);
   Type *pI32 = Type::getInt32Ty(m_Ctx);
   Type *pOlTplI32 = Type::getInt32Ty(m_Ctx);
+  Type *pVecElt = nullptr;
   if (pOverloadType->isVectorTy()) {
     pOlTplI32 =
         VectorType::get(pOlTplI32, pOverloadType->getVectorNumElements());
     pOlTplI1 = VectorType::get(pOlTplI1, pOverloadType->getVectorNumElements());
+    pVecElt = pOverloadType->getVectorElementType();
   }
 
   Type *pPI32 = Type::getInt32PtrTy(m_Ctx);
@@ -5988,6 +6017,24 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
     A(pRes);
     A(pI32);
     break;
+
+    //
+  case OpCode::ReservedD0:
+    A(pV);
+    A(pI32);
+    break;
+  case OpCode::ReservedD1:
+    A(pV);
+    A(pI32);
+    break;
+
+    // Dot
+  case OpCode::FDot:
+    A(pVecElt);
+    A(pI32);
+    A(pETy);
+    A(pETy);
+    break;
   // OPCODE-OLOAD-FUNCS:END
   default:
     DXASSERT(false, "otherwise unhandled case");
@@ -6160,6 +6207,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::CreateHandleForLib:
   case OpCode::WaveMatch:
   case OpCode::VectorAccumulate:
+  case OpCode::FDot:
     if (FT->getNumParams() <= 1)
       return nullptr;
     return FT->getParamType(1);
@@ -6276,6 +6324,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::ReservedC7:
   case OpCode::ReservedC8:
   case OpCode::ReservedC9:
+  case OpCode::ReservedD0:
+  case OpCode::ReservedD1:
     return Type::getVoidTy(Ctx);
   case OpCode::CheckAccessFullyMapped:
   case OpCode::SampleIndex:
diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp
@@ -2556,10 +2556,23 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   hlsl::OP *hlslOP = &helper.hlslOP;
   Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
   Type *Ty = arg0->getType();
+  Type *EltTy = Ty->getScalarType();
+
+  // SM6.9 introduced a DXIL operation for vectorized dot product
+  if (hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() &&
+      EltTy->isFloatingPointTy()) {
+    Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
+    IRBuilder<> Builder(CI);
+    Constant *opArg = hlslOP->GetU32Const((unsigned)DXIL::OpCode::FDot);
+    Value *args[] = {opArg, arg0, arg1};
+    Function *dxilFunc = hlslOP->GetOpFunc(DXIL::OpCode::FDot, Ty);
+    return TrivialDxilVectorOperation(dxilFunc, DXIL::OpCode::FDot, args, Ty,
+                                      hlslOP, Builder);
+  }
+
   unsigned vecSize = Ty->getVectorNumElements();
   Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
   IRBuilder<> Builder(CI);
-  Type *EltTy = Ty->getScalarType();
   if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 4)
     return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
 
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl
@@ -519,6 +519,16 @@ void main() {
   // CHECK: select <[[NUM]] x i1> [[bvec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]]
   sRes += select(sVec1, sVec2, sVec3);
 
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call float @dx.op.dot.[[FTY]](i32 311, <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]])  ; FDot(a,b)
+  // One pair of extract/insert is expected for the [0]
+  // CHECK: extractelement <[[NUM]] x float> {{%.*}}, i32 0
+  // CHECK: insertelement <[[NUM]] x float> {{%.*}}, float {{%.*}}, i32 0
+  float fResScalar = dot(fVec1, fVec2);
+  fRes[0] += fResScalar;
+
   // CHECK-NOT: extractelement
   // CHECK-NOT: insertelement
   buf.Store<vector<float16_t, NUM> >(0, hRes);
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl
@@ -65,20 +65,6 @@ export void test_modf(inout vector<float, 8> vec1, vector<float, 8> vec2) {
   vec1 = modf(vec1, vec2);
 }
 
-// CHECK-LABEL: test_dot
-// CHECK: [[el:%.*]] = extractelement <8 x float>
-// CHECK: [[mul:%.*]] = fmul fast float [[el]]
-// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c)
-// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c)
-// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c)
-// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c)
-// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c)
-// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c)
-// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c)
-export void test_dot(inout vector<float, 8> vec1, vector<float, 8> vec2) {
-  vec1 = dot(vec1, vec2);
-}
-
 // CHECK-LABEL: test_any
 // CHECK: or i1
 // CHECK: or i1
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl
@@ -156,21 +156,10 @@ void main() {
   // CHECK: call <13 x i1> @dx.op.isSpecialFloat.v13f32(i32 8, <13 x float> [[fvec2]])  ; IsNaN(value)
   uRes += isnan(fVec2);
 
-  // CHECK: [[el1:%.*]] = extractelement <13 x float> [[fvec1]]
-  // CHECK: [[el2:%.*]] = extractelement <13 x float> [[fvec2]]
-  // CHECK: [[mul:%.*]] = fmul fast float [[el2]], [[el1]]
-  // CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c)
-  // CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad1]]) ; FMad(a,b,c)
-  // CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad2]]) ; FMad(a,b,c)
-  // CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad3]]) ; FMad(a,b,c)
-  // CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad4]]) ; FMad(a,b,c)
-  // CHECK: [[mad6:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad5]]) ; FMad(a,b,c)
-  // CHECK: [[mad7:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad6]]) ; FMad(a,b,c)
-  // CHECK: [[mad8:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad7]]) ; FMad(a,b,c)
-  // CHECK: [[mad9:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad8]]) ; FMad(a,b,c)
-  // CHECK: [[mad10:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad9]]) ; FMad(a,b,c)
-  // CHECK: [[mad11:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad10]]) ; FMad(a,b,c)
-  // CHECK: [[mad12:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad11]]) ; FMad(a,b,c)
+  // CHECK: [[dotres:%.*]] = call float @dx.op.dot.v13f32(i32 311, <13 x float> [[fvec1]], <13 x float> [[fvec2]])  ; FDot(a,b)
+  // Upcast float to <13 x float>
+  // CHECK: [[dotresvec:%.*]] = insertelement <13 x float> undef, float [[dotres]], i32 0
+  // CHECK: shufflevector <13 x float> [[dotresvec]], <13 x float> undef, <13 x i32> zeroinitializer
   fRes += dot(fVec1, fVec2);
 
   // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 17, <13 x float> [[fvec1]])  ; Atan(value)
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll
@@ -265,27 +265,9 @@ bb:
   %inres = add <7 x i32> %tmp100, %inext
 
   ; Dot operation.
-  ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 0
-  ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 0
-  ; CHECK: [[mul:%.*]] = fmul fast float [[el1]], [[el2]]
-  ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 1
-  ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 1
-  ; CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mul]])
-  ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 2
-  ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 2
-  ; CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad1]])
-  ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 3
-  ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 3
-  ; CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad2]])
-  ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 4
-  ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 4
-  ; CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad3]])
-  ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 5
-  ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 5
-  ; CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad4]])
-  ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 6
-  ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 6
-  ; CHECK: call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad5]])
+  ; CHECK: [[dotres:%.*]] = call float @dx.op.dot.v7f32(i32 311, <7 x float> [[fvec1]], <7 x float> [[fvec2]])
+  ; CHECK: [[dotresvec:%.*]] = insertelement <7 x float> undef, float [[dotres]], i32 0
+  ; CHECK: shufflevector <7 x float> [[dotresvec]], <7 x float> undef, <7 x i32> zeroinitializer
   %tmp103 = call float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32 134, <7 x float> %tmp4, <7 x float> %tmp9) ; line:152 col:11
   %tmp104 = insertelement <7 x float> undef, float %tmp103, i32 0 ; line:152 col:11
   %tmp105 = shufflevector <7 x float> %tmp104, <7 x float> undef, <7 x i32> zeroinitializer ; line:152 col:11
diff --git a/tools/clang/test/LitDXILValidation/lv_illegalDxilOp.ll b/tools/clang/test/LitDXILValidation/lv_illegalDxilOp.ll
@@ -0,0 +1,69 @@
+; REQUIRES: dxil-1-9
+; RUN: not %dxv %s 2>&1 | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+
+%dx.types.Handle = type { i8* }
+%dx.types.ResBind = type { i32, i32, i32, i8 }
+%dx.types.ResourceProperties = type { i32, i32 }
+%dx.types.ResRet.v8f64 = type { <8 x double>, i32 }
+%struct.RWByteAddressBuffer = type { i32 }
+
+define void @main() {
+  %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false)  ; CreateHandleFromBinding(bind,index,nonUniformIndex)
+  %2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })  ; AnnotateHandle(res,props)  resource: RWByteAddressBuffer
+  %3 = call %dx.types.ResRet.v8f64 @dx.op.rawBufferVectorLoad.v8f64(i32 303, %dx.types.Handle %2, i32 0, i32 undef, i32 4)  ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
+  %4 = extractvalue %dx.types.ResRet.v8f64 %3, 0
+  %5 = call %dx.types.ResRet.v8f64 @dx.op.rawBufferVectorLoad.v8f64(i32 303, %dx.types.Handle %2, i32 32, i32 undef, i32 4)  ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
+  %6 = extractvalue %dx.types.ResRet.v8f64 %5, 0
+
+
+; CHECK: Function: main: error: DXIL intrinsic overload must be valid.
+; CHECK: note: at '%7 = call double @dx.op.dot.v8f64(i32 311, <8 x double> %4, <8 x double> %6)' in block '#0' of function 'main'.
+  %7 = call double @dx.op.dot.v8f64(i32 311, <8 x double> %4, <8 x double> %6)  ; FDot(a,b)
+
+
+  %8 = extractelement <8 x double> %6, i32 0
+  %9 = fadd fast double %8, %7
+  %10 = insertelement <8 x double> %6, double %9, i32 0
+  call void @dx.op.rawBufferVectorStore.v8f64(i32 304, %dx.types.Handle %2, i32 0, i32 undef, <8 x double> %10, i32 4)  ; RawBufferVectorStore(uav,index,elementOffset,value0,alignment)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare %dx.types.ResRet.v8f64 @dx.op.rawBufferVectorLoad.v8f64(i32, %dx.types.Handle, i32, i32, i32) #0
+
+; Function Attrs: nounwind readnone
+declare double @dx.op.dot.v8f64(i32, <8 x double>, <8 x double>) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.rawBufferVectorStore.v8f64(i32, %dx.types.Handle, i32, i32, <8 x double>, i32) #2
+
+; Function Attrs: nounwind readnone
+declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1
+
+; Function Attrs: nounwind readnone
+declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!llvm.ident = !{!0}
+!dx.version = !{!1}
+!dx.valver = !{!1}
+!dx.shaderModel = !{!2}
+!dx.resources = !{!3}
+!dx.entryPoints = !{!6}
+
+!0 = !{!"dxc(private) 1.8.0.15017 (main, 4e0f5364a-dirty)"}
+!1 = !{i32 1, i32 9}
+!2 = !{!"cs", i32 6, i32 9}
+!3 = !{null, !4, null, null}
+!4 = !{!5}
+!5 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null}
+!6 = !{void ()* @main, !"main", null, !3, !7}
+!7 = !{i32 0, i64 8598323220, i32 4, !8}
+!8 = !{i32 4, i32 1, i32 1}
+
diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py
diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py