microsoft
diff --git a/‎docs/DXIL.rst‎
Lines changed: 3 additions & 3 deletions b/‎docs/DXIL.rst‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎include/dxc/DXIL/DxilConstants.h‎
Lines changed: 12 additions & 9 deletions b/‎include/dxc/DXIL/DxilConstants.h‎
Lines changed: 12 additions & 9 deletions
diff --git a/‎include/dxc/DXIL/DxilInstructions.h‎
Lines changed: 34 additions & 0 deletions b/‎include/dxc/DXIL/DxilInstructions.h‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎include/dxc/HlslIntrinsicOp.h‎
Lines changed: 2 additions & 1 deletion b/‎include/dxc/HlslIntrinsicOp.h‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎lib/DXIL/DxilOperations.cpp‎
Lines changed: 27 additions & 23 deletions b/‎lib/DXIL/DxilOperations.cpp‎
Lines changed: 27 additions & 23 deletions
diff --git a/‎lib/HLSL/HLOperationLower.cpp‎
Lines changed: 27 additions & 0 deletions b/‎lib/HLSL/HLOperationLower.cpp‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/convert/nominal.hlsl‎
Lines changed: 18 additions & 0 deletions b/‎tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/convert/nominal.hlsl‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll‎
Lines changed: 6 additions & 0 deletions b/‎tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll‎
Lines changed: 6 additions & 0 deletions b/‎tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll‎
Lines changed: 6 additions & 0 deletions b/‎tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll‎
Lines changed: 6 additions & 0 deletions
@@ -3095,9 +3095,9 @@ ID         Name                                     Description
 2147483675 LinAlgMatrixAccumulateToDescriptor       accumulates a matrix to a RWByteAddressBuffer
 2147483676 LinAlgMatrixAccumulateToMemory           accumulates a matrix to groupshared memory
 2147483677 LinAlgMatrixOuterProduct                 Outer products an M sized vector and a N sized vector producing an MxN matrix
-2147483678 ReservedE1                               reserved
-2147483679 ReservedE2                               reserved
-2147483680 ReservedE3                               reserved
+2147483678 LinAlgConvert                            Convert vector components from one interpretation to another
+2147483679 ReservedE0                               reserved
+2147483680 ReservedE1                               reserved
 2147483681 DebugBreak                               triggers a breakpoint if a debugger is attached
 2147483682 IsDebuggerPresent                        returns true if a debugger is attached
 ========== ======================================== ===================================================================================================================
 
@@ -531,9 +531,8 @@ static const OpCodeTableID TableID = OpCodeTableID::ExperimentalOps;
 // Enumeration for ExperimentalOps DXIL operations
 enum class OpCode : unsigned {
   //
-  ReservedE1 = 30, // reserved
-  ReservedE2 = 31, // reserved
-  ReservedE3 = 32, // reserved
+  ReservedE0 = 31, // reserved
+  ReservedE1 = 32, // reserved
 
   // Debugging
   DebugBreak = 33,        // triggers a breakpoint if a debugger is attached
@@ -552,6 +551,8 @@ enum class OpCode : unsigned {
       9, // returns committed triangle vertices in object space as <9 x float>
 
   // Linear Algebra Operations
+  LinAlgConvert =
+      30, // Convert vector components from one interpretation to another
   LinAlgCopyConvertMatrix =
       13, // Converts and copies the element and use type of the source matrix
           // to the destination matrix with optional transpose
@@ -1349,12 +1350,13 @@ enum class OpCode : unsigned {
       ExperimentalOps,
       LinAlgMatrixOuterProduct), // Outer products an M sized vector and a N
                                  // sized vector producing an MxN matrix
-  // ReservedE1 = 0x8000001E, 2147483678U, -2147483618
+  // LinAlgConvert = 0x8000001E, 2147483678U, -2147483618
+  EXP_OPCODE(ExperimentalOps, LinAlgConvert), // Convert vector components from
+                                              // one interpretation to another
+  // ReservedE0 = 0x8000001F, 2147483679U, -2147483617
+  EXP_OPCODE(ExperimentalOps, ReservedE0), // reserved
+  // ReservedE1 = 0x80000020, 2147483680U, -2147483616
   EXP_OPCODE(ExperimentalOps, ReservedE1), // reserved
-  // ReservedE2 = 0x8000001F, 2147483679U, -2147483617
-  EXP_OPCODE(ExperimentalOps, ReservedE2), // reserved
-  // ReservedE3 = 0x80000020, 2147483680U, -2147483616
-  EXP_OPCODE(ExperimentalOps, ReservedE3), // reserved
   // DebugBreak = 0x80000021, 2147483681U, -2147483615
   EXP_OPCODE(ExperimentalOps,
              DebugBreak), // triggers a breakpoint if a debugger is attached
@@ -1520,6 +1522,7 @@ enum class OpCodeClass : unsigned {
   CreateHandleForLib,
 
   // Linear Algebra Operations
+  LinAlgConvert,
   LinAlgCopyConvertMatrix,
   LinAlgFillMatrix,
   LinAlgMatVecMul,
@@ -1725,7 +1728,7 @@ enum class OpCodeClass : unsigned {
   NodeOutputIsValid,
   OutputComplete,
 
-  NumOpClasses = 221, // exclusive last value of enumeration
+  NumOpClasses = 222, // exclusive last value of enumeration
 };
 // OPCODECLASS-ENUM:END
 
 
@@ -10920,6 +10920,40 @@ struct DxilInst_LinAlgMatrixOuterProduct {
   void set_vectorB(llvm::Value *val) { Instr->setOperand(2, val); }
 };
 
+/// This instruction converts vector components from one interpretation to
+/// another
+struct DxilInst_LinAlgConvert {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_LinAlgConvert(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr,
+                                          hlsl::OP::OpCode::LinAlgConvert);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (4 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands())
+      return false;
+    return true;
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes
+  enum OperandIdx {
+    arg_inputVector = 1,
+    arg_inputInterpretation = 2,
+    arg_outputInterpretation = 3,
+  };
+  // Accessors
+  llvm::Value *get_inputVector() const { return Instr->getOperand(1); }
+  void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_inputInterpretation() const { return Instr->getOperand(2); }
+  void set_inputInterpretation(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_outputInterpretation() const { return Instr->getOperand(3); }
+  void set_outputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); }
+};
+
 /// This instruction triggers a breakpoint if a debugger is attached
 struct DxilInst_DebugBreak {
   llvm::Instruction *Instr;
 
@@ -112,6 +112,7 @@ enum class IntrinsicOp {
   IOP_WorldToObject = 99,
   IOP_WorldToObject3x4 = 100,
   IOP_WorldToObject4x3 = 101,
+  IOP___builtin_LinAlg_Convert = 422,
   IOP___builtin_LinAlg_CopyConvertMatrix = 401,
   IOP___builtin_LinAlg_FillMatrix = 402,
   IOP___builtin_LinAlg_MatrixAccumulate = 411,
@@ -428,7 +429,7 @@ enum class IntrinsicOp {
   IOP_usign = 355,
   MOP_InterlockedUMax = 356,
   MOP_InterlockedUMin = 357,
-  Num_Intrinsics = 422,
+  Num_Intrinsics = 423,
 };
 inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) {
   switch (opcode) {
 
@@ -2976,25 +2976,25 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = {
      3,
      {{0x200}, {0x400}, {0x400}},
      {{0x0}, {0x63}, {0x63}}}, // Overloads: o,<hfwi,<hfwi
-
-    {OC::ReservedE1,
-     "ReservedE1",
-     OCC::Reserved,
-     "reserved",
+    {OC::LinAlgConvert,
+     "LinAlgConvert",
+     OCC::LinAlgConvert,
+     "linAlgConvert",
      Attribute::None,
-     0,
-     {},
-     {}}, // Overloads: v
-    {OC::ReservedE2,
-     "ReservedE2",
+     2,
+     {{0x400}, {0x400}},
+     {{0x63}, {0x63}}}, // Overloads: <hfwi,<hfwi
+
+    {OC::ReservedE0,
+     "ReservedE0",
      OCC::Reserved,
      "reserved",
      Attribute::None,
      0,
      {},
      {}}, // Overloads: v
-    {OC::ReservedE3,
-     "ReservedE3",
+    {OC::ReservedE1,
+     "ReservedE1",
      OCC::Reserved,
      "reserved",
      Attribute::None,
@@ -3955,12 +3955,13 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
   // LinAlgMatrixQueryAccumulatorLayout=2147483670, LinAlgMatVecMul=2147483673,
   // LinAlgMatVecMulAdd=2147483674,
   // LinAlgMatrixAccumulateToDescriptor=2147483675,
-  // LinAlgMatrixOuterProduct=2147483677, DebugBreak=2147483681,
-  // IsDebuggerPresent=2147483682
+  // LinAlgMatrixOuterProduct=2147483677, LinAlgConvert=2147483678,
+  // DebugBreak=2147483681, IsDebuggerPresent=2147483682
   if (op == 2147483648 || (2147483652 <= op && op <= 2147483653) ||
       (2147483656 <= op && op <= 2147483657) || op == 2147483662 ||
       op == 2147483670 || (2147483673 <= op && op <= 2147483675) ||
-      op == 2147483677 || (2147483681 <= op && op <= 2147483682)) {
+      (2147483677 <= op && op <= 2147483678) ||
+      (2147483681 <= op && op <= 2147483682)) {
     major = 6;
     minor = 10;
     return;
@@ -6673,17 +6674,20 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
     A(EXT(1));
     A(EXT(2));
     break;
-
-    //
-  case OpCode::ReservedE1:
-    A(pV);
+  case OpCode::LinAlgConvert:
+    A(EXT(0));
+    A(pI32);
+    A(EXT(1));
+    A(pI32);
     A(pI32);
     break;
-  case OpCode::ReservedE2:
+
+    //
+  case OpCode::ReservedE0:
     A(pV);
     A(pI32);
     break;
-  case OpCode::ReservedE3:
+  case OpCode::ReservedE1:
     A(pV);
     A(pI32);
     break;
@@ -7002,9 +7006,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::GetGroupWaveCount:
   case OpCode::ClusterID:
   case OpCode::LinAlgMatrixQueryAccumulatorLayout:
+  case OpCode::ReservedE0:
   case OpCode::ReservedE1:
-  case OpCode::ReservedE2:
-  case OpCode::ReservedE3:
   case OpCode::DebugBreak:
   case OpCode::IsDebuggerPresent:
     return Type::getVoidTy(Ctx);
@@ -7047,6 +7050,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::LinAlgFillMatrix:
   case OpCode::LinAlgCopyConvertMatrix:
   case OpCode::LinAlgMatrixGetElement:
+  case OpCode::LinAlgConvert:
     if (FT->getNumParams() < 2)
       return nullptr;
     return llvm::StructType::get(Ctx,
 
@@ -7089,6 +7089,30 @@ Value *TranslateLinAlgMatrixAccumStoreToMemory(
                             {OpArg, Matrix, ArrPtr, Offset, Stride, Layout});
 }
 
+Value *TranslateLinAlgConvert(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
+                              HLOperationLowerHelper &Helper,
+                              HLObjectOperationLowerHelper *ObjHelper,
+                              bool &Translated) {
+  hlsl::OP *HlslOp = &Helper.hlslOP;
+  IRBuilder<> Builder(CI);
+
+  Value *OutVecPtr = CI->getArgOperand(1);
+  DXASSERT_NOMSG(isa<PointerType>(OutVecPtr->getType()));
+  Type *OutVecTy = OutVecPtr->getType()->getPointerElementType();
+  Value *InVec = CI->getArgOperand(2);
+  Value *InInterp = CI->getArgOperand(3);
+  Value *OutInterp = CI->getArgOperand(4);
+
+  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
+  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {OutVecTy, InVec->getType()});
+
+  Value *OutVec =
+      Builder.CreateCall(DxilFunc, {OpArg, InVec, InInterp, OutInterp});
+  Builder.CreateStore(OutVec, OutVecPtr);
+
+  return nullptr;
+}
+
 } // namespace
 
 // Lower table.
@@ -7880,6 +7904,9 @@ constexpr IntrinsicLower gLowerTable[] = {
      DXIL::OpCode::DebugBreak},
     {IntrinsicOp::IOP_DxIsDebuggerPresent, TranslateWaveToVal,
      DXIL::OpCode::IsDebuggerPresent},
+
+    {IntrinsicOp::IOP___builtin_LinAlg_Convert, TranslateLinAlgConvert,
+     DXIL::OpCode::LinAlgConvert},
 };
 constexpr size_t NumLowerTableEntries =
     sizeof(gLowerTable) / sizeof(gLowerTable[0]);
 
@@ -0,0 +1,18 @@
+// REQUIRES: dxil-1-10
+// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s
+// RUN: %dxc -T cs_6_10 -HV 202x -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2
+
+[numthreads(4,1,1)]
+void main() {
+  // CHECK-LABEL: define void @main()
+
+  // CHECK: %{{.*}} = call <4 x i32> @dx.op.linAlgConvert.v4i32.v4f32
+  // CHECK-SAME: (i32 -2147483618, <4 x float> <float 9.000000e+00, float 8.000000e+00, float 7.000000e+00, float 6.000000e+00>, i32 1, i32 2)
+  // CHECK-SAME: ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
+
+  // CHECK2: call void @"dx.hl.op..void (i32, <4 x i32>*, <4 x float>, i32, i32)"
+  // CHECK2-SAME: (i32 422, <4 x i32>* %result, <4 x float> %{{.*}}, i32 1, i32 2)
+  float4 vec = {9.0, 8.0, 7.0, 6.0};
+  int4 result;
+  __builtin_LinAlg_Convert(result, vec, 1, 2);
+}
@@ -50,6 +50,9 @@ define void @mainAS() {
   ; dx.op.linAlgMatVecMulAdd
   %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, i32 2, <4 x i32> <i32 7, i32 7, i32 7, i32 7>, i32 3)  ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation)
 
+  ; dx.op.linAlgConvert
+  %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2)  ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
+
   ;
   ; Built-ins restricted to compute, mesh and amplification shaders
   ;
@@ -123,6 +126,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.
 ; Function Attrs: nounwind
 declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0
 
+; Function Attrs: nounwind
+declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0
+
 ; Function Attrs: nounwind
 declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0
 
 
@@ -49,6 +49,9 @@ define void @mainCS() {
   ; dx.op.linAlgMatVecMulAdd
   %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, i32 2, <4 x i32> <i32 7, i32 7, i32 7, i32 7>, i32 3)  ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation)
 
+  ; dx.op.linAlgConvert
+  %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2)  ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
+
   ;
   ; Built-ins restricted to compute, mesh and amplification shaders
   ;
@@ -119,6 +122,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.
 ; Function Attrs: nounwind
 declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0
 
+; Function Attrs: nounwind
+declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0
+
 ; Function Attrs: nounwind
 declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0
 
 
@@ -65,6 +65,9 @@ define void @MainDS() {
   ; dx.op.linAlgMatVecMulAdd
   %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, i32 2, <4 x i32> <i32 7, i32 7, i32 7, i32 7>, i32 3)  ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation)
 
+  ; dx.op.linAlgConvert
+  %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2)  ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
+
   ;
   ; Built-ins restricted to compute, mesh and amplification shaders
   ;
@@ -143,6 +146,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.
 ; Function Attrs: nounwind
 declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0
 
+; Function Attrs: nounwind
+declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0
+
 ; Function Attrs: nounwind
 declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0