diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 0b4c7218d4..7047d9fe59 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -96,16 +96,16 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { "unary", Attribute::ReadNone, 1, - {{0x7}}, - {{0x0}}}, // Overloads: hfd + {{0x407}}, + {{0x7}}}, // Overloads: hfd refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { @@ -459,17 +467,42 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, } } } -// Generates a DXIL operation over an overloaded type (Ty), returning a -// RetTy value; when Ty is a vector, it will replicate per-element operations -// into RetTy to rebuild it. -Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, - Type *Ty, Type *RetTy, OP *hlslOP, - IRBuilder<> &Builder) { + +// Creates a native vector call for a "trivial" operation where only a single +// call instruction is needed. The overload and return types are the same vector +// type `Ty`. +// Utility objects `OP` and `Builder` are used to create a call to the given +// `Func` with `Args` arguments. +Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, + ArrayRef Args, Type *Ty, OP *OP, + IRBuilder<> &Builder) { + if (!Ty->isVoidTy()) + return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode)); + else + return Builder.CreateCall(Func, Args); // Cannot add name to void. +} + +// Generates a DXIL operation with the overloaded type based on `Ty` and return +// type `RetTy`. When Ty is a vector, it will either generate per-element calls +// for each vector element and reconstruct the vector type from those results or +// operate on and return native vectors depending on vector size and the value +// of `SupportsVectors`, which is determined by version and opcode support. 
+Value *TrivialDxilOperation(OP::OpCode Opcode, ArrayRef Args, Type *Ty, + Type *RetTy, OP *OP, IRBuilder<> &Builder, + bool SupportsVectors = false) { + + // If supported and the overload type is a vector with more than 1 element, + // create a native vector operation. + if (SupportsVectors && Ty->isVectorTy() && Ty->getVectorNumElements() > 1) { + Function *Func = OP->GetOpFunc(Opcode, Ty); + return TrivialDxilVectorOperation(Func, Opcode, Args, Ty, OP, Builder); + } + + // Set overload type to the scalar type of `Ty` and generate call(s). Type *EltTy = Ty->getScalarType(); - Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy); + Function *Func = OP->GetOpFunc(Opcode, EltTy); - return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP, - Builder); + return TrivialDxilOperation(Func, Opcode, Args, Ty, RetTy, OP, Builder); } Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, @@ -484,82 +517,110 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B); } -Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *Ty = src->getType(); +// Translate call that converts to a dxil unary operation with a different +// return type from the overload by passing the argument, explicit return type, +// and helper objects to the scalarizing unary dxil operation creation. 
+Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, + OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Type *Ty = Src->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src}; + IRBuilder<> Builder(CI); + hlsl::OP *OP = &Helper.hlslOP; + Type *RetTy = CI->getType(); + Constant *OpArg = OP->GetU32Const((unsigned)Opcode); + Value *Args[] = {OpArg, Src}; - return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder); + return TrivialDxilOperation(Opcode, Args, Ty, RetTy, OP, Builder); } -Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP, - Builder); +Value *TrivialDxilUnaryOperation(OP::OpCode Opcode, Value *Src, hlsl::OP *OP, + IRBuilder<> &Builder, + bool SupportsVectors = false) { + Type *Ty = Src->getType(); + + Constant *OpArg = OP->GetU32Const((unsigned)Opcode); + Value *Args[] = {OpArg, Src}; + + return TrivialDxilOperation(Opcode, Args, Ty, Ty, OP, Builder, + SupportsVectors); } -Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *Ty = src0->getType(); +Value *TrivialDxilBinaryOperation(OP::OpCode Opcode, Value *Src0, Value *Src1, + hlsl::OP *OP, IRBuilder<> &Builder, + bool SupportsVectors = false) { + Type *Ty = Src0->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src0, src1}; + Constant *OpArg = OP->GetU32Const((unsigned)Opcode); + Value *Args[] = {OpArg, Src0, Src1}; - return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); + return TrivialDxilOperation(Opcode, Args, Ty, Ty, OP, Builder, + SupportsVectors); } -Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, 
- Value *src2, hlsl::OP *hlslOP, - IRBuilder<> &Builder) { - Type *Ty = src0->getType(); +Value *TrivialDxilTrinaryOperation(OP::OpCode Opcode, Value *Src0, Value *Src1, + Value *Src2, hlsl::OP *OP, + IRBuilder<> &Builder, + bool SupportsVectors = false) { + Type *Ty = Src0->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src0, src1, src2}; + Constant *OpArg = OP->GetU32Const((unsigned)Opcode); + Value *Args[] = {OpArg, Src0, Src1, Src2}; - return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); + return TrivialDxilOperation(Opcode, Args, Ty, Ty, OP, Builder, + SupportsVectors); } -Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +// Translate call that trivially converts to a dxil unary operation by passing +// argument, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. 
+Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Value *Src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); IRBuilder<> Builder(CI); - hlsl::OP *hlslOP = &helper.hlslOP; - Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), - hlslOP, Builder); - return retVal; + hlsl::OP *OP = &Helper.hlslOP; + + return TrivialDxilUnaryOperation(Opcode, Src0, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); } -Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +// Translate call that trivially converts to a dxil binary operation by passing +// arguments, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. 
+Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); - Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); + hlsl::OP *OP = &Helper.hlslOP; + Value *Src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); + Value *Src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - Value *binOp = - TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder); - return binOp; + return TrivialDxilBinaryOperation(Opcode, Src0, Src1, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); } -Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +// Translate call that trivially converts to a dxil trinary (aka tertiary) +// operation by passing arguments, return type, and helper objects to either +// scalarizing or native vector dxil operation creation depending on version +// and vector size. 
+Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); - Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); - Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); + hlsl::OP *OP = &Helper.hlslOP; + Value *Src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); + Value *Src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); + Value *Src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); IRBuilder<> Builder(CI); - Value *triOp = - TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder); - return triOp; + return TrivialDxilTrinaryOperation(Opcode, Src0, Src1, Src2, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); } Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -724,48 +785,54 @@ Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP, // | float | False | 2 | // +----------+---------------------+------------------+ -bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, - int32_t &powI) { +bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *X, Value *Pow, + int32_t &PowI) { // Applicable only when power is a literal. - if (!isa(pow) && !isa(pow)) { + if (!isa(Pow) && !isa(Pow)) { return false; } // Only apply this code gen on splat values. - if (ConstantDataVector *cdv = dyn_cast(pow)) { - if (!hlsl::dxilutil::IsSplat(cdv)) { + if (ConstantDataVector *Cdv = dyn_cast(Pow)) { + if (!hlsl::dxilutil::IsSplat(Cdv)) { return false; } } - APFloat powAPF = isa(pow) - ? cast(pow)->getElementAsAPFloat(0) + // Only apply on aggregates of 16 or fewer elements, + // representing the max 4x4 matrix size. 
+ Type *Ty = X->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 16) + return false; + + APFloat PowAPF = isa(Pow) + ? cast(Pow)->getElementAsAPFloat(0) : // should be a splat value - cast(pow)->getValueAPF(); - APSInt powAPS(32, false); - bool isExact = false; + cast(Pow)->getValueAPF(); + APSInt PowAPS(32, false); + bool IsExact = false; // Try converting float value of power to integer and also check if the float // value is exact. - APFloat::opStatus status = - powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact); - if (status == APFloat::opStatus::opOK && isExact) { - powI = powAPS.getExtValue(); - uint32_t powU = abs(powI); - int setBitCount = 0; - int maxBitSetPos = -1; - for (int i = 0; i < 32; i++) { - if ((powU >> i) & 1) { - setBitCount++; - maxBitSetPos = i; + APFloat::opStatus Status = + PowAPF.convertToInteger(PowAPS, APFloat::rmTowardZero, &IsExact); + if (Status == APFloat::opStatus::opOK && IsExact) { + PowI = PowAPS.getExtValue(); + uint32_t PowU = abs(PowI); + int SetBitCount = 0; + int MaxBitSetPos = -1; + for (int I = 0; I < 32; I++) { + if ((PowU >> I) & 1) { + SetBitCount++; + MaxBitSetPos = I; } } - DXASSERT(maxBitSetPos <= 30, "msb should always be zero."); - unsigned numElem = - isa(pow) ? x->getType()->getVectorNumElements() : 1; - int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1; - int mulOpNeeded = maxBitSetPos + setBitCount - 1; - return mulOpNeeded <= mulOpThreshold; + DXASSERT(MaxBitSetPos <= 30, "msb should always be zero."); + unsigned NumElem = + isa(Pow) ? X->getType()->getVectorNumElements() : 1; + int MulOpThreshold = PowI < 0 ? 
NumElem + 1 : 2 * NumElem + 1; + int MulOpNeeded = MaxBitSetPos + SetBitCount - 1; + return MulOpNeeded <= MulOpThreshold; } return false; @@ -1447,6 +1514,7 @@ Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *refArgs[] = {nullptr, CI->getOperand(1)}; return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); } + // Wave ballot intrinsic. Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, @@ -1899,9 +1967,11 @@ Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, IRBuilder<> Builder(CI); // min(max(x, minVal), maxVal). - Value *maxXMinVal = - TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder); - return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); + bool SupportsVectors = helper.M.GetShaderModel()->IsSM69Plus(); + Value *maxXMinVal = TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, + Builder, SupportsVectors); + return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder, + SupportsVectors); } Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2014,46 +2084,45 @@ Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return Result; } -Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - Value *firstbitHi = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + Value *FirstbitHi = + TrivialUnaryOperationRet(CI, IOP, Opcode, Helper, ObjHelper, Translated); // firstbitHi == -1? 
-1 : (bitWidth-1 -firstbitHi); IRBuilder<> Builder(CI); - Constant *neg1 = Builder.getInt32(-1); - Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Constant *Neg1 = Builder.getInt32(-1); + Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); - Type *Ty = src->getType(); + Type *Ty = Src->getType(); IntegerType *EltTy = cast(Ty->getScalarType()); - Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth() - 1); + Constant *BitWidth = Builder.getInt32(EltTy->getBitWidth() - 1); if (Ty == Ty->getScalarType()) { - Value *sub = Builder.CreateSub(bitWidth, firstbitHi); - Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi); - return Builder.CreateSelect(cond, neg1, sub); + Value *Sub = Builder.CreateSub(BitWidth, FirstbitHi); + Value *Cond = Builder.CreateICmpEQ(Neg1, FirstbitHi); + return Builder.CreateSelect(Cond, Neg1, Sub); } else { - Value *result = UndefValue::get(CI->getType()); - unsigned vecSize = Ty->getVectorNumElements(); - for (unsigned i = 0; i < vecSize; i++) { - Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i); - Value *sub = Builder.CreateSub(bitWidth, EltFirstBit); - Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit); - Value *Elt = Builder.CreateSelect(cond, neg1, sub); - result = Builder.CreateInsertElement(result, Elt, i); + Value *Result = UndefValue::get(CI->getType()); + unsigned VecSize = Ty->getVectorNumElements(); + for (unsigned I = 0; I < VecSize; I++) { + Value *EltFirstBit = Builder.CreateExtractElement(FirstbitHi, I); + Value *Sub = Builder.CreateSub(BitWidth, EltFirstBit); + Value *Cond = Builder.CreateICmpEQ(Neg1, EltFirstBit); + Value *Elt = Builder.CreateSelect(Cond, Neg1, Sub); + Result = Builder.CreateInsertElement(Result, Elt, I); } - return result; + return Result; } } -Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, 
OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - Value *firstbitLo = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); - return firstbitLo; + return TrivialUnaryOperationRet(CI, IOP, Opcode, Helper, ObjHelper, + Translated); } Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2200,57 +2269,60 @@ Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return TranslateLength(CI, sub, hlslOP); } -Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; +Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { + hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); Type *Ty = CI->getType(); - Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); - Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E); - if (Ty != Ty->getScalarType()) { - log2eConst = - ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst); - } - val = Builder.CreateFMul(log2eConst, val); - Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder); - return exp; + Value *Val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Constant *Log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E); + if (Ty != Ty->getScalarType()) + Log2eConst = + ConstantVector::getSplat(Ty->getVectorNumElements(), Log2eConst); + Val = Builder.CreateFMul(Log2eConst, Val); + + return TrivialDxilUnaryOperation(OP::OpCode::Exp, Val, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); } -Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { - hlsl::OP 
*hlslOP = &helper.hlslOP; +Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { + hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); Type *Ty = CI->getType(); - Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); - Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2); - if (Ty != Ty->getScalarType()) { - ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const); - } - Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder); + Value *Val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Constant *Ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2); + if (Ty != Ty->getScalarType()) + Ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), Ln2Const); + + Value *log = + TrivialDxilUnaryOperation(OP::OpCode::Log, Val, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); - return Builder.CreateFMul(ln2Const, log); + return Builder.CreateFMul(Ln2Const, log); } -Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; + hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); Type *Ty = CI->getType(); - Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); - Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10); + Value *Val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Constant *Log2to10Const = + ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10); if (Ty != Ty->getScalarType()) { - log2_10Const = - ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const); + Log2to10Const = + 
ConstantVector::getSplat(Ty->getVectorNumElements(), Log2to10Const); } - Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder); + Value *Log = + TrivialDxilUnaryOperation(OP::OpCode::Log, Val, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); - return Builder.CreateFMul(log2_10Const, log); + return Builder.CreateFMul(Log2to10Const, Log); } Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2431,18 +2503,23 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, return dotOP; } -Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { - auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad; - Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); - Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); - Value *Result = Builder.CreateMul(Elt0, Elt1); - for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) { - Elt0 = Builder.CreateExtractElement(arg0, iVecElt); - Elt1 = Builder.CreateExtractElement(arg1, iVecElt); - Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, - Builder); +// Instead of using a DXIL intrinsic, implement a dot product operation using +// multiply and add operations. Used for integer dots and long vectors. 
+Value *ExpandDot(Value *Arg0, Value *Arg1, unsigned VecSize, hlsl::OP *OP, + IRBuilder<> &Builder, + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { + Value *Elt0 = Builder.CreateExtractElement(Arg0, (uint64_t)0); + Value *Elt1 = Builder.CreateExtractElement(Arg1, (uint64_t)0); + Value *Result; + if (Elt0->getType()->isFloatingPointTy()) + Result = Builder.CreateFMul(Elt0, Elt1); + else + Result = Builder.CreateMul(Elt0, Elt1); + for (unsigned Elt = 1; Elt < VecSize; ++Elt) { + Elt0 = Builder.CreateExtractElement(Arg0, Elt); + Elt1 = Builder.CreateExtractElement(Arg1, Elt); + Result = + TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, OP, Builder); } return Result; @@ -2470,21 +2547,25 @@ Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize, } } -Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); - Type *Ty = arg0->getType(); - unsigned vecSize = Ty->getVectorNumElements(); - Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); +Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { + hlsl::OP *OP = &Helper.hlslOP; + Value *Arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); + Type *Ty = Arg0->getType(); + unsigned VecSize = Ty->getVectorNumElements(); + Value *Arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy()) { - return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); + Type *EltTy = Ty->getScalarType(); + if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) { + return TranslateFDot(Arg0, Arg1, VecSize, OP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP 
== IntrinsicOp::IOP_udot); + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_udot) + MadOpCode = DXIL::OpCode::UMad; + else if (EltTy->isFloatingPointTy()) + MadOpCode = DXIL::OpCode::FMad; + return ExpandDot(Arg0, Arg1, VecSize, OP, Builder, MadOpCode); } } @@ -2587,31 +2668,32 @@ Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, return refract; } -Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; + hlsl::OP *OP = &Helper.hlslOP; // s = saturate((x-min)/(max-min)). IRBuilder<> Builder(CI); - Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx); - Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx); - Value *maxSubMin = Builder.CreateFSub(maxVal, minVal); - Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx); - Value *xSubMin = Builder.CreateFSub(x, minVal); - Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin); - - Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP, - Builder); + Value *MinVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx); + Value *MaxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx); + Value *MaxSubMin = Builder.CreateFSub(MaxVal, MinVal); + Value *X = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx); + Value *XSubMin = Builder.CreateFSub(X, MinVal); + Value *SatVal = Builder.CreateFDiv(XSubMin, MaxSubMin); + + Value *S = + TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, SatVal, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); // return s * s *(3-2*s). 
- Constant *c2 = ConstantFP::get(CI->getType(), 2); - Constant *c3 = ConstantFP::get(CI->getType(), 3); + Constant *C2 = ConstantFP::get(CI->getType(), 2); + Constant *C3 = ConstantFP::get(CI->getType(), 3); - Value *sMul2 = Builder.CreateFMul(s, c2); - Value *result = Builder.CreateFSub(c3, sMul2); - result = Builder.CreateFMul(s, result); - result = Builder.CreateFMul(s, result); - return result; + Value *SMul2 = Builder.CreateFMul(S, C2); + Value *Result = Builder.CreateFSub(C3, SMul2); + Result = Builder.CreateFMul(S, Result); + Result = Builder.CreateFMul(S, Result); + return Result; } Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -3013,45 +3095,46 @@ Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) { return Result; } -Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); - Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *arg0Ty = arg0->getType(); - Type *arg1Ty = arg1->getType(); + hlsl::OP *OP = &Helper.hlslOP; + Value *Arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); + Value *Arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); + Type *Arg0Ty = Arg0->getType(); + Type *Arg1Ty = Arg1->getType(); IRBuilder<> Builder(CI); - if (arg0Ty->isVectorTy()) { - if (arg1Ty->isVectorTy()) { + if (Arg0Ty->isVectorTy()) { + if (Arg1Ty->isVectorTy()) { // mul(vector, vector) == dot(vector, vector) - unsigned vecSize = arg0Ty->getVectorNumElements(); - if (arg0Ty->getScalarType()->isFloatingPointTy()) { - return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); + unsigned VecSize = 
Arg0Ty->getVectorNumElements(); + if (Arg0Ty->getScalarType()->isFloatingPointTy()) { + return TranslateFDot(Arg0, Arg1, VecSize, OP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_umul); + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_umul) + MadOpCode = DXIL::OpCode::UMad; + return ExpandDot(Arg0, Arg1, VecSize, OP, Builder, MadOpCode); } } else { // mul(vector, scalar) == vector * scalar-splat - arg1 = SplatToVector(arg1, arg0Ty, Builder); + Arg1 = SplatToVector(Arg1, Arg0Ty, Builder); } } else { - if (arg1Ty->isVectorTy()) { + if (Arg1Ty->isVectorTy()) { // mul(scalar, vector) == scalar-splat * vector - arg0 = SplatToVector(arg0, arg1Ty, Builder); + Arg0 = SplatToVector(Arg0, Arg1Ty, Builder); } // else mul(scalar, scalar) == scalar * scalar; } // create fmul/mul for the pair of vectors or scalars - if (arg0Ty->getScalarType()->isFloatingPointTy()) { - return Builder.CreateFMul(arg0, arg1); + if (Arg0Ty->getScalarType()->isFloatingPointTy()) { + return Builder.CreateFMul(Arg0, Arg1); } else { - return Builder.CreateMul(arg0, arg1); + return Builder.CreateMul(Arg0, Arg1); } } @@ -6150,20 +6233,8 @@ Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateAnd(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateAnd(x, y); } Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6171,20 +6242,8 @@ Value 
*TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateOr(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateOr(x, y); } Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6194,21 +6253,8 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltCond = Builder.CreateExtractElement(cond, i); - Value *EltTrue = Builder.CreateExtractElement(t, i); - Value *EltFalse = Builder.CreateExtractElement(f, i); - Value *tmp = Builder.CreateSelect(EltCond, EltTrue, EltFalse); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateSelect(cond, t, f); } } // namespace @@ -6467,18 +6513,20 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos}, {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos}, - {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, + 
{IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet, DXIL::OpCode::Countbits}, {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseX}, + {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineX}, - {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseY}, + {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineY}, {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes}, diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 027d7d3cbc..3dac550218 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6606,7 +6606,7 @@ bool HLSLExternalSource::MatchArguments( argTypes.clear(); const bool isVariadic = IsVariadicIntrinsicFunction(pIntrinsic); - static const UINT UnusedSize = 0xFF; + static const UINT UnusedSize = UINT_MAX; static const BYTE MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; #define CAB(cond, arg) \ { \ @@ -6622,7 +6622,7 @@ bool HLSLExternalSource::MatchArguments( ArBasicKind ComponentType[MaxIntrinsicArgs]; // Component type for each argument, // AR_BASIC_UNKNOWN if unspecified. 
- UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UNUSED_INDEX32 + UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UnusedSize // if unspecified. badArgIdx = MaxIntrinsicArgs; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..af6f96745c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl @@ -0,0 +1,391 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s + +// Test vector-enabled non-trivial intrinsics that take parameters of various types. + +RWByteAddressBuffer buf; +RWByteAddressBuffer ibuf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle 
[[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue 
%dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = ibuf.Load >(0); + vector sVec2 = ibuf.Load >(512); + vector sVec3 = ibuf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = ibuf.Load >(1025); + vector usVec2 = ibuf.Load >(1536); + vector usVec3 = ibuf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = ibuf.Load >(2049); + vector iVec2 = ibuf.Load 
>(2560); + vector iVec3 = ibuf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = ibuf.Load >(3073); + vector uiVec2 = ibuf.Load >(3584); + vector uiVec3 = ibuf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = ibuf.Load >(4097); + vector lVec2 = ibuf.Load >(4608); + vector lVec3 = ibuf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = ibuf.Load >(5121); + vector ulVec2 = ibuf.Load >(5632); + vector ulVec3 = ibuf.Load >(6144); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 35, <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 36, <[[NUM]] x half> [[tmp]], <[[NUM]] x half> [[hvec3]]) ; FMin(a,b) + vector hRes = clamp(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 35, <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 36, <[[NUM]] x float> [[tmp]], <[[NUM]] x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 35, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 36, <[[NUM]] x double> [[tmp]], <[[NUM]] x double> [[dvec3]]) ; FMin(a,b) + vector dRes = clamp(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 38, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[svec3]]) ; IMin(a,b) + vector sRes = clamp(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 39, <[[NUM]] x i16> 
[[usvec1]], <[[NUM]] x i16> [[usvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 40, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[usvec3]]) ; UMin(a,b) + vector usRes = clamp(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 38, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[ivec3]]) ; IMin(a,b) + vector iRes = clamp(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 39, <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 40, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[uivec3]]) ; UMin(a,b) + vector uiRes = clamp(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 38, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[lvec3]]) ; IMin(a,b) + vector lRes = clamp(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 39, <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 40, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[ulvec3]]) ; UMin(a,b) + vector ulRes = clamp(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec2]], [[hvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x half> zeroinitializer, <[[NUM]] x half> [[fvec2]], 
[[fvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x float> zeroinitializer, <[[NUM]] x float> [[hvec1]], @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp]]) ; Exp(value) + hRes += exp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fmul fast <[[NUM]] x float> [[fvec1]], @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[hvec2]], [[hvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 7, <[[NUM]] x half> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x half> [[mul]], [[sub]] + hRes += smoothstep(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x float> [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 7, <[[NUM]] x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. 
+ + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fmul fast <[[NUM]] x half> [[hvec2]], [[fvec2]], [[hvec3]], [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <[[NUM]] x float> [[fvec1]] to <[[NUM]] x i32> + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], [[add]], [[shr]] to <[[NUM]] x float> + // CHECK: [[sel:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x float> [[i2f]], <[[NUM]] x float> zeroinitializer + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec2]] + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + fRes += lerp(fVec2, fVec3, fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x half> , [[hvec1]] + hRes += rcp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x float> , [[fvec1]] + fRes += rcp(fVec1); + + vector signs = 1; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] 
x float> [[fvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(dVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i16> [[usvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: [[sub:%.*]] = sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i32> [[uivec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> 
[[cmp]] to <[[NUM]] x i32> + signs *= sign(uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i64> [[ulvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(ulVec2); + + iRes += signs; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i16> [[svec1]], zeroinitializer + // CHECK: or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + sRes += or(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i16> [[svec3]], zeroinitializer + // CHECK: and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + sRes += and(sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: select <[[NUM]] x i1> [[bvec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]] + sRes += select(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + + ibuf.Store >(0, sRes); + ibuf.Store >(1024, usRes); + ibuf.Store >(2048, iRes); + ibuf.Store >(3072, uiRes); + ibuf.Store >(4096, lRes); + ibuf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..7d5da99e21 --- /dev/null 
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl @@ -0,0 +1,147 @@ +// RUN: %dxc -T ps_6_9 %s | FileCheck %s + +// Long vector tests for vec ops that scalarize to something more complex +// than a simple repetition of the same dx.op calls. + +StructuredBuffer< vector > buf; +ByteAddressBuffer rbuf; + +float4 main(uint i : SV_PrimitiveID, bool b : B) : SV_Target { + vector vec1 = rbuf.Load< vector >(i++*32); + vector vec2 = rbuf.Load< vector >(i++*32); + vector vec3 = rbuf.Load< vector >(i++*32); + + // CHECK: fdiv fast <8 x float> + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: fadd fast <8 x float> %{{.*}}, %{{.*}}, + // CHECK: fcmp fast oeq <8 x float> + // CHECK: fcmp fast oge <8 x float> + // CHECK: fcmp fast olt <8 x float> + // CHECK: and <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> + // CHECK: and <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> + // CHECK: and <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> + // CHECK: fsub fast <8 x float> + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; 
FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + + // CHECK: fsub fast <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> + // CHECK: fmul fast <8 x float> + vec1 = fmod(vec1, vec2); + + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: fmul fast <8 x float> + vec1 = ldexp(vec1, vec2); + + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float 
@dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: fmul fast <8 x float> + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + vec1 = pow(vec1, vec2); + + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: fsub fast <8 x float> + vec1 = modf(vec1, vec2); + + // CHECK: [[el:%.*]] = extractelement <8 x float> + // CHECK: [[mul:%.*]] = fmul fast float [[el]] + // CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) + // CHECK: [[pong:%.*]] = call float 
@dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) + // CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) + // CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) + // CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) + // CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) + // CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) + vec1 = dot(vec1, vec2); + + vector bvec = b; + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + bvec &= any(vec1); + + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + bvec &= all(vec2); + + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + return WaveMatch(bvec); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl new file mode 100644 index 0000000000..02cad5b894 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=7 %s | FileCheck %s +// RUN: 
%dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled binary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 
= buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) + vector hRes = FUNC(hVec1, hVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) + vector fRes = FUNC(fVec1, fVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) + vector dRes = FUNC(dVec1, dVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl new file mode 100644 index 0000000000..994246b753 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 
-enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled binary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 
= buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1024); + vector usVec2 = buf.Load >(1536); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2048); + vector iVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3072); + vector uiVec2 = buf.Load >(3584); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4096); + vector lVec2 = buf.Load >(4608); + + 
// CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5120); + vector ulVec2 = buf.Load >(5632); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) + vector sRes = FUNC(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) + vector usRes = FUNC(usVec1, usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) + vector iRes = FUNC(iVec1, iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) + vector uiRes = FUNC(uiVec1, uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) + vector lRes = FUNC(lVec1, lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) + vector ulRes = FUNC(ulVec1, ulVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + 
buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..40ffd3fe63 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -0,0 +1,87 @@ +// The binary part of some of these is all just a vector math ops with as many unary dxops as elements. +// These will have apparent mismatches between the ARITY define and the check prefix. + +// RUN: %dxc -DFUNC=abs -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=pow -DARITY=2 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=f16tof32 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=f32tof16 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=isfinite -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isinf -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isnan -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=modf -DARITY=2 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_fine -DARITY=1 -T ps_6_9 %s 
| FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=fwidth -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossDiagonal -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=WaveActiveBitAnd -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitOr -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitXor -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMin -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMax -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitAnd -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitOr -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitXor -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc 
-DFUNC=WaveMultiPrefixProduct -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixSum -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneFirst -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveAllEqual -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE + +#ifndef TYPE +#define TYPE float +#endif + +#if ARITY == 1 +#define CALLARGS(x,y,z) x +#elif ARITY == 2 +#define CALLARGS(x,y,z) x, y +#elif ARITY == 3 +#define CALLARGS(x,y,z) x, y, z +// ARITY 4 is used for 1 vec + scalar +#elif ARITY == 4 +#define CALLARGS(x,y,z) x, i +// ARITY 5 is used for 1 vec + uint4 mask for wavemultiprefix* +#elif ARITY == 5 +#define CALLARGS(x,y,z) x, m +#endif + +StructuredBuffer< vector > buf; +ByteAddressBuffer rbuf; + +float4 main(uint i : SV_PrimitiveID, uint4 m : M) : SV_Target { + vector arg1 = rbuf.Load< vector >(i++*32); + vector arg2 = rbuf.Load< vector >(i++*32); + vector arg3 = rbuf.Load< vector >(i++*32); + + // UNARY: call {{.*}} [[DXOP:@dx.op.unary]] + // BINARY: call {{.*}} [[DXOP:@dx.op.binary]] + // TERTIARY: call {{.*}} [[DXOP:@dx.op.tertiary]] + // LEGACY: call {{.*}} [[DXOP:@dx.op.legacy]] + // SPECFLT: call {{.*}} [[DXOP:@dx.op.isSpecialFloat]] + // QUAD: call {{.*}} [[DXOP:@dx.op.quad]] + // WAVE: call {{.*}} [[DXOP:@dx.op.wave]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + + vector ret = FUNC(CALLARGS(arg1, arg2, 
arg3)); + return float4(ret[0] + ret[1], ret[2] + ret[3], ret[4] + ret[5], ret[6] + ret[7]); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl new file mode 100644 index 0000000000..e32ebc1db2 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled ternary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +// Given that all we have at the moment are fmad and fma and the latter only takes doubles, +// fma is tacked on as an additional check. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] 
@dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.tertiary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]], <[[NUM]] x half> [[hvec3]]) + vector hRes = FUNC(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.tertiary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]], <[[NUM]] x float> [[fvec3]]) + vector fRes = FUNC(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes = FUNC(dVec1, dVec2, dVec3); + + // Tacked on fma() check since it only takes doubles. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 47, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes2 = fma(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + buf.Store >(5120, dRes2); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl new file mode 100644 index 0000000000..50f98715e4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl @@ -0,0 +1,131 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled tertiary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + vector sVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, 
%dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1025); + vector usVec2 = buf.Load >(1536); + vector usVec3 = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2049); + vector iVec2 = buf.Load >(2560); + vector iVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3073); + vector uiVec2 = buf.Load >(3584); + vector uiVec3 = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] 
@dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4097); + vector lVec2 = buf.Load >(4608); + vector lVec3 = buf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5121); + vector ulVec2 = buf.Load >(5632); + vector ulVec3 = buf.Load >(6144); + + // Test simple matching type overloads. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]]) + vector sRes = FUNC(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]], <[[NUM]] x i16> [[usvec3]]) + vector usRes = FUNC(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]], <[[NUM]] x i32> [[ivec3]]) + vector iRes = FUNC(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]], <[[NUM]] x i32> [[uivec3]]) + vector uiRes = FUNC(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]], <[[NUM]] x i64> [[lvec3]]) + vector lRes = FUNC(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]], <[[NUM]] x i64> [[ulvec3]]) + vector ulRes = FUNC(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl new file mode 100644 index 
0000000000..91ab631a7e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -0,0 +1,83 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 
-enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same 
parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec = buf.Load >(1024); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec]]) + vector hRes = FUNC(hVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec]]) + vector fRes = FUNC(fVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(1024, fRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl new file mode 100644 index 0000000000..ef0b250745 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // Capture opcode number. 
+ // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec = buf.Load >(5120); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec]]) + vector sRes = FUNC(sVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: 
insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[usvec]]) + vector usRes = FUNC(usVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec]]) + vector iRes = FUNC(iVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[uivec]]) + vector uiRes = FUNC(uiVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec]]) + vector lRes = FUNC(lVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[ulvec]]) + vector ulRes = FUNC(ulVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..11d705305d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl @@ -0,0 +1,186 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=13 %s | FileCheck %s + +// Source for dxilgen test CodeGenDXIL/passes/longvec-intrinsics.ll. +// Some targetted filecheck testing as an incidental. 
+ +RWStructuredBuffer > hBuf; +RWStructuredBuffer > fBuf; +RWStructuredBuffer > dBuf; + +RWStructuredBuffer > bBuf; +RWStructuredBuffer > uBuf; +RWStructuredBuffer > lBuf; + +[numthreads(8,1,1)] +void main() { + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + vector fVec1 = fBuf[11]; + vector fVec2 = fBuf[12]; + vector fVec3 = fBuf[13]; + + // CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.binary.v13f32(i32 35, <13 x float> [[fvec1]], <13 x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <13 x float> @dx.op.binary.v13f32(i32 36, <13 x float> [[tmp]], <13 x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + vector hVec1 = hBuf[14]; + vector hVec2 = hBuf[15]; + vector hVec3 = hBuf[16]; + + // 
CHECK: [[tmp:%.*]] = fcmp fast olt <13 x half> [[hvec2]], [[hvec1]] + // CHECK: select <13 x i1> [[tmp]], <13 x half> zeroinitializer, <13 x half> hRes = step(hVec1, hVec2); + + // CHECK: [[tmp:%.*]] = fmul fast <13 x float> [[fvec1]], @dx.op.unary.v13f32(i32 21, <13 x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK: [[tmp:%.*]] = call <13 x half> @dx.op.unary.v13f16(i32 23, <13 x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <13 x half> [[tmp]], [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <13 x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <13 x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 7, <13 x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], [[sat]] + // CHECK: fmul fast <13 x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. 
+ + // CHECK: fmul fast <13 x float> [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <13 x float> [[fvec1]] to <13 x i32> + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], [[add]], [[shr]] to <13 x float> + // CHECK: [[sel:%.*]] = select <13 x i1> [[cmp]], <13 x float> [[i2f]], <13 x float> zeroinitializer + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK: [[tmp:%.*]] = fsub fast <13 x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <13 x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + // CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + // CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + vector uVec1 = uBuf[17]; + vector uVec2 = uBuf[18]; + + vector signs = 1; + // CHECK: [[cmp:%.*]] = icmp ne <13 x i32> [[uvec2]], zeroinitializer + // CHECK: zext <13 x i1> [[cmp]] to <13 x i32> + signs *= sign(uVec2); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + vector lVec1 = lBuf[19]; + vector lVec2 = lBuf[20]; + + // CHECK: [[gt:%.*]] = icmp sgt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <13 x i1> [[gt]] to <13 x i32> + // CHECK: 
[[ilt:%.*]] = zext <13 x i1> [[lt]] to <13 x i32> + // CHECK: sub nsw <13 x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + vector uRes = signs; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec1:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec2:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec3:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + vector bVec1 = bBuf[21]; + vector bVec2 = bBuf[22]; + vector bVec3 = bBuf[23]; + + // CHECK: [[bvec2:%.*]] = icmp ne <13 x i32> [[vec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <13 x i32> [[vec1]], zeroinitializer + // CHECK: or <13 x i1> [[bvec2]], [[bvec1]] + uRes += or(bVec1, bVec2); + + // CHECK: [[bvec3:%.*]] = icmp ne <13 x i32> [[vec3]], zeroinitializer + // CHECK: and <13 x i1> [[bvec3]], [[bvec2]] + uRes += and(bVec2, bVec3); + + // CHECK: select <13 x i1> [[bvec3]], <13 x i64> [[lvec1]], <13 x i64> [[lvec2]] + vector lRes = select(bVec3, lVec1, lVec2); + + // CHECK: [[el1:%.*]] = extractelement <13 x float> [[fvec1]] + // CHECK: [[el2:%.*]] = extractelement <13 x float> [[fvec2]] + // CHECK: [[mul:%.*]] = fmul fast float [[el2]], [[el1]] + // CHECK: 
[[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) + // CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad1]]) ; FMad(a,b,c) + // CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad2]]) ; FMad(a,b,c) + // CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad3]]) ; FMad(a,b,c) + // CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad4]]) ; FMad(a,b,c) + // CHECK: [[mad6:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad5]]) ; FMad(a,b,c) + // CHECK: [[mad7:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad6]]) ; FMad(a,b,c) + // CHECK: [[mad8:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad7]]) ; FMad(a,b,c) + // CHECK: [[mad9:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad8]]) ; FMad(a,b,c) + // CHECK: [[mad10:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad9]]) ; FMad(a,b,c) + // CHECK: [[mad11:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad10]]) ; FMad(a,b,c) + // CHECK: [[mad12:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad11]]) ; FMad(a,b,c) + fRes += dot(fVec1, fVec2); + + // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 17, <13 x float> [[fvec1]]) ; Atan(value) + fRes += atan(fVec1); + + // CHECK: call <13 x i32> @dx.op.binary.v13i32(i32 40, <13 x i32> [[uvec1]], <13 x i32> [[uvec2]]) ; UMin(a,b) + uRes += min(uVec1, uVec2); + + // CHECK: call <13 x float> @dx.op.tertiary.v13f32(i32 46, <13 x float> [[fvec1]], <13 x float> [[fvec2]], <13 x float> [[fvec3]]) ; FMad(a,b,c) + fRes += mad(fVec1, fVec2, fVec3); + + // CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + vector dVec1 = dBuf[24]; + vector dVec2 = dBuf[25]; + vector dVec3 = dBuf[26]; + + // CHECK: call <13 x double> @dx.op.tertiary.v13f64(i32 47, <13 x double> [[dvec1]], <13 x double> [[dvec2]], <13 x double> [[dvec3]]) + vector dRes = fma(dVec1, dVec2, dVec3); + + hBuf[0] = hRes; + fBuf[0] = fRes; + dBuf[0] = dRes; + uBuf[0] = uRes; + lBuf[0] = lRes; +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll new file mode 100644 index 0000000000..8f9dcbbdbc --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll @@ -0,0 +1,434 @@ +; RUN: %dxopt %s -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <7 x half> } +%"class.RWStructuredBuffer >" = type { <7 x float> } +%"class.RWStructuredBuffer >" = type { <7 x double> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i64> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" = external global %"class.RWStructuredBuffer >", align 2 
+@"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 + +; CHECK-LABEL: define void @main() +define void @main() #0 { +bb: + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + ; CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + ; CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + ; CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + + %exp = alloca <7 x float>, align 4 + %tmp = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:23 col:30 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp) ; line:23 col:30 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:23 col:30 + %tmp3 = call <7 x float>* 
@"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 11) ; line:23 col:30 + %tmp4 = load <7 x float>, <7 x float>* %tmp3 ; line:23 col:30 + %tmp5 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:24 col:30 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp5) ; line:24 col:30 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:24 col:30 + %tmp8 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp7, i32 12) ; line:24 col:30 + %tmp9 = load <7 x float>, <7 x float>* %tmp8 ; line:24 col:30 + %tmp10 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:25 col:30 + %tmp11 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp10) ; line:25 col:30 + %tmp12 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:25 col:30 + %tmp13 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp12, i32 13) ; line:25 col:30 + %tmp14 = load <7 x float>, <7 x float>* %tmp13 ; line:25 col:30 + + ; Clamp operation. 
+ ; CHECK: [[max:%.*]] = call <7 x float> @dx.op.binary.v7f32(i32 35, <7 x float> [[fvec1]], <7 x float> [[fvec2]]) + ; CHECK: call <7 x float> @dx.op.binary.v7f32(i32 36, <7 x float> [[max]], <7 x float> [[fvec3]]) + %tmp15 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 119, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:29 col:29 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + ; CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + ; CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + ; CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + %tmp16 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:37 col:34 + %tmp17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp16) ; line:37 col:34 + %tmp18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp17, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:37 col:34 + %tmp19 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp18, i32 14) ; line:37 col:34 + %tmp20 = load <7 x half>, <7 x half>* %tmp19 ; line:37 col:34 + %tmp21 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* 
@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:38 col:34 + %tmp22 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp21) ; line:38 col:34 + %tmp23 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp22, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:38 col:34 + %tmp24 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp23, i32 15) ; line:38 col:34 + %tmp25 = load <7 x half>, <7 x half>* %tmp24 ; line:38 col:34 + %tmp26 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:39 col:34 + %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp26) ; line:39 col:34 + %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:39 col:34 + %tmp29 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp28, i32 16) ; line:39 col:34 + %tmp30 = load <7 x half>, <7 x half>* %tmp29 ; line:39 col:34 + + ; Step operation. + ; CHECK: [[cmp:%.*]] = fcmp fast olt <7 x half> [[hvec2]], [[hvec1]] + ; CHECK: select <7 x i1> [[cmp]], <7 x half> zeroinitializer, <7 x half> + %tmp31 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32 192, <7 x half> %tmp20, <7 x half> %tmp25) ; line:43 col:33 + + ; Exp operation. 
+ ; CHECK: [[mul:%.*]] = fmul fast <7 x float> , [[fvec1]] + ; CHECK call <7 x float> @dx.op.unary.v7f32(i32 21, <7 x float> [[mul]]) + %tmp32 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 139, <7 x float> %tmp4) ; line:47 col:11 + %tmp33 = fadd <7 x float> %tmp15, %tmp32 ; line:47 col:8 + + ; Log operation. + ; CHECK: [[log:%.*]] = call <7 x half> @dx.op.unary.v7f16(i32 23, <7 x half> [[hvec1]]) + ; CHECK: fmul fast <7 x half> , [[log]] + %tmp34 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32 159, <7 x half> %tmp20) ; line:51 col:11 + %tmp35 = fadd <7 x half> %tmp31, %tmp34 ; line:51 col:8 + + ; Smoothstep operation. + ; CHECK: [[sub1:%.*]] = fsub fast <7 x float> [[fvec2]], [[fvec1]] + ; CHECK: [[sub2:%.*]] = fsub fast <7 x float> [[fvec3]], [[fvec1]] + ; CHECK: [[div:%.*]] = fdiv fast <7 x float> [[sub2]], [[sub1]] + ; CHECK: [[sat:%.*]] = call <7 x float> @dx.op.unary.v7f32(i32 7, <7 x float> [[div]]) + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], + ; CHECK: [[sub:%.*]] = fsub fast <7 x float> , [[mul]] + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], [[sub]] + ; CHECK: fmul fast <7 x float> %Saturate, [[mul]] + %tmp36 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 189, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:61 col:11 + %tmp37 = fadd <7 x float> %tmp33, %tmp36 ; line:61 col:8 + + ; Radians operation. + ; CHECK: fmul fast <7 x float> , [[fvec3]] + %tmp38 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 176, <7 x float> %tmp14) ; line:66 col:11 + %tmp39 = fadd <7 x float> %tmp37, %tmp38 ; line:66 col:8 + store <7 x float> %tmp14, <7 x float>* %exp, align 4 ; line:77 col:22 + + ; Frexp operation. 
+ ; CHECK: [[cmp:%.*]] = fcmp fast une <7 x float> [[fvec1]], zeroinitializer + ; CHECK: [[ext:%.*]] = sext <7 x i1> [[cmp]] to <7 x i32> + ; CHECK: [[bct:%.*]] = bitcast <7 x float> [[fvec1]] to <7 x i32> + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[add:%.*]] = add <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[add]], [[ext]] + ; CHECK: [[shr:%.*]] = ashr <7 x i32> [[and]], + ; CHECK: [[i2f:%.*]] = sitofp <7 x i32> [[shr]] to <7 x float> + ; CHECK: store <7 x float> [[i2f]], <7 x float>* %exp + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[or:%.*]] = or <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[or]], [[ext]] + ; CHECK: bitcast <7 x i32> [[and]] to <7 x float> + %tmp41 = call <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32 150, <7 x float> %tmp4, <7 x float>* %exp) ; line:78 col:11 + %tmp42 = fadd <7 x float> %tmp39, %tmp41 ; line:78 col:8 + %tmp43 = load <7 x float>, <7 x float>* %exp, align 4 ; line:79 col:11 + %tmp44 = fadd <7 x float> %tmp42, %tmp43 ; line:79 col:8 + + ; Lerp operation. 
+ ; CHECK: [[sub:%.*]] = fsub fast <7 x half> [[hvec3]], [[hvec2]] + ; CHECK: fmul fast <7 x half> [[hvec1]], [[sub]] + %tmp45 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32 157, <7 x half> %tmp25, <7 x half> %tmp30, <7 x half> %tmp20) ; line:83 col:11 + %tmp46 = fadd <7 x half> %tmp35, %tmp45 ; line:83 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + ; CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + ; CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + %tmp47 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:90 col:29 + %tmp48 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp47) ; line:90 col:29 + %tmp49 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp48, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:90 col:29 + %tmp50 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp49, i32 17) ; line:90 col:29 + %tmp51 = load <7 x i32>, <7 x i32>* %tmp50 ; line:90 col:29 + %tmp52 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:91 col:29 + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp52) ; line:91 col:29 + %tmp54 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:91 col:29 + %tmp55 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 18) ; line:91 col:29 + %tmp56 = load <7 x i32>, <7 x i32>* %tmp55 ; line:91 col:29 + + ; Unsigned int sign operation. + ; CHECK: [[cmp:%.*]] = icmp ne <7 x i32> [[uvec2]], zeroinitializer + ; CHECK: zext <7 x i1> [[cmp]] to <7 x i32> + %tmp57 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32 355, <7 x i32> %tmp56) ; line:96 col:12 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + ; CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + ; CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + %tmp58 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:102 col:32 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp58) ; line:102 col:32 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:102 col:32 + %tmp61 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp60, i32 19) ; line:102 col:32 + %tmp62 = load <7 x 
i64>, <7 x i64>* %tmp61 ; line:102 col:32 + %tmp63 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:103 col:32 + %tmp64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp63) ; line:103 col:32 + %tmp65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp64, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:103 col:32 + %tmp66 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp65, i32 20) ; line:103 col:32 + %tmp67 = load <7 x i64>, <7 x i64>* %tmp66 ; line:103 col:32 + + ; Signed int sign operation. + ; CHECK: [[lt1:%.*]] = icmp slt <7 x i64> zeroinitializer, [[lvec2]] + ; CHECK: [[lt2:%.*]] = icmp slt <7 x i64> [[lvec2]], zeroinitializer + ; CHECK: [[ilt1:%.*]] = zext <7 x i1> [[lt1]] to <7 x i32> + ; CHECK: [[ilt2:%.*]] = zext <7 x i1> [[lt2]] to <7 x i32> + ; CHECK: sub <7 x i32> [[ilt1]], [[ilt2]] + %tmp68 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32 185, <7 x i64> %tmp67) ; line:110 col:12 + %tmp69 = mul <7 x i32> %tmp57, %tmp68 ; line:110 col:9 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec1:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp 
ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec2:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec3:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + %tmp70 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:126 col:29 + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp70) ; line:126 col:29 + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:126 col:29 + %tmp73 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 21) ; line:126 col:29 + %tmp74 = load <7 x i32>, <7 x i32>* %tmp73 ; line:126 col:29 + %tmp75 = icmp ne <7 x i32> %tmp74, zeroinitializer ; line:126 col:29 + %tmp76 = zext <7 x i1> %tmp75 to <7 x i32> ; line:126 col:21 + %tmp77 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:127 col:29 + %tmp78 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp77) ; line:127 col:29 + %tmp79 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp78, %dx.types.ResourceProperties { i32 
4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:127 col:29 + %tmp80 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp79, i32 22) ; line:127 col:29 + %tmp81 = load <7 x i32>, <7 x i32>* %tmp80 ; line:127 col:29 + %tmp82 = icmp ne <7 x i32> %tmp81, zeroinitializer ; line:127 col:29 + %tmp83 = zext <7 x i1> %tmp82 to <7 x i32> ; line:127 col:21 + %tmp84 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:128 col:29 + %tmp85 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp84) ; line:128 col:29 + %tmp86 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp85, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:128 col:29 + %tmp87 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp86, i32 23) ; line:128 col:29 + %tmp88 = load <7 x i32>, <7 x i32>* %tmp87 ; line:128 col:29 + %tmp89 = icmp ne <7 x i32> %tmp88, zeroinitializer ; line:128 col:29 + %tmp90 = zext <7 x i1> %tmp89 to <7 x i32> ; line:128 col:21 + + + ; Or() operation. 
+ ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: [[bvec1:%.*]] = icmp ne <7 x i32> [[vec1]], zeroinitializer + ; CHECK: or <7 x i1> [[bvec1]], [[bvec2]] + %tmp91 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:133 col:21 + %tmp92 = icmp ne <7 x i32> %tmp76, zeroinitializer ; line:133 col:14 + %tmp93 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 169, <7 x i1> %tmp92, <7 x i1> %tmp91) ; line:133 col:11 + %tmp94 = zext <7 x i1> %tmp93 to <7 x i32> ; line:133 col:11 + %tmp95 = add <7 x i32> %tmp69, %tmp94 ; line:133 col:8 + + ; And() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: and <7 x i1> [[bvec2]], [[bvec3]] + %tmp96 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:137 col:22 + %tmp97 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:137 col:15 + %tmp98 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 106, <7 x i1> %tmp97, <7 x i1> %tmp96) ; line:137 col:11 + %tmp99 = zext <7 x i1> %tmp98 to <7 x i32> ; line:137 col:11 + %tmp100 = add <7 x i32> %tmp95, %tmp99 ; line:137 col:8 + + ; Select() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: select <7 x i1> [[bvec3]], <7 x i64> [[lvec1]], <7 x i64> [[lvec2]] + %tmp101 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:140 col:38 + %tmp102 = call <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32 184, <7 x i1> %tmp101, <7 x i64> %tmp62, <7 x i64> %tmp67) ; line:140 col:31 + %tmp103 = call float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32 134, <7 x float> %tmp4, <7 x float> %tmp9) ; line:152 col:11 + + ; Dot operation. 
+ ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 0 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 0 + ; CHECK: [[mul:%.*]] = fmul fast float [[el1]], [[el2]] + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 1 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 1 + ; CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mul]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 2 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 2 + ; CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad1]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 3 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 3 + ; CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad2]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 4 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 4 + ; CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad3]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 5 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 5 + ; CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad4]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 6 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 6 + ; CHECK: call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad5]]) + %tmp104 = insertelement <7 x float> undef, float %tmp103, i32 0 ; line:152 col:11 + %tmp105 = shufflevector <7 x float> %tmp104, <7 x float> undef, <7 x i32> zeroinitializer ; line:152 col:11 + %tmp106 = fadd <7 x float> %tmp44, %tmp105 ; line:152 col:8 + + ; Atan operation. 
+ ; CHECK: call <7 x float> @dx.op.unary.v7f32(i32 17, <7 x float> [[fvec1]]) + %tmp107 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 116, <7 x float> %tmp4) ; line:155 col:11 + %tmp108 = fadd <7 x float> %tmp106, %tmp107 ; line:155 col:8 + + ; Min operation. + ; CHECK: call <7 x i32> @dx.op.binary.v7i32(i32 40, <7 x i32> [[uvec1]], <7 x i32> [[uvec2]]) + %tmp109 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32 353, <7 x i32> %tmp51, <7 x i32> %tmp56) ; line:158 col:11 + %tmp110 = add <7 x i32> %tmp100, %tmp109 ; line:158 col:8 + + ; Mad operation. + ; CHECK: call <7 x float> @dx.op.tertiary.v7f32(i32 46, <7 x float> [[fvec1]], <7 x float> [[fvec2]], <7 x float> [[fvec3]]) + %tmp111 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 162, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:161 col:11 + %tmp112 = fadd <7 x float> %tmp108, %tmp111 ; line:161 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + ; CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + ; CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + ; CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:169 col:31 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:169 col:31 + %tmp115 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:169 col:31 + %tmp116 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 24) ; line:169 col:31 + %tmp117 = load <7 x double>, <7 x double>* %tmp116 ; line:169 col:31 + %tmp118 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:170 col:31 + %tmp119 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp118) ; line:170 col:31 + %tmp120 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp119, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:170 col:31 + %tmp121 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp120, i32 25) ; line:170 col:31 + %tmp122 = load <7 x double>, <7 x double>* %tmp121 ; line:170 col:31 + %tmp123 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:171 col:31 + %tmp124 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp123) ; line:171 col:31 + %tmp125 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp124, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" 
zeroinitializer) ; line:171 col:31 + %tmp126 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp125, i32 26) ; line:171 col:31 + %tmp127 = load <7 x double>, <7 x double>* %tmp126 ; line:171 col:31 + + ; FMA operation. + ; CHECK: call <7 x double> @dx.op.tertiary.v7f64(i32 47, <7 x double> [[dvec1]], <7 x double> [[dvec2]], <7 x double> [[dvec3]]) + %tmp128 = call <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32 147, <7 x double> %tmp117, <7 x double> %tmp122, <7 x double> %tmp127) ; line:174 col:30 + %tmp129 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:176 col:3 + %tmp130 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp129) ; line:176 col:3 + %tmp131 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp130, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:176 col:3 + %tmp132 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp131, i32 0) ; line:176 col:3 + store <7 x half> %tmp46, <7 x half>* %tmp132 ; line:176 col:11 + %tmp133 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:177 col:3 + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp133) ; line:177 col:3 + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle 
%tmp134, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:177 col:3 + %tmp136 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp135, i32 0) ; line:177 col:3 + store <7 x float> %tmp112, <7 x float>* %tmp136 ; line:177 col:11 + %tmp137 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:178 col:3 + %tmp138 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp137) ; line:178 col:3 + %tmp139 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp138, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:178 col:3 + %tmp140 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp139, i32 0) ; line:178 col:3 + store <7 x double> %tmp128, <7 x double>* %tmp140 ; line:178 col:11 + %tmp141 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:179 col:3 + %tmp142 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp141) ; line:179 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp142, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:179 col:3 + %tmp144 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp143, i32 0) ; 
line:179 col:3 + store <7 x i32> %tmp110, <7 x i32>* %tmp144 ; line:179 col:11 + %tmp145 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:180 col:3 + %tmp146 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp145) ; line:180 col:3 + %tmp147 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp146, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:180 col:3 + %tmp148 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp147, i32 0) ; line:180 col:3 + store <7 x i64> %tmp102, <7 x i64>* %tmp148 ; line:180 col:11 + ret void ; line:181 col:1 +} + +declare <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>, <7 x float>) #1 +declare <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>) #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32, <7 x float>) #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32, <7 x half>) #1 +declare <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32, <7 x float>, <7 x float>*) #0 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>, <7 x half>) #1 +declare <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32, <7 x i32>) #1 +declare <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32, <7 x i64>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 
+declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32, <7 x i1>, <7 x i1>) #1 +declare <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32, <7 x i1>, <7 x i64>, <7 x i64>) #1 +declare float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>) #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32, <7 x i32>, <7 x i32>) #1 +declare <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32, <7 x double>, <7 x double>, <7 x double>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !36} +!dx.entryPoints = !{!40} +!dx.fnprops = !{!52} +!dx.options = !{!53, !54} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"cs", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer >" undef, !6, %"class.RWStructuredBuffer >" undef, !11, %"class.RWStructuredBuffer >" undef, !16, %"class.RWStructuredBuffer >" undef, !21, %"class.RWStructuredBuffer >" undef, !26, %"class.RWStructuredBuffer >" undef, !31} +!6 = !{i32 14, !7, !8} +!7 = !{i32 6, 
!"h", i32 3, i32 0, i32 7, i32 8, i32 13, i32 7} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, <7 x half> undef} +!11 = !{i32 28, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9, i32 13, i32 7} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, <7 x float> undef} +!16 = !{i32 56, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 10, i32 13, i32 7} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, <7 x double> undef} +!21 = !{i32 28, !22, !23} +!22 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 1, i32 13, i32 7} +!23 = !{i32 0, !24} +!24 = !{!25} +!25 = !{i32 0, <7 x i1> undef} +!26 = !{i32 28, !27, !28} +!27 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5, i32 13, i32 7} +!28 = !{i32 0, !29} +!29 = !{!30} +!30 = !{i32 0, <7 x i32> undef} +!31 = !{i32 56, !32, !33} +!32 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 6, i32 13, i32 7} +!33 = !{i32 0, !34} +!34 = !{!35} +!35 = !{i32 0, <7 x i64> undef} +!36 = !{i32 1, void ()* @main, !37} +!37 = !{!38} +!38 = !{i32 1, !39, !39} +!39 = !{} +!40 = !{void ()* @main, !"main", null, !41, null} +!41 = !{null, !42, null, null} +!42 = !{!43, !45, !47, !49, !50, !51} +!43 = !{i32 0, %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A", !"hBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !44} +!44 = !{i32 1, i32 14} +!45 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A", !"fBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!46 = !{i32 1, i32 28} +!47 = !{i32 2, %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A", !"dBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!48 = !{i32 1, i32 56} +!49 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A", !"bBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!50 = !{i32 4, %"class.RWStructuredBuffer >"* 
@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A", !"uBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!51 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A", !"lBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!52 = !{void ()* @main, i32 5, i32 8, i32 1, i32 1} +!53 = !{i32 0} +!54 = !{i32 -1} +!59 = !{!60, !60, i64 0} +!60 = !{!"omnipotent char", !61, i64 0} +!61 = !{!"Simple C/C++ TBAA"} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 691c3ba58f..548aae4192 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1503,7 +1503,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1537,7 +1537,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hf", + "hf<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1554,7 +1554,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the reverse bit pattern of the input value", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1601,7 +1601,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1619,7 +1619,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1674,7 +1674,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "hfd", + "hfd<", "rn", [ db_dxil_param( @@ -1691,7 +1691,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "d", + "d<", "rn", [ db_dxil_param( @@ -1715,7 +1715,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs an integral " + 
i, - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "the operation result"), @@ -2608,7 +2608,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2626,7 +2626,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2644,7 +2644,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2662,7 +2662,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param(