From 2bdf33e357e699d4fa088128760ebae0cebd9b7a Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Fri, 3 Jan 2025 14:25:22 -0700 Subject: [PATCH 1/6] Enable trivial native vector Dxil Operations plus a few This enables the generation of native vector DXIL Operations that are "trivial", meaning they take only a single DXOp Call instruction to implement as well as a few others that either only took such a call and some llvm operations or were of particular interest for other reasons. This involves allowing the overloads by adding the vector indication in hctdb, altering the lowering to maintain the vectors instead of scalarizing them, and a few sundry changes to fix issues along the way. The "trivial" dxil operations that return a different value from the overload type had to be moved out of the way and given their own lowering function so that the main function could generate vectors conditional on the version and vector type. These will be added in a later change. While the long vector supporting intrinsics that weren't given this treatment will continue to generate scalarized operations, some of them needed some work as well. The dot product for float vectors longer than 4 had to take the integer fallback path, which required some small modifications and a rename. Additionally, a heuristic for pow that malfunctioned with too many elements had to have a limit placed on it. Since the or()/and()/select() intrinsics translate directly to LLVM ops, they can have their lowering scalarization removed and what future scalarization might be needed by the current version can be done by later passes as with other LLVM operators. An issue with a special value used to represent unassigned dimensions had to be addressed since new dimensions can exceed that value. It's now MAX_INT. 
Contributes to #7120, but I'd prefer to leave it open until all intrinsics are covered --- lib/HLSL/HLOperationLower.cpp | 241 ++++++----- tools/clang/lib/Sema/SemaHLSL.cpp | 4 +- .../hlsl/types/longvec-intrinsics.hlsl | 391 ++++++++++++++++++ .../types/longvec-scalarized-intrinsics.hlsl | 146 +++++++ ...ngvec-trivial-binary-float-intrinsics.hlsl | 69 ++++ ...longvec-trivial-binary-int-intrinsics.hlsl | 116 ++++++ ...longvec-trivial-scalarized-intrinsics.hlsl | 87 ++++ ...vec-trivial-tertiary-float-intrinsics.hlsl | 86 ++++ ...ngvec-trivial-tertiary-int-intrinsics.hlsl | 131 ++++++ ...ongvec-trivial-unary-float-intrinsics.hlsl | 83 ++++ .../longvec-trivial-unary-int-intrinsics.hlsl | 86 ++++ utils/hct/hctdb.py | 24 +- 12 files changed, 1356 insertions(+), 108 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 4d8201df8d..8dda0230ba 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -424,6 +424,14 @@ struct IntrinsicLower { // IOP intrinsics. 
namespace { +// Creates the necessary scalar calls to for a "trivial" operation where only +// call instructions to a single function type are needed. +// The overload type `Ty` determines what scalarization might be required. +// Elements of any vectors in `refArgs` are extracted into scalars for each +// call generated while the same scalar values are used unaltered in each call. +// Utility objects `HlslOp` and `Builder` are used to generate calls to the +// given `DxilFunc` for each set of scalar arguments. +// The results are reconstructed into the given `RetTy` as needed. Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { @@ -459,12 +467,40 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, } } } -// Generates a DXIL operation over an overloaded type (Ty), returning a -// RetTy value; when Ty is a vector, it will replicate per-element operations -// into RetTy to rebuild it. + +// Creates a native vector call to for a "trivial" operation where only a single +// call instruction is needed. The overload and return types are the same vector +// type `Ty`. +// Utility objects `HlslOp` and `Builder` are used to create a call to the given +// `DxilFunc` with `RefArgs` arguments. +Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, + ArrayRef Args, Type *Ty, + OP *OP, IRBuilder<> &Builder) { + if (!Ty->isVoidTy()) + return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode)); + else + return Builder.CreateCall(Func, Args); // Cannot add name to void. +} + +// Generates a DXIL operation with the overloaded type based on `Ty` and return +// type `RetTy`. 
When Ty is a vector, it will either generate per-element calls +// for each vector element and reconstruct the vector type from those results or +// operate on and return native vectors depending on vector size and the value +// of `SupportsVectors`, which is deteremined by version and opcode support. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, Type *Ty, Type *RetTy, OP *hlslOP, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + bool SupportsVectors = false) { + + // If supported and the overload type is a vector with more than 1 element, + // create a native vector operation. + if (SupportsVectors && Ty->isVectorTy() && Ty->getVectorNumElements() > 1) { + Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); + return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP, + Builder); + } + + // Set overload type to the scalar type of `Ty` and generate call(s). Type *EltTy = Ty->getScalarType(); Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy); @@ -484,43 +520,66 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B); } -Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *Ty = src->getType(); +// Translate call that converts to a dxil unary operation with a different +// return type from the overload by passing the argument, explicit return type, +// and helper objects to the scalarizing unary dxil operation creation. 
+Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, + OP::OpCode opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Type *Ty = Src->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src}; + IRBuilder<> Builder(CI); + hlsl::OP *OP = &Helper.hlslOP; + Type *RetTy = CI->getType(); + Constant *opArg = OP->GetU32Const((unsigned)opcode); + Value *args[] = {opArg, Src}; - return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder); + return TrivialDxilOperation(opcode, args, Ty, RetTy, OP, Builder); } Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP, - Builder); + hlsl::OP *hlslOP, IRBuilder<> &Builder, + bool SupportsVectors = false) { + Type *Ty = src->getType(); + + Constant *OpArg = hlslOP->GetU32Const((unsigned)opcode); + Value *Args[] = {OpArg, src}; + + return TrivialDxilOperation(opcode, Args, Ty, Ty, hlslOP, Builder, + SupportsVectors); } Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { + hlsl::OP *hlslOP, IRBuilder<> &Builder, + bool SupportsVectors = false) { Type *Ty = src0->getType(); Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {opArg, src0, src1}; - return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); + return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder, + SupportsVectors); } Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, Value *src2, hlsl::OP *hlslOP, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + bool SupportsVectors = false) { Type *Ty = src0->getType(); Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {opArg, src0, src1, src2}; - return 
TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); + return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder, + SupportsVectors); } +// Translate call that trivially converts to a dxil unary operation by passing +// argument, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -528,11 +587,14 @@ Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); IRBuilder<> Builder(CI); hlsl::OP *hlslOP = &helper.hlslOP; - Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), - hlslOP, Builder); - return retVal; + + return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder, + helper.M.GetShaderModel()->IsSM69Plus()); } +// Translate call that trivially converts to a dxil binary operation by passing +// arguments, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. 
Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -542,11 +604,14 @@ Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - Value *binOp = - TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder); - return binOp; + return TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder, + helper.M.GetShaderModel()->IsSM69Plus()); } +// Translate call that trivially converts to a dxil trinary (aka tertiary) +// operation by passing arguments, return type, and helper objects to either +// scalarizing or native vector dxil operation creation depending on version +// and vector size. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -557,9 +622,8 @@ Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); IRBuilder<> Builder(CI); - Value *triOp = - TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder); - return triOp; + return TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder, + helper.M.GetShaderModel()->IsSM69Plus()); } Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -738,6 +802,12 @@ bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, } } + // Only apply on aggregates of 16 or fewer elements, + // representing the max 4x4 matrix size. + Type *Ty = x->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 16) + return false; + APFloat powAPF = isa(pow) ? 
cast(pow)->getElementAsAPFloat(0) : // should be a splat value @@ -1447,6 +1517,7 @@ Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *refArgs[] = {nullptr, CI->getOperand(1)}; return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); } + // Wave ballot intrinsic. Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, @@ -1899,9 +1970,11 @@ Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, IRBuilder<> Builder(CI); // min(max(x, minVal), maxVal). - Value *maxXMinVal = - TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder); - return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); + bool SupportsVectors = helper.M.GetShaderModel()->IsSM69Plus(); + Value *maxXMinVal = TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, + Builder, SupportsVectors); + return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder, + SupportsVectors); } Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2019,7 +2092,7 @@ Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitHi = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); // firstbitHi == -1? 
-1 : (bitWidth-1 -firstbitHi); IRBuilder<> Builder(CI); Constant *neg1 = Builder.getInt32(-1); @@ -2052,7 +2125,7 @@ Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitLo = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); return firstbitLo; } @@ -2214,8 +2287,9 @@ Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst); } val = Builder.CreateFMul(log2eConst, val); - Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder); - return exp; + + return TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder, + helper.M.GetShaderModel()->IsSM69Plus()); } Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2230,7 +2304,10 @@ Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, if (Ty != Ty->getScalarType()) { ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const); } - Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder); + + Value *log = + TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder, + helper.M.GetShaderModel()->IsSM69Plus()); return Builder.CreateFMul(ln2Const, log); } @@ -2248,7 +2325,9 @@ Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, log2_10Const = ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const); } - Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder); + Value *log = + TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder, + helper.M.GetShaderModel()->IsSM69Plus()); return Builder.CreateFMul(log2_10Const, log); } @@ -2431,17 +2510,18 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, return dotOP; } -Value *TranslateIDot(Value *arg0, Value *arg1, unsigned 
vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { - auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad; +// Instead of using a DXIL intrinsic, implement a dot product operation using +// multiply and add operations. Used for integer dots and long vectors. +Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, + IRBuilder<> &Builder, + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); Value *Result = Builder.CreateMul(Elt0, Elt1); - for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) { - Elt0 = Builder.CreateExtractElement(arg0, iVecElt); - Elt1 = Builder.CreateExtractElement(arg1, iVecElt); - Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, + for (unsigned Elt = 1; Elt < vecSize; ++Elt) { + Elt0 = Builder.CreateExtractElement(arg0, Elt); + Elt1 = Builder.CreateExtractElement(arg1, Elt); + Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP, Builder); } @@ -2480,11 +2560,16 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, unsigned vecSize = Ty->getVectorNumElements(); Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy()) { + Type *EltTy = Ty->getScalarType(); + if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_udot); + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_udot) + MadOpCode = DXIL::OpCode::UMad; + else if (EltTy->isFloatingPointTy()) + MadOpCode = DXIL::OpCode::FMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } } @@ -2601,8 +2686,9 @@ Value *TranslateSmoothStep(CallInst *CI, 
IntrinsicOp IOP, OP::OpCode opcode, Value *xSubMin = Builder.CreateFSub(x, minVal); Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin); - Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP, - Builder); + Value *s = + TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP, Builder, + helper.M.GetShaderModel()->IsSM69Plus()); // return s * s *(3-2*s). Constant *c2 = ConstantFP::get(CI->getType(), 2); Constant *c3 = ConstantFP::get(CI->getType(), 3); @@ -3032,8 +3118,10 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, if (arg0Ty->getScalarType()->isFloatingPointTy()) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_umul); + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_umul) + MadOpCode = DXIL::OpCode::UMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } } else { // mul(vector, scalar) == vector * scalar-splat @@ -6150,20 +6238,8 @@ Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateAnd(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateAnd(x, y); } Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6171,20 +6247,8 @@ Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *x = 
CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateOr(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateOr(x, y); } Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6194,21 +6258,8 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltCond = Builder.CreateExtractElement(cond, i); - Value *EltTrue = Builder.CreateExtractElement(t, i); - Value *EltFalse = Builder.CreateExtractElement(f, i); - Value *tmp = Builder.CreateSelect(EltCond, EltTrue, EltFalse); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateSelect(cond, t, f); } } // namespace @@ -6467,18 +6518,20 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos}, {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos}, - {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, + {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet, DXIL::OpCode::Countbits}, {IntrinsicOp::IOP_cross, TranslateCross, 
DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseX}, + {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineX}, - {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseY}, + {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineY}, {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes}, diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 027d7d3cbc..3dac550218 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6606,7 +6606,7 @@ bool HLSLExternalSource::MatchArguments( argTypes.clear(); const bool isVariadic = IsVariadicIntrinsicFunction(pIntrinsic); - static const UINT UnusedSize = 0xFF; + static const UINT UnusedSize = UINT_MAX; static const BYTE MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; #define CAB(cond, arg) \ { \ @@ -6622,7 +6622,7 @@ bool HLSLExternalSource::MatchArguments( ArBasicKind ComponentType[MaxIntrinsicArgs]; // Component type for each argument, // AR_BASIC_UNKNOWN if unspecified. - UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UNUSED_INDEX32 + UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UnusedSize // if unspecified. 
badArgIdx = MaxIntrinsicArgs; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..af6f96745c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl @@ -0,0 +1,391 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s + +// Test vector-enabled non-trivial intrinsics that take parameters of various types. + +RWByteAddressBuffer buf; +RWByteAddressBuffer ibuf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = 
buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // 
CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = ibuf.Load >(0); + vector sVec2 = ibuf.Load >(512); + vector sVec3 = ibuf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = ibuf.Load >(1025); + vector usVec2 = ibuf.Load >(1536); + vector usVec3 = ibuf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = ibuf.Load >(2049); + vector iVec2 = ibuf.Load >(2560); + vector iVec3 = ibuf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], 
i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = ibuf.Load >(3073); + vector uiVec2 = ibuf.Load >(3584); + vector uiVec3 = ibuf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = ibuf.Load >(4097); + vector lVec2 = ibuf.Load >(4608); + vector lVec3 = ibuf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + 
vector ulVec1 = ibuf.Load >(5121); + vector ulVec2 = ibuf.Load >(5632); + vector ulVec3 = ibuf.Load >(6144); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 35, <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 36, <[[NUM]] x half> [[tmp]], <[[NUM]] x half> [[hvec3]]) ; FMin(a,b) + vector hRes = clamp(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 35, <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 36, <[[NUM]] x float> [[tmp]], <[[NUM]] x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 35, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 36, <[[NUM]] x double> [[tmp]], <[[NUM]] x double> [[dvec3]]) ; FMin(a,b) + vector dRes = clamp(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 38, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[svec3]]) ; IMin(a,b) + vector sRes = clamp(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 39, <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 40, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[usvec3]]) ; UMin(a,b) + vector 
usRes = clamp(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 38, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[ivec3]]) ; IMin(a,b) + vector iRes = clamp(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 39, <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 40, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[uivec3]]) ; UMin(a,b) + vector uiRes = clamp(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 38, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[lvec3]]) ; IMin(a,b) + vector lRes = clamp(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 39, <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 40, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[ulvec3]]) ; UMin(a,b) + vector ulRes = clamp(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec2]], [[hvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x half> zeroinitializer, <[[NUM]] x half> [[fvec2]], [[fvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x float> zeroinitializer, <[[NUM]] x float> [[hvec1]], @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp]]) ; Exp(value) + hRes 
+= exp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fmul fast <[[NUM]] x float> [[fvec1]], @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[hvec2]], [[hvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 7, <[[NUM]] x half> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x half> [[mul]], [[sub]] + hRes += smoothstep(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x float> [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 7, <[[NUM]] x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. 
+ + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fmul fast <[[NUM]] x half> [[hvec2]], [[fvec2]], [[hvec3]], [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <[[NUM]] x float> [[fvec1]] to <[[NUM]] x i32> + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], [[add]], [[shr]] to <[[NUM]] x float> + // CHECK: [[sel:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x float> [[i2f]], <[[NUM]] x float> zeroinitializer + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec2]] + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + fRes += lerp(fVec2, fVec3, fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x half> , [[hvec1]] + hRes += rcp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x float> , [[fvec1]] + fRes += rcp(fVec1); + + vector signs = 1; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] 
x float> [[fvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(dVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i16> [[usvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: [[sub:%.*]] = sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i32> [[uivec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> 
[[cmp]] to <[[NUM]] x i32> + signs *= sign(uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i64> [[ulvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(ulVec2); + + iRes += signs; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i16> [[svec1]], zeroinitializer + // CHECK: or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + sRes += or(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i16> [[svec3]], zeroinitializer + // CHECK: and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + sRes += and(sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: select <[[NUM]] x i1> [[bvec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]] + sRes += select(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + + ibuf.Store >(0, sRes); + ibuf.Store >(1024, usRes); + ibuf.Store >(2048, iRes); + ibuf.Store >(3072, uiRes); + ibuf.Store >(4096, lRes); + ibuf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..4886f04e01 --- /dev/null 
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl @@ -0,0 +1,146 @@ +// RUN: %dxc -T ps_6_9 %s | FileCheck %s + +// Long vector tests for vec ops that scalarize to something more complex +// than a simple repetition of the same dx.op calls. + +StructuredBuffer< vector > buf; +ByteAddressBuffer rbuf; + +float4 main(uint i : SV_PrimitiveID, bool b : B) : SV_Target { + vector vec1 = rbuf.Load< vector >(i++*32); + vector vec2 = rbuf.Load< vector >(i++*32); + vector vec3 = rbuf.Load< vector >(i++*32); + + // CHECK: fdiv fast <8 x float> + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: call float @dx.op.unary.f32(i32 17, float %{{.*}}) ; Atan(value) + // CHECK: fadd fast <8 x float> %{{.*}}, %{{.*}}, + // CHECK: fcmp fast oeq <8 x float> + // CHECK: fcmp fast oge <8 x float> + // CHECK: fcmp fast olt <8 x float> + // CHECK: and <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> + // CHECK: and <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> + // CHECK: and <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> + // CHECK: fsub fast <8 x float> + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; 
FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + // CHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) ; FAbs(value) + + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + // CHECK: call float @dx.op.unary.f32(i32 22, float %{{.*}}) ; Frc(value) + + // CHECK: fsub fast <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> + // CHECK: fmul fast <8 x float> + vec1 = fmod(vec1, vec2); + + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: fmul fast <8 x float> + vec1 = ldexp(vec1, vec2); + + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float 
@dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) ; Log(value) + // CHECK: fmul fast <8 x float> + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + // CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) ; Exp(value) + vec1 = pow(vec1, vec2); + + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: call float @dx.op.unary.f32(i32 29, float %{{.*}}) ; Round_z(value) + // CHECK: fsub fast <8 x float> + vec1 = modf(vec1, vec2); + + // CHECK: fmul fast float + // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) + // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) + // CHECK: call 
float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) + // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) + // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) + // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) + // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) + vec1 = dot(vec1, vec2); + + vector bvec = b; + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + // CHECK: or i1 + bvec &= any(vec1); + + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + // CHECK: and i1 + bvec &= all(vec2); + + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + // call {{.*}} @dx.op.wave + return WaveMatch(bvec); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl new file mode 100644 index 0000000000..02cad5b894 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled binary intrinsics that take float-like parameters and +// and are "trivial" in 
that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 
4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) + vector hRes = FUNC(hVec1, hVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) + vector fRes = FUNC(fVec1, fVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) + vector dRes = FUNC(dVec1, dVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl new file mode 100644 index 0000000000..994246b753 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP 
+#define UOP OP +#endif + +// Test vector-enabled binary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec1:%.*]] = 
extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1024); + vector usVec2 = buf.Load >(1536); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2048); + vector iVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3072); + vector uiVec2 = buf.Load >(3584); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4096); + vector lVec2 = buf.Load >(4608); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec1:%.*]] = extractvalue 
%dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5120); + vector ulVec2 = buf.Load >(5632); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) + vector sRes = FUNC(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) + vector usRes = FUNC(usVec1, usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) + vector iRes = FUNC(iVec1, iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) + vector uiRes = FUNC(uiVec1, uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) + vector lRes = FUNC(lVec1, lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) + vector ulRes = FUNC(ulVec1, ulVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..40ffd3fe63 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -0,0 +1,87 @@ +// The binary part of some of these is all just a vector math ops with as many unary dxops as elements. +// These will have apparent mismatches between the ARITY define and the check prefix. + +// RUN: %dxc -DFUNC=abs -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=pow -DARITY=2 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=f16tof32 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=f32tof16 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=isfinite -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isinf -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isnan -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=modf -DARITY=2 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy -DARITY=1 -T ps_6_9 %s | FileCheck %s 
--check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=fwidth -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossDiagonal -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=WaveActiveBitAnd -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitOr -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitXor -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMin -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMax -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitAnd -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitOr -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitXor -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixProduct -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc 
-DFUNC=WaveMultiPrefixSum -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneFirst -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveAllEqual -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE + +#ifndef TYPE +#define TYPE float +#endif + +#if ARITY == 1 +#define CALLARGS(x,y,z) x +#elif ARITY == 2 +#define CALLARGS(x,y,z) x, y +#elif ARITY == 3 +#define CALLARGS(x,y,z) x, y, z +// ARITY 4 is used for 1 vec + scalar +#elif ARITY == 4 +#define CALLARGS(x,y,z) x, i +// ARITY 5 is used for 1 vec + uint4 mask for wavemultiprefix* +#elif ARITY == 5 +#define CALLARGS(x,y,z) x, m +#endif + +StructuredBuffer< vector > buf; +ByteAddressBuffer rbuf; + +float4 main(uint i : SV_PrimitiveID, uint4 m : M) : SV_Target { + vector arg1 = rbuf.Load< vector >(i++*32); + vector arg2 = rbuf.Load< vector >(i++*32); + vector arg3 = rbuf.Load< vector >(i++*32); + + // UNARY: call {{.*}} [[DXOP:@dx.op.unary]] + // BINARY: call {{.*}} [[DXOP:@dx.op.binary]] + // TERTIARY: call {{.*}} [[DXOP:@dx.op.tertiary]] + // LEGACY: call {{.*}} [[DXOP:@dx.op.legacy]] + // SPECFLT: call {{.*}} [[DXOP:@dx.op.isSpecialFloat]] + // QUAD: call {{.*}} [[DXOP:@dx.op.quad]] + // WAVE: call {{.*}} [[DXOP:@dx.op.wave]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + + vector ret = FUNC(CALLARGS(arg1, arg2, arg3)); + return float4(ret[0] + ret[1], ret[2] + ret[3], ret[4] + ret[5], ret[6] + ret[7]); +} diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl new file mode 100644 index 0000000000..e32ebc1db2 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled ternary intrinsics that take float-like parameters +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +// Given that all we have at the moment are fmad and fma and the latter only takes doubles, +// fma is tacked on as an additional check. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> +
+[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] 
@dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.tertiary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]], <[[NUM]] x half> [[hvec3]]) + vector hRes = FUNC(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.tertiary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]], <[[NUM]] x float> [[fvec3]]) + vector fRes = FUNC(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes = FUNC(dVec1, dVec2, dVec3); + + // Tacked on fma() check since it only takes doubles. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 47, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes2 = fma(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + buf.Store >(5120, dRes2); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl new file mode 100644 index 0000000000..50f98715e4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl @@ -0,0 +1,131 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled tertiary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + vector sVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, 
%dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1025); + vector usVec2 = buf.Load >(1536); + vector usVec3 = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2049); + vector iVec2 = buf.Load >(2560); + vector iVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3073); + vector uiVec2 = buf.Load >(3584); + vector uiVec3 = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] 
@dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4097); + vector lVec2 = buf.Load >(4608); + vector lVec3 = buf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5121); + vector ulVec2 = buf.Load >(5632); + vector ulVec3 = buf.Load >(6144); + + // Test simple matching type overloads. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]]) + vector sRes = FUNC(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]], <[[NUM]] x i16> [[usvec3]]) + vector usRes = FUNC(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]], <[[NUM]] x i32> [[ivec3]]) + vector iRes = FUNC(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]], <[[NUM]] x i32> [[uivec3]]) + vector uiRes = FUNC(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]], <[[NUM]] x i64> [[lvec3]]) + vector lRes = FUNC(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]], <[[NUM]] x i64> [[ulvec3]]) + vector ulRes = FUNC(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl new file mode 100644 index 
0000000000..91ab631a7e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -0,0 +1,83 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 
-enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same 
parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec = buf.Load >(1024); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec]]) + vector hRes = FUNC(hVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec]]) + vector fRes = FUNC(fVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(1024, fRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl new file mode 100644 index 0000000000..ef0b250745 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // Capture opcode number. 
+ // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec = buf.Load >(5120); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec]]) + vector sRes = FUNC(sVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: 
insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[usvec]]) + vector usRes = FUNC(usVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec]]) + vector iRes = FUNC(iVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[uivec]]) + vector uiRes = FUNC(uiVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec]]) + vector lRes = FUNC(lVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[ulvec]]) + vector ulRes = FUNC(ulVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 691c3ba58f..548aae4192 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1503,7 +1503,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1537,7 +1537,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hf", + "hf<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1554,7 +1554,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the reverse bit pattern of the input value", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1601,7 +1601,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1619,7 +1619,7 @@ def UFI(name, 
**mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1674,7 +1674,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "hfd", + "hfd<", "rn", [ db_dxil_param( @@ -1691,7 +1691,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "d", + "d<", "rn", [ db_dxil_param( @@ -1715,7 +1715,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs an integral " + i, - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "the operation result"), @@ -2608,7 +2608,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2626,7 +2626,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2644,7 +2644,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2662,7 +2662,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( From db9b361ad0fbfe7f9710572bd73948b895d5c73f Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 7 Apr 2025 13:04:22 -0600 Subject: [PATCH 2/6] generated code update --- lib/DXIL/DxilOperations.cpp | 140 ++++++++++++++++++------------------ 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 0b4c7218d4..7047d9fe59 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -96,16 +96,16 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { "unary", Attribute::ReadNone, 1, - {{0x7}}, - {{0x0}}}, // Overloads: hfd + 
{{0x407}}, + {{0x7}}}, // Overloads: hfd Date: Mon, 7 Apr 2025 13:54:15 -0600 Subject: [PATCH 3/6] Pre-empt any and all variable capitalization discussion Any altered function is brought inline with LLVM coding standards for variable capitalization. --- lib/HLSL/HLOperationLower.cpp | 402 +++++++++++++++++----------------- 1 file changed, 199 insertions(+), 203 deletions(-) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 8dda0230ba..6292e66120 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -487,24 +487,24 @@ Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, // for each vector element and reconstruct the vector type from those results or // operate on and return native vectors depending on vector size and the value // of `SupportsVectors`, which is determined by version and opcode support. -Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, - Type *Ty, Type *RetTy, OP *hlslOP, +Value *TrivialDxilOperation(OP::OpCode Opcode, ArrayRef Args, + Type *Ty, Type *RetTy, OP *OP, IRBuilder<> &Builder, bool SupportsVectors = false) { // If supported and the overload type is a vector with more than 1 element, // create a native vector operation. if (SupportsVectors && Ty->isVectorTy() && Ty->getVectorNumElements() > 1) { - Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); - return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP, + Function *Func = OP->GetOpFunc(Opcode, Ty); + return TrivialDxilVectorOperation(Func, Opcode, Args, Ty, OP, Builder); } // Set overload type to the scalar type of `Ty` and generate call(s). 
Type *EltTy = Ty->getScalarType(); - Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy); + Function *Func = OP->GetOpFunc(Opcode, EltTy); - return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP, + return TrivialDxilOperation(Func, Opcode, Args, Ty, RetTy, OP, Builder); } @@ -524,9 +524,9 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, // return type from the overload by passing the argument, explicit return type, // and helper objects to the scalarizing unary dxil operation creation. Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, - OP::OpCode opcode, + OP::OpCode Opcode, HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *pObjHelper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); Type *Ty = Src->getType(); @@ -534,96 +534,96 @@ Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, IRBuilder<> Builder(CI); hlsl::OP *OP = &Helper.hlslOP; Type *RetTy = CI->getType(); - Constant *opArg = OP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, Src}; + Constant *OpArg = OP->GetU32Const((unsigned)Opcode); + Value *Args[] = {OpArg, Src}; - return TrivialDxilOperation(opcode, args, Ty, RetTy, OP, Builder); + return TrivialDxilOperation(Opcode, Args, Ty, RetTy, OP, Builder); } -Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src, - hlsl::OP *hlslOP, IRBuilder<> &Builder, +Value *TrivialDxilUnaryOperation(OP::OpCode Opcode, Value *Src, + hlsl::OP *OP, IRBuilder<> &Builder, bool SupportsVectors = false) { - Type *Ty = src->getType(); + Type *Ty = Src->getType(); - Constant *OpArg = hlslOP->GetU32Const((unsigned)opcode); - Value *Args[] = {OpArg, src}; + Constant *OpArg = OP->GetU32Const((unsigned)Opcode); + Value *Args[] = {OpArg, Src}; - return TrivialDxilOperation(opcode, Args, Ty, Ty, hlslOP, Builder, + return TrivialDxilOperation(Opcode, Args, Ty, Ty, OP, Builder, SupportsVectors); } -Value 
*TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, - hlsl::OP *hlslOP, IRBuilder<> &Builder, +Value *TrivialDxilBinaryOperation(OP::OpCode Opcode, Value *Src0, Value *Src1, + hlsl::OP *OP, IRBuilder<> &Builder, bool SupportsVectors = false) { - Type *Ty = src0->getType(); + Type *Ty = Src0->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src0, src1}; + Constant *OpArg = OP->GetU32Const((unsigned)Opcode); + Value *Args[] = {OpArg, Src0, Src1}; - return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder, + return TrivialDxilOperation(Opcode, Args, Ty, Ty, OP, Builder, SupportsVectors); } -Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, - Value *src2, hlsl::OP *hlslOP, +Value *TrivialDxilTrinaryOperation(OP::OpCode Opcode, Value *Src0, Value *Src1, + Value *Src2, hlsl::OP *OP, IRBuilder<> &Builder, bool SupportsVectors = false) { - Type *Ty = src0->getType(); + Type *Ty = Src0->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src0, src1, src2}; + Constant *OpArg = OP->GetU32Const((unsigned)Opcode); + Value *Args[] = {OpArg, Src0, Src1, Src2}; - return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder, + return TrivialDxilOperation(Opcode, Args, Ty, Ty, OP, Builder, SupportsVectors); } // Translate call that trivially converts to a dxil unary operation by passing // argument, return type, and helper objects to either scalarizing or native // vector dxil operation creation depending on version and vector size. 
-Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Value *Src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); IRBuilder<> Builder(CI); - hlsl::OP *hlslOP = &helper.hlslOP; + hlsl::OP *OP = &Helper.hlslOP; - return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder, - helper.M.GetShaderModel()->IsSM69Plus()); + return TrivialDxilUnaryOperation(Opcode, Src0, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); } // Translate call that trivially converts to a dxil binary operation by passing // arguments, return type, and helper objects to either scalarizing or native // vector dxil operation creation depending on version and vector size. 
-Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); - Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); + hlsl::OP *OP = &Helper.hlslOP; + Value *Src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); + Value *Src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - return TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder, - helper.M.GetShaderModel()->IsSM69Plus()); + return TrivialDxilBinaryOperation(Opcode, Src0, Src1, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); } // Translate call that trivially converts to a dxil trinary (aka tertiary) // operation by passing arguments, return type, and helper objects to either // scalarizing or native vector dxil operation creation depending on version // and vector size. 
-Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); - Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); - Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); + hlsl::OP *OP = &Helper.hlslOP; + Value *Src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); + Value *Src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); + Value *Src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); IRBuilder<> Builder(CI); - return TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder, - helper.M.GetShaderModel()->IsSM69Plus()); + return TrivialDxilTrinaryOperation(Opcode, Src0, Src1, Src2, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); } Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -788,54 +788,54 @@ Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP, // | float | False | 2 | // +----------+---------------------+------------------+ -bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, - int32_t &powI) { +bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *X, Value *Pow, + int32_t &PowI) { // Applicable only when power is a literal. - if (!isa(pow) && !isa(pow)) { + if (!isa(Pow) && !isa(Pow)) { return false; } // Only apply this code gen on splat values. 
- if (ConstantDataVector *cdv = dyn_cast(pow)) { - if (!hlsl::dxilutil::IsSplat(cdv)) { + if (ConstantDataVector *Cdv = dyn_cast(Pow)) { + if (!hlsl::dxilutil::IsSplat(Cdv)) { return false; } } // Only apply on aggregates of 16 or fewer elements, // representing the max 4x4 matrix size. - Type *Ty = x->getType(); + Type *Ty = X->getType(); if (Ty->isVectorTy() && Ty->getVectorNumElements() > 16) return false; - APFloat powAPF = isa(pow) - ? cast(pow)->getElementAsAPFloat(0) + APFloat PowAPF = isa(Pow) + ? cast(Pow)->getElementAsAPFloat(0) : // should be a splat value - cast(pow)->getValueAPF(); - APSInt powAPS(32, false); - bool isExact = false; + cast(Pow)->getValueAPF(); + APSInt PowAPS(32, false); + bool IsExact = false; // Try converting float value of power to integer and also check if the float // value is exact. - APFloat::opStatus status = - powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact); - if (status == APFloat::opStatus::opOK && isExact) { - powI = powAPS.getExtValue(); - uint32_t powU = abs(powI); - int setBitCount = 0; - int maxBitSetPos = -1; - for (int i = 0; i < 32; i++) { - if ((powU >> i) & 1) { - setBitCount++; - maxBitSetPos = i; + APFloat::opStatus Status = + PowAPF.convertToInteger(PowAPS, APFloat::rmTowardZero, &IsExact); + if (Status == APFloat::opStatus::opOK && IsExact) { + PowI = PowAPS.getExtValue(); + uint32_t PowU = abs(PowI); + int SetBitCount = 0; + int MaxBitSetPos = -1; + for (int I = 0; I < 32; I++) { + if ((PowU >> I) & 1) { + SetBitCount++; + MaxBitSetPos = I; } } - DXASSERT(maxBitSetPos <= 30, "msb should always be zero."); - unsigned numElem = - isa(pow) ? x->getType()->getVectorNumElements() : 1; - int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1; - int mulOpNeeded = maxBitSetPos + setBitCount - 1; - return mulOpNeeded <= mulOpThreshold; + DXASSERT(MaxBitSetPos <= 30, "msb should always be zero."); + unsigned NumElem = + isa(Pow) ? 
X->getType()->getVectorNumElements() : 1; + int MulOpThreshold = PowI < 0 ? NumElem + 1 : 2 * NumElem + 1; + int MulOpNeeded = MaxBitSetPos + SetBitCount - 1; + return MulOpNeeded <= MulOpThreshold; } return false; @@ -2087,46 +2087,44 @@ Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return Result; } -Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - Value *firstbitHi = - TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); + Value *FirstbitHi = + TrivialUnaryOperationRet(CI, IOP, Opcode, Helper, ObjHelper, Translated); // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi); IRBuilder<> Builder(CI); - Constant *neg1 = Builder.getInt32(-1); - Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Constant *Neg1 = Builder.getInt32(-1); + Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); - Type *Ty = src->getType(); + Type *Ty = Src->getType(); IntegerType *EltTy = cast(Ty->getScalarType()); - Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth() - 1); + Constant *BitWidth = Builder.getInt32(EltTy->getBitWidth() - 1); if (Ty == Ty->getScalarType()) { - Value *sub = Builder.CreateSub(bitWidth, firstbitHi); - Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi); - return Builder.CreateSelect(cond, neg1, sub); + Value *Sub = Builder.CreateSub(BitWidth, FirstbitHi); + Value *Cond = Builder.CreateICmpEQ(Neg1, FirstbitHi); + return Builder.CreateSelect(Cond, Neg1, Sub); } else { - Value *result = UndefValue::get(CI->getType()); - unsigned vecSize = Ty->getVectorNumElements(); - for (unsigned i = 0; i < vecSize; i++) { - Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i); - Value *sub = 
Builder.CreateSub(bitWidth, EltFirstBit); - Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit); - Value *Elt = Builder.CreateSelect(cond, neg1, sub); - result = Builder.CreateInsertElement(result, Elt, i); + Value *Result = UndefValue::get(CI->getType()); + unsigned VecSize = Ty->getVectorNumElements(); + for (unsigned I = 0; I < VecSize; I++) { + Value *EltFirstBit = Builder.CreateExtractElement(FirstbitHi, I); + Value *Sub = Builder.CreateSub(BitWidth, EltFirstBit); + Value *Cond = Builder.CreateICmpEQ(Neg1, EltFirstBit); + Value *Elt = Builder.CreateSelect(Cond, Neg1, Sub); + Result = Builder.CreateInsertElement(Result, Elt, I); } - return result; + return Result; } } -Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - Value *firstbitLo = - TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); - return firstbitLo; + return TrivialUnaryOperationRet(CI, IOP, Opcode, Helper, ObjHelper, Translated); } Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2273,63 +2271,61 @@ Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return TranslateLength(CI, sub, hlslOP); } -Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; + hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); Type *Ty = CI->getType(); - Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); - Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), 
M_LOG2E); - if (Ty != Ty->getScalarType()) { - log2eConst = - ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst); - } - val = Builder.CreateFMul(log2eConst, val); + Value *Val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Constant *Log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E); + if (Ty != Ty->getScalarType()) + Log2eConst = + ConstantVector::getSplat(Ty->getVectorNumElements(), Log2eConst); + Val = Builder.CreateFMul(Log2eConst, Val); - return TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder, - helper.M.GetShaderModel()->IsSM69Plus()); + return TrivialDxilUnaryOperation(OP::OpCode::Exp, Val, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); } -Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; + hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); Type *Ty = CI->getType(); - Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); - Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2); - if (Ty != Ty->getScalarType()) { - ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const); - } + Value *Val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Constant *Ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2); + if (Ty != Ty->getScalarType()) + Ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), Ln2Const); Value *log = - TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder, - helper.M.GetShaderModel()->IsSM69Plus()); + TrivialDxilUnaryOperation(OP::OpCode::Log, Val, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); - return Builder.CreateFMul(ln2Const, log); + return Builder.CreateFMul(Ln2Const, log); } -Value *TranslateLog10(CallInst 
*CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; + hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); Type *Ty = CI->getType(); - Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); - Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10); + Value *Val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Constant *Log2to10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10); if (Ty != Ty->getScalarType()) { - log2_10Const = - ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const); + Log2to10Const = + ConstantVector::getSplat(Ty->getVectorNumElements(), Log2to10Const); } - Value *log = - TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder, - helper.M.GetShaderModel()->IsSM69Plus()); + Value *Log = + TrivialDxilUnaryOperation(OP::OpCode::Log, Val, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); - return Builder.CreateFMul(log2_10Const, log); + return Builder.CreateFMul(Log2to10Const, Log); } Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2512,16 +2508,16 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, // Instead of using a DXIL intrinsic, implement a dot product operation using // multiply and add operations. Used for integer dots and long vectors. 
-Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, +Value *ExpandDot(Value *Arg0, Value *Arg1, unsigned VecSize, hlsl::OP *OP, IRBuilder<> &Builder, DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { - Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); - Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); + Value *Elt0 = Builder.CreateExtractElement(Arg0, (uint64_t)0); + Value *Elt1 = Builder.CreateExtractElement(Arg1, (uint64_t)0); Value *Result = Builder.CreateMul(Elt0, Elt1); - for (unsigned Elt = 1; Elt < vecSize; ++Elt) { - Elt0 = Builder.CreateExtractElement(arg0, Elt); - Elt1 = Builder.CreateExtractElement(arg1, Elt); - Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP, + for (unsigned Elt = 1; Elt < VecSize; ++Elt) { + Elt0 = Builder.CreateExtractElement(Arg0, Elt); + Elt1 = Builder.CreateExtractElement(Arg1, Elt); + Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, OP, Builder); } @@ -2550,26 +2546,26 @@ Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize, } } -Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); - Type *Ty = arg0->getType(); - unsigned vecSize = Ty->getVectorNumElements(); - Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); + hlsl::OP *OP = &Helper.hlslOP; + Value *Arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); + Type *Ty = Arg0->getType(); + unsigned VecSize = Ty->getVectorNumElements(); + Value *Arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); Type *EltTy = Ty->getScalarType(); if (EltTy->isFloatingPointTy() 
&& Ty->getVectorNumElements() <= 4) { - return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); + return TranslateFDot(Arg0, Arg1, VecSize, OP, Builder); } else { DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; if (IOP == IntrinsicOp::IOP_udot) MadOpCode = DXIL::OpCode::UMad; else if (EltTy->isFloatingPointTy()) MadOpCode = DXIL::OpCode::FMad; - return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); + return ExpandDot(Arg0, Arg1, VecSize, OP, Builder, MadOpCode); } } @@ -2672,32 +2668,32 @@ Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, return refract; } -Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; + hlsl::OP *OP = &Helper.hlslOP; // s = saturate((x-min)/(max-min)). 
IRBuilder<> Builder(CI); - Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx); - Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx); - Value *maxSubMin = Builder.CreateFSub(maxVal, minVal); - Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx); - Value *xSubMin = Builder.CreateFSub(x, minVal); - Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin); - - Value *s = - TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP, Builder, - helper.M.GetShaderModel()->IsSM69Plus()); + Value *MinVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx); + Value *MaxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx); + Value *MaxSubMin = Builder.CreateFSub(MaxVal, MinVal); + Value *X = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx); + Value *XSubMin = Builder.CreateFSub(X, MinVal); + Value *SatVal = Builder.CreateFDiv(XSubMin, MaxSubMin); + + Value *S = + TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, SatVal, OP, Builder, + Helper.M.GetShaderModel()->IsSM69Plus()); // return s * s *(3-2*s). 
- Constant *c2 = ConstantFP::get(CI->getType(), 2); - Constant *c3 = ConstantFP::get(CI->getType(), 3); + Constant *C2 = ConstantFP::get(CI->getType(), 2); + Constant *C3 = ConstantFP::get(CI->getType(), 3); - Value *sMul2 = Builder.CreateFMul(s, c2); - Value *result = Builder.CreateFSub(c3, sMul2); - result = Builder.CreateFMul(s, result); - result = Builder.CreateFMul(s, result); - return result; + Value *SMul2 = Builder.CreateFMul(S, C2); + Value *Result = Builder.CreateFSub(C3, SMul2); + Result = Builder.CreateFMul(S, Result); + Result = Builder.CreateFMul(S, Result); + return Result; } Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -3099,47 +3095,47 @@ Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) { return Result; } -Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); - Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *arg0Ty = arg0->getType(); - Type *arg1Ty = arg1->getType(); + hlsl::OP *OP = &Helper.hlslOP; + Value *Arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); + Value *Arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); + Type *Arg0Ty = Arg0->getType(); + Type *Arg1Ty = Arg1->getType(); IRBuilder<> Builder(CI); - if (arg0Ty->isVectorTy()) { - if (arg1Ty->isVectorTy()) { + if (Arg0Ty->isVectorTy()) { + if (Arg1Ty->isVectorTy()) { // mul(vector, vector) == dot(vector, vector) - unsigned vecSize = arg0Ty->getVectorNumElements(); - if (arg0Ty->getScalarType()->isFloatingPointTy()) { - return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); + unsigned VecSize = Arg0Ty->getVectorNumElements(); + if 
(Arg0Ty->getScalarType()->isFloatingPointTy()) { + return TranslateFDot(Arg0, Arg1, VecSize, OP, Builder); } else { DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; if (IOP == IntrinsicOp::IOP_umul) MadOpCode = DXIL::OpCode::UMad; - return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); + return ExpandDot(Arg0, Arg1, VecSize, OP, Builder, MadOpCode); } } else { // mul(vector, scalar) == vector * scalar-splat - arg1 = SplatToVector(arg1, arg0Ty, Builder); + Arg1 = SplatToVector(Arg1, Arg0Ty, Builder); } } else { - if (arg1Ty->isVectorTy()) { + if (Arg1Ty->isVectorTy()) { // mul(scalar, vector) == scalar-splat * vector - arg0 = SplatToVector(arg0, arg1Ty, Builder); + Arg0 = SplatToVector(Arg0, Arg1Ty, Builder); } // else mul(scalar, scalar) == scalar * scalar; } // create fmul/mul for the pair of vectors or scalars - if (arg0Ty->getScalarType()->isFloatingPointTy()) { - return Builder.CreateFMul(arg0, arg1); + if (Arg0Ty->getScalarType()->isFloatingPointTy()) { + return Builder.CreateFMul(Arg0, Arg1); } else { - return Builder.CreateMul(arg0, arg1); + return Builder.CreateMul(Arg0, Arg1); } } From 3f7b1086662f1335696f2f891a4e1deaa79eb09a Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 7 Apr 2025 13:57:59 -0600 Subject: [PATCH 4/6] clang-format --- lib/HLSL/HLOperationLower.cpp | 41 +++++++++++++++-------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 6292e66120..ed6bdc3f6d 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -474,8 +474,8 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, // Utility objects `HlslOp` and `Builder` are used to create a call to the given // `DxilFunc` with `RefArgs` arguments. 
Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, - ArrayRef Args, Type *Ty, - OP *OP, IRBuilder<> &Builder) { + ArrayRef Args, Type *Ty, OP *OP, + IRBuilder<> &Builder) { if (!Ty->isVoidTy()) return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode)); else @@ -487,25 +487,22 @@ Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, // for each vector element and reconstruct the vector type from those results or // operate on and return native vectors depending on vector size and the value // of `SupportsVectors`, which is deteremined by version and opcode support. -Value *TrivialDxilOperation(OP::OpCode Opcode, ArrayRef Args, - Type *Ty, Type *RetTy, OP *OP, - IRBuilder<> &Builder, +Value *TrivialDxilOperation(OP::OpCode Opcode, ArrayRef Args, Type *Ty, + Type *RetTy, OP *OP, IRBuilder<> &Builder, bool SupportsVectors = false) { // If supported and the overload type is a vector with more than 1 element, // create a native vector operation. if (SupportsVectors && Ty->isVectorTy() && Ty->getVectorNumElements() > 1) { Function *Func = OP->GetOpFunc(Opcode, Ty); - return TrivialDxilVectorOperation(Func, Opcode, Args, Ty, OP, - Builder); + return TrivialDxilVectorOperation(Func, Opcode, Args, Ty, OP, Builder); } // Set overload type to the scalar type of `Ty` and generate call(s). 
Type *EltTy = Ty->getScalarType(); Function *Func = OP->GetOpFunc(Opcode, EltTy); - return TrivialDxilOperation(Func, Opcode, Args, Ty, RetTy, OP, - Builder); + return TrivialDxilOperation(Func, Opcode, Args, Ty, RetTy, OP, Builder); } Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, @@ -540,8 +537,8 @@ Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, return TrivialDxilOperation(Opcode, Args, Ty, RetTy, OP, Builder); } -Value *TrivialDxilUnaryOperation(OP::OpCode Opcode, Value *Src, - hlsl::OP *OP, IRBuilder<> &Builder, +Value *TrivialDxilUnaryOperation(OP::OpCode Opcode, Value *Src, hlsl::OP *OP, + IRBuilder<> &Builder, bool SupportsVectors = false) { Type *Ty = Src->getType(); @@ -2124,7 +2121,8 @@ Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - return TrivialUnaryOperationRet(CI, IOP, Opcode, Helper, ObjHelper, Translated); + return TrivialUnaryOperationRet(CI, IOP, Opcode, Helper, ObjHelper, + Translated); } Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2273,8 +2271,7 @@ Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *ObjHelper, - bool &Translated) { + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); Type *Ty = CI->getType(); @@ -2291,8 +2288,7 @@ Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *ObjHelper, - bool &Translated) { + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); Type *Ty = CI->getType(); @@ -2316,7 +2312,8 @@ Value 
*TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, IRBuilder<> Builder(CI); Type *Ty = CI->getType(); Value *Val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); - Constant *Log2to10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10); + Constant *Log2to10Const = + ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10); if (Ty != Ty->getScalarType()) { Log2to10Const = ConstantVector::getSplat(Ty->getVectorNumElements(), Log2to10Const); @@ -2517,8 +2514,8 @@ Value *ExpandDot(Value *Arg0, Value *Arg1, unsigned VecSize, hlsl::OP *OP, for (unsigned Elt = 1; Elt < VecSize; ++Elt) { Elt0 = Builder.CreateExtractElement(Arg0, Elt); Elt1 = Builder.CreateExtractElement(Arg1, Elt); - Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, OP, - Builder); + Result = + TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, OP, Builder); } return Result; @@ -2548,8 +2545,7 @@ Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize, Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *ObjHelper, - bool &Translated) { + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { hlsl::OP *OP = &Helper.hlslOP; Value *Arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Type *Ty = Arg0->getType(); @@ -3097,8 +3093,7 @@ Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) { Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *ObjHelper, - bool &Translated) { + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { hlsl::OP *OP = &Helper.hlslOP; Value *Arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); From 907cdba8c14a5c70f41a4aed90360c8afb5ac3bd Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 7 Apr 2025 15:03:06 -0600 Subject: [PATCH 5/6] Fix wrong mul type and tightened up dot() testing Was using int dot for the float operands as it 
was originally an int-only lowering function. --- lib/HLSL/HLOperationLower.cpp | 6 +++++- .../types/longvec-scalarized-intrinsics.hlsl | 17 +++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index ed6bdc3f6d..a2bab818a8 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -2510,7 +2510,11 @@ Value *ExpandDot(Value *Arg0, Value *Arg1, unsigned VecSize, hlsl::OP *OP, DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { Value *Elt0 = Builder.CreateExtractElement(Arg0, (uint64_t)0); Value *Elt1 = Builder.CreateExtractElement(Arg1, (uint64_t)0); - Value *Result = Builder.CreateMul(Elt0, Elt1); + Value *Result; + if (Elt0->getType()->isFloatingPointTy()) + Result = Builder.CreateFMul(Elt0, Elt1); + else + Result = Builder.CreateMul(Elt0, Elt1); for (unsigned Elt = 1; Elt < VecSize; ++Elt) { Elt0 = Builder.CreateExtractElement(Arg0, Elt); Elt1 = Builder.CreateExtractElement(Arg1, Elt); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl index 4886f04e01..7d5da99e21 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl @@ -104,14 +104,15 @@ float4 main(uint i : SV_PrimitiveID, bool b : B) : SV_Target { // CHECK: fsub fast <8 x float> vec1 = modf(vec1, vec2); - // CHECK: fmul fast float - // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) - // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) - // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) - // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) - // CHECK: call 
float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) - // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) - // CHECK: call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; FMad(a,b,c) + // CHECK: [[el:%.*]] = extractelement <8 x float> + // CHECK: [[mul:%.*]] = fmul fast float [[el]] + // CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) + // CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) + // CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) + // CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) + // CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) + // CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) + // CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) vec1 = dot(vec1, vec2); vector bvec = b; From dcc76b44daa2f7c2adf0d0e21c1bc6c21f293a9f Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 8 Apr 2025 08:05:57 -0600 Subject: [PATCH 6/6] Add IR test for dxilgen pass --- .../passes/longvec-intrinsics.hlsl | 186 ++++++++ .../CodeGenDXIL/passes/longvec-intrinsics.ll | 434 ++++++++++++++++++ 2 files changed, 620 insertions(+) create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl new file mode 100644 index 
0000000000..11d705305d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl @@ -0,0 +1,186 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=13 %s | FileCheck %s + +// Source for dxilgen test CodeGenDXIL/passes/longvec-intrinsics.ll. +// Some targetted filecheck testing as an incidental. + +RWStructuredBuffer > hBuf; +RWStructuredBuffer > fBuf; +RWStructuredBuffer > dBuf; + +RWStructuredBuffer > bBuf; +RWStructuredBuffer > uBuf; +RWStructuredBuffer > lBuf; + +[numthreads(8,1,1)] +void main() { + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + vector fVec1 = fBuf[11]; + vector fVec2 = fBuf[12]; + vector fVec3 = fBuf[13]; + + // CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.binary.v13f32(i32 35, <13 x float> [[fvec1]], <13 x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <13 x float> @dx.op.binary.v13f32(i32 36, <13 x float> [[tmp]], <13 x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v13f16 
[[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + vector hVec1 = hBuf[14]; + vector hVec2 = hBuf[15]; + vector hVec3 = hBuf[16]; + + // CHECK: [[tmp:%.*]] = fcmp fast olt <13 x half> [[hvec2]], [[hvec1]] + // CHECK: select <13 x i1> [[tmp]], <13 x half> zeroinitializer, <13 x half> hRes = step(hVec1, hVec2); + + // CHECK: [[tmp:%.*]] = fmul fast <13 x float> [[fvec1]], @dx.op.unary.v13f32(i32 21, <13 x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK: [[tmp:%.*]] = call <13 x half> @dx.op.unary.v13f16(i32 23, <13 x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <13 x half> [[tmp]], [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <13 x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <13 x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 7, <13 x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], [[sat]] + // CHECK: fmul fast <13 x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. 
+ + // CHECK: fmul fast <13 x float> [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <13 x float> [[fvec1]] to <13 x i32> + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], [[add]], [[shr]] to <13 x float> + // CHECK: [[sel:%.*]] = select <13 x i1> [[cmp]], <13 x float> [[i2f]], <13 x float> zeroinitializer + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK: [[tmp:%.*]] = fsub fast <13 x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <13 x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + // CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + // CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + vector uVec1 = uBuf[17]; + vector uVec2 = uBuf[18]; + + vector signs = 1; + // CHECK: [[cmp:%.*]] = icmp ne <13 x i32> [[uvec2]], zeroinitializer + // CHECK: zext <13 x i1> [[cmp]] to <13 x i32> + signs *= sign(uVec2); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + vector lVec1 = lBuf[19]; + vector lVec2 = lBuf[20]; + + // CHECK: [[gt:%.*]] = icmp sgt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <13 x i1> [[gt]] to <13 x i32> + // CHECK: 
[[ilt:%.*]] = zext <13 x i1> [[lt]] to <13 x i32> + // CHECK: sub nsw <13 x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + vector uRes = signs; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec1:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec2:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec3:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + vector bVec1 = bBuf[21]; + vector bVec2 = bBuf[22]; + vector bVec3 = bBuf[23]; + + // CHECK: [[bvec2:%.*]] = icmp ne <13 x i32> [[vec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <13 x i32> [[vec1]], zeroinitializer + // CHECK: or <13 x i1> [[bvec2]], [[bvec1]] + uRes += or(bVec1, bVec2); + + // CHECK: [[bvec3:%.*]] = icmp ne <13 x i32> [[vec3]], zeroinitializer + // CHECK: and <13 x i1> [[bvec3]], [[bvec2]] + uRes += and(bVec2, bVec3); + + // CHECK: select <13 x i1> [[bvec3]], <13 x i64> [[lvec1]], <13 x i64> [[lvec2]] + vector lRes = select(bVec3, lVec1, lVec2); + + // CHECK: [[el1:%.*]] = extractelement <13 x float> [[fvec1]] + // CHECK: [[el2:%.*]] = extractelement <13 x float> [[fvec2]] + // CHECK: [[mul:%.*]] = fmul fast float [[el2]], [[el1]] + // CHECK: 
[[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) + // CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad1]]) ; FMad(a,b,c) + // CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad2]]) ; FMad(a,b,c) + // CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad3]]) ; FMad(a,b,c) + // CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad4]]) ; FMad(a,b,c) + // CHECK: [[mad6:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad5]]) ; FMad(a,b,c) + // CHECK: [[mad7:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad6]]) ; FMad(a,b,c) + // CHECK: [[mad8:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad7]]) ; FMad(a,b,c) + // CHECK: [[mad9:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad8]]) ; FMad(a,b,c) + // CHECK: [[mad10:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad9]]) ; FMad(a,b,c) + // CHECK: [[mad11:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad10]]) ; FMad(a,b,c) + // CHECK: [[mad12:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad11]]) ; FMad(a,b,c) + fRes += dot(fVec1, fVec2); + + // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 17, <13 x float> [[fvec1]]) ; Atan(value) + fRes += atan(fVec1); + + // CHECK: call <13 x i32> @dx.op.binary.v13i32(i32 40, <13 x i32> [[uvec1]], <13 x i32> [[uvec2]]) ; UMin(a,b) + uRes += min(uVec1, uVec2); + + // CHECK: call <13 x float> @dx.op.tertiary.v13f32(i32 46, <13 x float> [[fvec1]], <13 x float> [[fvec2]], <13 x float> [[fvec3]]) ; FMad(a,b,c) + fRes += mad(fVec1, fVec2, fVec3); + + // CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + vector dVec1 = dBuf[24]; + vector dVec2 = dBuf[25]; + vector dVec3 = dBuf[26]; + + // CHECK: call <13 x double> @dx.op.tertiary.v13f64(i32 47, <13 x double> [[dvec1]], <13 x double> [[dvec2]], <13 x double> [[dvec3]]) + vector dRes = fma(dVec1, dVec2, dVec3); + + hBuf[0] = hRes; + fBuf[0] = fRes; + dBuf[0] = dRes; + uBuf[0] = uRes; + lBuf[0] = lRes; +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll new file mode 100644 index 0000000000..8f9dcbbdbc --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll @@ -0,0 +1,434 @@ +; RUN: %dxopt %s -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <7 x half> } +%"class.RWStructuredBuffer >" = type { <7 x float> } +%"class.RWStructuredBuffer >" = type { <7 x double> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i64> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" = external global %"class.RWStructuredBuffer >", align 2 
+@"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 + +; CHECK-LABEL: define void @main() +define void @main() #0 { +bb: + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + ; CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + ; CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + ; CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + + %exp = alloca <7 x float>, align 4 + %tmp = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:23 col:30 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp) ; line:23 col:30 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:23 col:30 + %tmp3 = call <7 x float>* 
@"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 11) ; line:23 col:30 + %tmp4 = load <7 x float>, <7 x float>* %tmp3 ; line:23 col:30 + %tmp5 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:24 col:30 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp5) ; line:24 col:30 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:24 col:30 + %tmp8 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp7, i32 12) ; line:24 col:30 + %tmp9 = load <7 x float>, <7 x float>* %tmp8 ; line:24 col:30 + %tmp10 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:25 col:30 + %tmp11 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp10) ; line:25 col:30 + %tmp12 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:25 col:30 + %tmp13 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp12, i32 13) ; line:25 col:30 + %tmp14 = load <7 x float>, <7 x float>* %tmp13 ; line:25 col:30 + + ; Clamp operation. 
+ ; CHECK: [[max:%.*]] = call <7 x float> @dx.op.binary.v7f32(i32 35, <7 x float> [[fvec1]], <7 x float> [[fvec2]]) + ; CHECK: call <7 x float> @dx.op.binary.v7f32(i32 36, <7 x float> [[max]], <7 x float> [[fvec3]]) + %tmp15 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 119, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:29 col:29 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + ; CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + ; CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + ; CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + %tmp16 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:37 col:34 + %tmp17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp16) ; line:37 col:34 + %tmp18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp17, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:37 col:34 + %tmp19 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp18, i32 14) ; line:37 col:34 + %tmp20 = load <7 x half>, <7 x half>* %tmp19 ; line:37 col:34 + %tmp21 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* 
@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:38 col:34 + %tmp22 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp21) ; line:38 col:34 + %tmp23 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp22, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:38 col:34 + %tmp24 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp23, i32 15) ; line:38 col:34 + %tmp25 = load <7 x half>, <7 x half>* %tmp24 ; line:38 col:34 + %tmp26 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:39 col:34 + %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp26) ; line:39 col:34 + %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:39 col:34 + %tmp29 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp28, i32 16) ; line:39 col:34 + %tmp30 = load <7 x half>, <7 x half>* %tmp29 ; line:39 col:34 + + ; Step operation. + ; CHECK: [[cmp:%.*]] = fcmp fast olt <7 x half> [[hvec2]], [[hvec1]] + ; CHECK: select <7 x i1> [[cmp]], <7 x half> zeroinitializer, <7 x half> + %tmp31 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32 192, <7 x half> %tmp20, <7 x half> %tmp25) ; line:43 col:33 + + ; Exp operation. 
+ ; CHECK: [[mul:%.*]] = fmul fast <7 x float> , [[fvec1]] + ; CHECK call <7 x float> @dx.op.unary.v7f32(i32 21, <7 x float> [[mul]]) + %tmp32 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 139, <7 x float> %tmp4) ; line:47 col:11 + %tmp33 = fadd <7 x float> %tmp15, %tmp32 ; line:47 col:8 + + ; Log operation. + ; CHECK: [[log:%.*]] = call <7 x half> @dx.op.unary.v7f16(i32 23, <7 x half> [[hvec1]]) + ; CHECK: fmul fast <7 x half> , [[log]] + %tmp34 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32 159, <7 x half> %tmp20) ; line:51 col:11 + %tmp35 = fadd <7 x half> %tmp31, %tmp34 ; line:51 col:8 + + ; Smoothstep operation. + ; CHECK: [[sub1:%.*]] = fsub fast <7 x float> [[fvec2]], [[fvec1]] + ; CHECK: [[sub2:%.*]] = fsub fast <7 x float> [[fvec3]], [[fvec1]] + ; CHECK: [[div:%.*]] = fdiv fast <7 x float> [[sub2]], [[sub1]] + ; CHECK: [[sat:%.*]] = call <7 x float> @dx.op.unary.v7f32(i32 7, <7 x float> [[div]]) + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], + ; CHECK: [[sub:%.*]] = fsub fast <7 x float> , [[mul]] + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], [[sub]] + ; CHECK: fmul fast <7 x float> %Saturate, [[mul]] + %tmp36 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 189, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:61 col:11 + %tmp37 = fadd <7 x float> %tmp33, %tmp36 ; line:61 col:8 + + ; Radians operation. + ; CHECK: fmul fast <7 x float> , [[fvec3]] + %tmp38 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 176, <7 x float> %tmp14) ; line:66 col:11 + %tmp39 = fadd <7 x float> %tmp37, %tmp38 ; line:66 col:8 + store <7 x float> %tmp14, <7 x float>* %exp, align 4 ; line:77 col:22 + + ; Frexp operation. 
+ ; CHECK: [[cmp:%.*]] = fcmp fast une <7 x float> [[fvec1]], zeroinitializer + ; CHECK: [[ext:%.*]] = sext <7 x i1> [[cmp]] to <7 x i32> + ; CHECK: [[bct:%.*]] = bitcast <7 x float> [[fvec1]] to <7 x i32> + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[add:%.*]] = add <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[add]], [[ext]] + ; CHECK: [[shr:%.*]] = ashr <7 x i32> [[and]], + ; CHECK: [[i2f:%.*]] = sitofp <7 x i32> [[shr]] to <7 x float> + ; CHECK: store <7 x float> [[i2f]], <7 x float>* %exp + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[or:%.*]] = or <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[or]], [[ext]] + ; CHECK: bitcast <7 x i32> [[and]] to <7 x float> + %tmp41 = call <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32 150, <7 x float> %tmp4, <7 x float>* %exp) ; line:78 col:11 + %tmp42 = fadd <7 x float> %tmp39, %tmp41 ; line:78 col:8 + %tmp43 = load <7 x float>, <7 x float>* %exp, align 4 ; line:79 col:11 + %tmp44 = fadd <7 x float> %tmp42, %tmp43 ; line:79 col:8 + + ; Lerp operation. 
+ ; CHECK: [[sub:%.*]] = fsub fast <7 x half> [[hvec3]], [[hvec2]] + ; CHECK: fmul fast <7 x half> [[hvec1]], [[sub]] + %tmp45 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32 157, <7 x half> %tmp25, <7 x half> %tmp30, <7 x half> %tmp20) ; line:83 col:11 + %tmp46 = fadd <7 x half> %tmp35, %tmp45 ; line:83 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + ; CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + ; CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + %tmp47 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:90 col:29 + %tmp48 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp47) ; line:90 col:29 + %tmp49 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp48, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:90 col:29 + %tmp50 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp49, i32 17) ; line:90 col:29 + %tmp51 = load <7 x i32>, <7 x i32>* %tmp50 ; line:90 col:29 + %tmp52 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:91 col:29 + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp52) ; line:91 col:29 + %tmp54 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:91 col:29 + %tmp55 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 18) ; line:91 col:29 + %tmp56 = load <7 x i32>, <7 x i32>* %tmp55 ; line:91 col:29 + + ; Unsigned int sign operation. + ; CHECK: [[cmp:%.*]] = icmp ne <7 x i32> [[uvec2]], zeroinitializer + ; CHECK: zext <7 x i1> [[cmp]] to <7 x i32> + %tmp57 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32 355, <7 x i32> %tmp56) ; line:96 col:12 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + ; CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + ; CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + %tmp58 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:102 col:32 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp58) ; line:102 col:32 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:102 col:32 + %tmp61 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp60, i32 19) ; line:102 col:32 + %tmp62 = load <7 x 
i64>, <7 x i64>* %tmp61 ; line:102 col:32 + %tmp63 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:103 col:32 + %tmp64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp63) ; line:103 col:32 + %tmp65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp64, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:103 col:32 + %tmp66 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp65, i32 20) ; line:103 col:32 + %tmp67 = load <7 x i64>, <7 x i64>* %tmp66 ; line:103 col:32 + + ; Signed int sign operation. + ; CHECK: [[lt1:%.*]] = icmp slt <7 x i64> zeroinitializer, [[lvec2]] + ; CHECK: [[lt2:%.*]] = icmp slt <7 x i64> [[lvec2]], zeroinitializer + ; CHECK: [[ilt1:%.*]] = zext <7 x i1> [[lt1]] to <7 x i32> + ; CHECK: [[ilt2:%.*]] = zext <7 x i1> [[lt2]] to <7 x i32> + ; CHECK: sub <7 x i32> [[ilt1]], [[ilt2]] + %tmp68 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32 185, <7 x i64> %tmp67) ; line:110 col:12 + %tmp69 = mul <7 x i32> %tmp57, %tmp68 ; line:110 col:9 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec1:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp 
ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec2:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec3:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + %tmp70 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:126 col:29 + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp70) ; line:126 col:29 + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:126 col:29 + %tmp73 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 21) ; line:126 col:29 + %tmp74 = load <7 x i32>, <7 x i32>* %tmp73 ; line:126 col:29 + %tmp75 = icmp ne <7 x i32> %tmp74, zeroinitializer ; line:126 col:29 + %tmp76 = zext <7 x i1> %tmp75 to <7 x i32> ; line:126 col:21 + %tmp77 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:127 col:29 + %tmp78 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp77) ; line:127 col:29 + %tmp79 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp78, %dx.types.ResourceProperties { i32 
4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:127 col:29 + %tmp80 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp79, i32 22) ; line:127 col:29 + %tmp81 = load <7 x i32>, <7 x i32>* %tmp80 ; line:127 col:29 + %tmp82 = icmp ne <7 x i32> %tmp81, zeroinitializer ; line:127 col:29 + %tmp83 = zext <7 x i1> %tmp82 to <7 x i32> ; line:127 col:21 + %tmp84 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:128 col:29 + %tmp85 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp84) ; line:128 col:29 + %tmp86 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp85, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:128 col:29 + %tmp87 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp86, i32 23) ; line:128 col:29 + %tmp88 = load <7 x i32>, <7 x i32>* %tmp87 ; line:128 col:29 + %tmp89 = icmp ne <7 x i32> %tmp88, zeroinitializer ; line:128 col:29 + %tmp90 = zext <7 x i1> %tmp89 to <7 x i32> ; line:128 col:21 + + + ; Or() operation. 
+ ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: [[bvec1:%.*]] = icmp ne <7 x i32> [[vec1]], zeroinitializer + ; CHECK: or <7 x i1> [[bvec1]], [[bvec2]] + %tmp91 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:133 col:21 + %tmp92 = icmp ne <7 x i32> %tmp76, zeroinitializer ; line:133 col:14 + %tmp93 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 169, <7 x i1> %tmp92, <7 x i1> %tmp91) ; line:133 col:11 + %tmp94 = zext <7 x i1> %tmp93 to <7 x i32> ; line:133 col:11 + %tmp95 = add <7 x i32> %tmp69, %tmp94 ; line:133 col:8 + + ; And() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: and <7 x i1> [[bvec2]], [[bvec3]] + %tmp96 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:137 col:22 + %tmp97 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:137 col:15 + %tmp98 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 106, <7 x i1> %tmp97, <7 x i1> %tmp96) ; line:137 col:11 + %tmp99 = zext <7 x i1> %tmp98 to <7 x i32> ; line:137 col:11 + %tmp100 = add <7 x i32> %tmp95, %tmp99 ; line:137 col:8 + + ; Select() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: select <7 x i1> [[bvec3]], <7 x i64> [[lvec1]], <7 x i64> [[lvec2]] + %tmp101 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:140 col:38 + %tmp102 = call <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32 184, <7 x i1> %tmp101, <7 x i64> %tmp62, <7 x i64> %tmp67) ; line:140 col:31 + %tmp103 = call float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32 134, <7 x float> %tmp4, <7 x float> %tmp9) ; line:152 col:11 + + ; Dot operation. 
+ ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 0 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 0 + ; CHECK: [[mul:%.*]] = fmul fast float [[el1]], [[el2]] + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 1 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 1 + ; CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mul]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 2 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 2 + ; CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad1]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 3 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 3 + ; CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad2]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 4 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 4 + ; CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad3]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 5 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 5 + ; CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad4]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 6 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 6 + ; CHECK: call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad5]]) + %tmp104 = insertelement <7 x float> undef, float %tmp103, i32 0 ; line:152 col:11 + %tmp105 = shufflevector <7 x float> %tmp104, <7 x float> undef, <7 x i32> zeroinitializer ; line:152 col:11 + %tmp106 = fadd <7 x float> %tmp44, %tmp105 ; line:152 col:8 + + ; Atan operation. 
+ ; CHECK: call <7 x float> @dx.op.unary.v7f32(i32 17, <7 x float> [[fvec1]]) + %tmp107 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 116, <7 x float> %tmp4) ; line:155 col:11 + %tmp108 = fadd <7 x float> %tmp106, %tmp107 ; line:155 col:8 + + ; Min operation. + ; CHECK: call <7 x i32> @dx.op.binary.v7i32(i32 40, <7 x i32> [[uvec1]], <7 x i32> [[uvec2]]) + %tmp109 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32 353, <7 x i32> %tmp51, <7 x i32> %tmp56) ; line:158 col:11 + %tmp110 = add <7 x i32> %tmp100, %tmp109 ; line:158 col:8 + + ; Mad operation. + ; CHECK: call <7 x float> @dx.op.tertiary.v7f32(i32 46, <7 x float> [[fvec1]], <7 x float> [[fvec2]], <7 x float> [[fvec3]]) + %tmp111 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 162, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:161 col:11 + %tmp112 = fadd <7 x float> %tmp108, %tmp111 ; line:161 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + ; CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + ; CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + ; CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:169 col:31 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:169 col:31 + %tmp115 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:169 col:31 + %tmp116 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 24) ; line:169 col:31 + %tmp117 = load <7 x double>, <7 x double>* %tmp116 ; line:169 col:31 + %tmp118 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:170 col:31 + %tmp119 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp118) ; line:170 col:31 + %tmp120 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp119, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:170 col:31 + %tmp121 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp120, i32 25) ; line:170 col:31 + %tmp122 = load <7 x double>, <7 x double>* %tmp121 ; line:170 col:31 + %tmp123 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:171 col:31 + %tmp124 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp123) ; line:171 col:31 + %tmp125 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp124, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" 
zeroinitializer) ; line:171 col:31 + %tmp126 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp125, i32 26) ; line:171 col:31 + %tmp127 = load <7 x double>, <7 x double>* %tmp126 ; line:171 col:31 + + ; FMA operation. + ; CHECK: call <7 x double> @dx.op.tertiary.v7f64(i32 47, <7 x double> [[dvec1]], <7 x double> [[dvec2]], <7 x double> [[dvec3]]) + %tmp128 = call <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32 147, <7 x double> %tmp117, <7 x double> %tmp122, <7 x double> %tmp127) ; line:174 col:30 + %tmp129 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:176 col:3 + %tmp130 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp129) ; line:176 col:3 + %tmp131 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp130, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:176 col:3 + %tmp132 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp131, i32 0) ; line:176 col:3 + store <7 x half> %tmp46, <7 x half>* %tmp132 ; line:176 col:11 + %tmp133 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:177 col:3 + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp133) ; line:177 col:3 + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle 
%tmp134, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:177 col:3 + %tmp136 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp135, i32 0) ; line:177 col:3 + store <7 x float> %tmp112, <7 x float>* %tmp136 ; line:177 col:11 + %tmp137 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:178 col:3 + %tmp138 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp137) ; line:178 col:3 + %tmp139 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp138, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:178 col:3 + %tmp140 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp139, i32 0) ; line:178 col:3 + store <7 x double> %tmp128, <7 x double>* %tmp140 ; line:178 col:11 + %tmp141 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:179 col:3 + %tmp142 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp141) ; line:179 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp142, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:179 col:3 + %tmp144 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp143, i32 0) ; 
line:179 col:3 + store <7 x i32> %tmp110, <7 x i32>* %tmp144 ; line:179 col:11 + %tmp145 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:180 col:3 + %tmp146 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp145) ; line:180 col:3 + %tmp147 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp146, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:180 col:3 + %tmp148 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp147, i32 0) ; line:180 col:3 + store <7 x i64> %tmp102, <7 x i64>* %tmp148 ; line:180 col:11 + ret void ; line:181 col:1 +} + +declare <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>, <7 x float>) #1 +declare <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>) #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32, <7 x float>) #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32, <7 x half>) #1 +declare <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32, <7 x float>, <7 x float>*) #0 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>, <7 x half>) #1 +declare <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32, <7 x i32>) #1 +declare <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32, <7 x i64>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 
+declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32, <7 x i1>, <7 x i1>) #1 +declare <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32, <7 x i1>, <7 x i64>, <7 x i64>) #1 +declare float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>) #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32, <7 x i32>, <7 x i32>) #1 +declare <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32, <7 x double>, <7 x double>, <7 x double>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !36} +!dx.entryPoints = !{!40} +!dx.fnprops = !{!52} +!dx.options = !{!53, !54} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"cs", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer >" undef, !6, %"class.RWStructuredBuffer >" undef, !11, %"class.RWStructuredBuffer >" undef, !16, %"class.RWStructuredBuffer >" undef, !21, %"class.RWStructuredBuffer >" undef, !26, %"class.RWStructuredBuffer >" undef, !31} +!6 = !{i32 14, !7, !8} +!7 = !{i32 6, 
!"h", i32 3, i32 0, i32 7, i32 8, i32 13, i32 7} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, <7 x half> undef} +!11 = !{i32 28, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9, i32 13, i32 7} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, <7 x float> undef} +!16 = !{i32 56, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 10, i32 13, i32 7} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, <7 x double> undef} +!21 = !{i32 28, !22, !23} +!22 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 1, i32 13, i32 7} +!23 = !{i32 0, !24} +!24 = !{!25} +!25 = !{i32 0, <7 x i1> undef} +!26 = !{i32 28, !27, !28} +!27 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5, i32 13, i32 7} +!28 = !{i32 0, !29} +!29 = !{!30} +!30 = !{i32 0, <7 x i32> undef} +!31 = !{i32 56, !32, !33} +!32 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 6, i32 13, i32 7} +!33 = !{i32 0, !34} +!34 = !{!35} +!35 = !{i32 0, <7 x i64> undef} +!36 = !{i32 1, void ()* @main, !37} +!37 = !{!38} +!38 = !{i32 1, !39, !39} +!39 = !{} +!40 = !{void ()* @main, !"main", null, !41, null} +!41 = !{null, !42, null, null} +!42 = !{!43, !45, !47, !49, !50, !51} +!43 = !{i32 0, %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A", !"hBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !44} +!44 = !{i32 1, i32 14} +!45 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A", !"fBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!46 = !{i32 1, i32 28} +!47 = !{i32 2, %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A", !"dBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!48 = !{i32 1, i32 56} +!49 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A", !"bBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!50 = !{i32 4, %"class.RWStructuredBuffer >"* 
@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A", !"uBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!51 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A", !"lBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!52 = !{void ()* @main, i32 5, i32 8, i32 1, i32 1} +!53 = !{i32 0} +!54 = !{i32 -1} +!59 = !{!60, !60, i64 0} +!60 = !{!"omnipotent char", !61, i64 0} +!61 = !{!"Simple C/C++ TBAA"}