diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 0b4c7218d4..7047d9fe59 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -96,16 +96,16 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { "unary", Attribute::ReadNone, 1, - {{0x7}}, - {{0x0}}}, // Overloads: hfd + {{0x407}}, + {{0x7}}}, // Overloads: hfd refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { @@ -459,12 +467,40 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, } } } -// Generates a DXIL operation over an overloaded type (Ty), returning a -// RetTy value; when Ty is a vector, it will replicate per-element operations -// into RetTy to rebuild it. + +// Creates a native vector call for a "trivial" operation where only a single +// call instruction is needed. The overload and return types are the same vector +// type `Ty`. +// Utility objects `OP` and `Builder` are used to create a call to the given +// `Func` with `Args` arguments. +Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, + ArrayRef Args, Type *Ty, OP *OP, + IRBuilder<> &Builder) { + if (!Ty->isVoidTy()) + return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode)); + return Builder.CreateCall(Func, Args); // Cannot add name to void. +} + +// Generates a DXIL operation with the overloaded type based on `Ty` and return +// type `RetTy`. When Ty is a vector, it will either generate per-element calls +// for each vector element and reconstruct the vector type from those results or +// operate on and return native vectors depending on vector size and the +// legality of the vector overload. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { + + // If supported and the overload type is a vector with more than 1 element, + // create a native vector operation. 
+ if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + OP::IsOverloadLegal(opcode, Ty)) { + Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); + return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP, + Builder); + } + + // Set overload type to the scalar type of `Ty` and generate call(s). Type *EltTy = Ty->getScalarType(); Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy); @@ -484,20 +520,34 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B); } -Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *Ty = src->getType(); +// Translate call that converts to a dxil unary operation with a different +// return type from the overload by passing the argument, explicit return type, +// and helper objects to the scalarizing unary dxil operation creation. 
+Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *, + bool &Translated) { + Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Type *Ty = Src->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src}; + IRBuilder<> Builder(CI); + hlsl::OP *OP = &Helper.hlslOP; + Type *RetTy = CI->getType(); + Constant *OpArg = OP->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; - return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder); + return TrivialDxilOperation(OpCode, Args, Ty, RetTy, OP, Builder); } -Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP, - Builder); +Value *TrivialDxilUnaryOperation(OP::OpCode OpCode, Value *Src, hlsl::OP *Op, + IRBuilder<> &Builder) { + Type *Ty = Src->getType(); + + Constant *OpArg = Op->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; + + return TrivialDxilOperation(OpCode, Args, Ty, Ty, Op, Builder); } Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, @@ -521,6 +571,9 @@ Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } +// Translate call that trivially converts to a dxil unary operation by passing +// argument, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. 
Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -528,11 +581,13 @@ Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); IRBuilder<> Builder(CI); hlsl::OP *hlslOP = &helper.hlslOP; - Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), - hlslOP, Builder); - return retVal; + + return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder); } +// Translate call that trivially converts to a dxil binary operation by passing +// arguments, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -547,6 +602,10 @@ Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return binOp; } +// Translate call that trivially converts to a dxil trinary (aka ternary) +// operation by passing arguments, return type, and helper objects to either +// scalarizing or native vector dxil operation creation depending on version +// and vector size. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -738,6 +797,12 @@ bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, } } + // Only apply on aggregates of 16 or fewer elements, + // representing the max 4x4 matrix size. + Type *Ty = x->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 16) + return false; + APFloat powAPF = isa(pow) ? 
cast(pow)->getElementAsAPFloat(0) : // should be a splat value @@ -2019,7 +2084,7 @@ Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitHi = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi); IRBuilder<> Builder(CI); Constant *neg1 = Builder.getInt32(-1); @@ -2052,7 +2117,7 @@ Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitLo = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); return firstbitLo; } @@ -2431,17 +2496,22 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, return dotOP; } -Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { - auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad; +// Instead of using a DXIL intrinsic, implement a dot product operation using +// multiply and add operations. Used for integer dots and long vectors. 
+Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, + IRBuilder<> &Builder, + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); - Value *Result = Builder.CreateMul(Elt0, Elt1); - for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) { - Elt0 = Builder.CreateExtractElement(arg0, iVecElt); - Elt1 = Builder.CreateExtractElement(arg1, iVecElt); - Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, + Value *Result; + if (Elt0->getType()->isFloatingPointTy()) + Result = Builder.CreateFMul(Elt0, Elt1); + else + Result = Builder.CreateMul(Elt0, Elt1); + for (unsigned Elt = 1; Elt < vecSize; ++Elt) { + Elt0 = Builder.CreateExtractElement(arg0, Elt); + Elt1 = Builder.CreateExtractElement(arg1, Elt); + Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP, Builder); } @@ -2480,12 +2550,16 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, unsigned vecSize = Ty->getVectorNumElements(); Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy()) { + Type *EltTy = Ty->getScalarType(); + if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); - } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_udot); - } + + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_udot) + MadOpCode = DXIL::OpCode::UMad; + else if (EltTy->isFloatingPointTy()) + MadOpCode = DXIL::OpCode::FMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -3032,8 +3106,10 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, if 
(arg0Ty->getScalarType()->isFloatingPointTy()) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_umul); + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_umul) + MadOpCode = DXIL::OpCode::UMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } } else { // mul(vector, scalar) == vector * scalar-splat @@ -6150,20 +6226,8 @@ Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateAnd(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateAnd(x, y); } Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6171,20 +6235,8 @@ Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateOr(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateOr(x, y); 
} Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6194,21 +6246,8 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltCond = Builder.CreateExtractElement(cond, i); - Value *EltTrue = Builder.CreateExtractElement(t, i); - Value *EltFalse = Builder.CreateExtractElement(f, i); - Value *tmp = Builder.CreateSelect(EltCond, EltTrue, EltFalse); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateSelect(cond, t, f); } } // namespace @@ -6467,18 +6506,20 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos}, {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos}, - {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, + {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet, DXIL::OpCode::Countbits}, {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseX}, + {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineX}, - {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, + 
{IntrinsicOp::IOP_ddy, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseY}, + {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineY}, {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes}, diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 6796badcb6..72dd6d41aa 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6606,8 +6606,10 @@ bool HLSLExternalSource::MatchArguments( argTypes.clear(); const bool isVariadic = IsVariadicIntrinsicFunction(pIntrinsic); - static const UINT UnusedSize = 0xFF; - static const BYTE MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + static const uint32_t UnusedSize = std::numeric_limits::max(); + static const uint32_t MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + assert(MaxIntrinsicArgs < std::numeric_limits::max() && + "This should be a pretty small number"); #define CAB(cond, arg) \ { \ if (!(cond)) { \ @@ -6622,7 +6624,7 @@ bool HLSLExternalSource::MatchArguments( ArBasicKind ComponentType[MaxIntrinsicArgs]; // Component type for each argument, // AR_BASIC_UNKNOWN if unspecified. - UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UNUSED_INDEX32 + UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UnusedSize // if unspecified. 
badArgIdx = MaxIntrinsicArgs; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..0b7f0d6b2f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl @@ -0,0 +1,394 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=2 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=256 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=1024 %s | FileCheck %s + +// Test vector-enabled non-trivial intrinsics that take parameters of various types. + +RWByteAddressBuffer buf; +RWByteAddressBuffer ibuf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] 
@dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, 
%dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = ibuf.Load >(0); + vector sVec2 = ibuf.Load >(512); + vector sVec3 = ibuf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = ibuf.Load >(1025); + vector usVec2 = ibuf.Load >(1536); + vector usVec3 = ibuf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] 
[[ld]], 0 + vector iVec1 = ibuf.Load >(2049); + vector iVec2 = ibuf.Load >(2560); + vector iVec3 = ibuf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = ibuf.Load >(3073); + vector uiVec2 = ibuf.Load >(3584); + vector uiVec3 = ibuf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = ibuf.Load >(4097); + vector lVec2 = ibuf.Load >(4608); + vector lVec3 = ibuf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue 
%dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = ibuf.Load >(5121); + vector ulVec2 = ibuf.Load >(5632); + vector ulVec3 = ibuf.Load >(6144); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 35, <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 36, <[[NUM]] x half> [[tmp]], <[[NUM]] x half> [[hvec3]]) ; FMin(a,b) + vector hRes = clamp(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 35, <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 36, <[[NUM]] x float> [[tmp]], <[[NUM]] x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 35, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 36, <[[NUM]] x double> [[tmp]], <[[NUM]] x double> [[dvec3]]) ; FMin(a,b) + vector dRes = clamp(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 38, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[svec3]]) ; IMin(a,b) + vector sRes = clamp(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call 
<[[NUM]] x i16> @dx.op.binary.[[STY]](i32 39, <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 40, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[usvec3]]) ; UMin(a,b) + vector usRes = clamp(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 38, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[ivec3]]) ; IMin(a,b) + vector iRes = clamp(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 39, <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 40, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[uivec3]]) ; UMin(a,b) + vector uiRes = clamp(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 38, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[lvec3]]) ; IMin(a,b) + vector lRes = clamp(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 39, <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 40, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[ulvec3]]) ; UMin(a,b) + vector ulRes = clamp(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec2]], [[hvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], 
<[[NUM]] x half> zeroinitializer, <[[NUM]] x half> [[fvec2]], [[fvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x float> zeroinitializer, <[[NUM]] x float> [[hvec1]], @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp]]) ; Exp(value) + hRes += exp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fmul fast <[[NUM]] x float> [[fvec1]], @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[hvec2]], [[hvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 7, <[[NUM]] x half> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x half> [[mul]], [[sub]] + hRes += smoothstep(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x float> [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 7, <[[NUM]] x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] + fRes += 
smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fmul fast <[[NUM]] x half> [[hvec2]], [[fvec2]], [[hvec3]], [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <[[NUM]] x float> [[fvec1]] to <[[NUM]] x i32> + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], [[add]], [[shr]] to <[[NUM]] x float> + // CHECK: [[sel:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x float> [[i2f]], <[[NUM]] x float> zeroinitializer + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec2]] + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + fRes += lerp(fVec2, fVec3, fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x half> , [[hvec1]] + hRes += rcp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x float> , [[fvec1]] + fRes += rcp(fVec1); + + vector signs = 1; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x 
float> [[fvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(dVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i16> [[usvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: [[sub:%.*]] = sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = 
icmp ne <[[NUM]] x i32> [[uivec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i64> [[ulvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(ulVec2); + + iRes += signs; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i16> [[svec1]], zeroinitializer + // CHECK: or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + sRes += or(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i16> [[svec3]], zeroinitializer + // CHECK: and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + sRes += and(sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: select <[[NUM]] x i1> [[bvec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]] + sRes += select(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + + ibuf.Store >(0, sRes); + ibuf.Store >(1024, usRes); + ibuf.Store >(2048, iRes); + ibuf.Store >(3072, uiRes); + ibuf.Store >(4096, lRes); + ibuf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..2ae3c92e85 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl @@ -0,0 +1,115 @@ +// RUN: %dxc -T lib_6_9 %s | FileCheck %s + +// Long vector tests for vec ops that scalarize to something more complex +// than a simple repetition of the same dx.op calls. + +// CHECK-LABEL: test_atan2 +// CHECK: fdiv fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> %{{.*}}) ; Atan(value) +// CHECK: fadd fast <8 x float> %{{.*}}, %{{.*}}, +// CHECK: fcmp fast oeq <8 x float> +// CHECK: fcmp fast oge <8 x float> +// CHECK: fcmp fast olt <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> vec1, vector vec2) { + vec1 = atan2(vec1, vec2); +} + +// CHECK-LABEL: test_fmod +// CHECK: fdiv fast <8 x float> +// CHECK: fsub fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 6, <8 x float> %{{.*}}) ; FAbs(value) +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 22, <8 x float> %{{.*}}) ; Frc(value) + +// CHECK: fsub fast <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: fmul fast <8 x float> +export void test_fmod(inout vector vec1, vector vec2) { + vec1 = fmod(vec1, vec2); +} + +// CHECK-LABEL: test_ldexp +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +// CHECK: fmul fast <8 x float> + +export void test_ldexp(inout vector vec1, vector vec2) { + vec1 = ldexp(vec1, vec2); +} + + +// CHECK-LABEL: test_pow +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> %{{.*}}) ; Log(value) +// CHECK: fmul fast <8 x float> +// CHECK: call <8 x float> 
@dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +export void test_pow(inout vector vec1, vector vec2) { + vec1 = pow(vec1, vec2); +} + +// CHECK-LABEL: test_modf +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 29, <8 x float> %{{.*}}) ; Round_z(value) +// CHECK: fsub fast <8 x float> +export void test_modf(inout vector vec1, vector vec2) { + vec1 = modf(vec1, vec2); +} + +// CHECK-LABEL: test_dot +// CHECK: [[el:%.*]] = extractelement <8 x float> +// CHECK: [[mul:%.*]] = fmul fast float [[el]] +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +export void test_dot(inout vector vec1, vector vec2) { + vec1 = dot(vec1, vec2); +} + +// CHECK-LABEL: test_any +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +export void test_any(vector vec1, inout vector bvec) { + bvec &= any(vec1); +} + +// CHECK-LABEL: test_all +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +export void test_all(vector vec1, inout vector bvec) { + bvec &= all(vec1); +} + +// CHECK-LABEL: test_WaveMatch +// call {{.*}} @dx.op.wave 
+// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +export uint4 test_WaveMatch(vector bvec) { + return WaveMatch(bvec); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl new file mode 100644 index 0000000000..02cad5b894 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled binary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + + // Test simple matching type overloads. 
+ + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) + vector hRes = FUNC(hVec1, hVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) + vector fRes = FUNC(fVec1, fVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) + vector dRes = FUNC(dVec1, dVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl new file mode 100644 index 0000000000..994246b753 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled binary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. 
+ +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 
= buf.Load >(1024); + vector usVec2 = buf.Load >(1536); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2048); + vector iVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3072); + vector uiVec2 = buf.Load >(3584); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4096); + vector lVec2 = buf.Load >(4608); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5120); + 
vector ulVec2 = buf.Load >(5632); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) + vector sRes = FUNC(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) + vector usRes = FUNC(usVec1, usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) + vector iRes = FUNC(iVec1, iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) + vector uiRes = FUNC(uiVec1, uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) + vector lRes = FUNC(lVec1, lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) + vector ulRes = FUNC(ulVec1, ulVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..6ebb511b00 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -0,0 +1,77 @@ +// 
The binary part of some of these is all just a vector math ops with as many unary dxops as elements. +// These will have apparent mismatches between the ARITY define and the check prefix. + +// RUN: %dxc -DFUNC=f16tof32 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=f32tof16 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=isfinite -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isinf -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isnan -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossDiagonal -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=WaveActiveBitAnd -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitOr -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitXor -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc 
-DFUNC=WaveActiveMin -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMax -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitAnd -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitOr -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitXor -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixProduct -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixSum -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneFirst -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveAllEqual -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE + +#ifndef TYPE +#define TYPE float +#endif + +#if ARITY == 1 +#define CALLARGS(x,y,z) x +#elif ARITY == 2 +#define CALLARGS(x,y,z) x, y +#elif ARITY == 3 +#define CALLARGS(x,y,z) x, y, z +// ARITY 4 is used for 1 vec + scalar +#elif ARITY == 4 +#define CALLARGS(x,y,z) x, i +// ARITY 5 is used for 1 vec + uint4 mask for wavemultiprefix* +#elif ARITY == 5 +#define CALLARGS(x,y,z) x, m +#endif + +StructuredBuffer< vector > buf; +ByteAddressBuffer rbuf; + +float4 main(uint i : SV_PrimitiveID, uint4 m : M) : SV_Target { + vector arg1 = rbuf.Load< vector >(i++*32); + vector arg2 = rbuf.Load< vector >(i++*32); + vector arg3 = rbuf.Load< vector >(i++*32); + + // UNARY: call {{.*}} [[DXOP:@dx.op.unary]] + // 
BINARY: call {{.*}} [[DXOP:@dx.op.binary]] + // TERTIARY: call {{.*}} [[DXOP:@dx.op.tertiary]] + // LEGACY: call {{.*}} [[DXOP:@dx.op.legacy]] + // SPECFLT: call {{.*}} [[DXOP:@dx.op.isSpecialFloat]] + // QUAD: call {{.*}} [[DXOP:@dx.op.quad]] + // WAVE: call {{.*}} [[DXOP:@dx.op.wave]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + + vector ret = FUNC(CALLARGS(arg1, arg2, arg3)); + return float4(ret[0] + ret[1], ret[2] + ret[3], ret[4] + ret[5], ret[6] + ret[7]); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl new file mode 100644 index 0000000000..e32ebc1db2 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled ternary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +// Given that all we have at the moment are fmad and fma and the latter only takes doubles, +// fma is tacked on as an additional check. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] 
@dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.tertiary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]], <[[NUM]] x half> [[hvec3]]) + vector hRes = FUNC(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.tertiary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]], <[[NUM]] x float> [[fvec3]]) + vector fRes = FUNC(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes = FUNC(dVec1, dVec2, dVec3); + + // Tacked on fma() check since it only takes doubles. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 47, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes2 = fma(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + buf.Store >(5120, dRes2); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl new file mode 100644 index 0000000000..50f98715e4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl @@ -0,0 +1,131 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled tertiary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + vector sVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, 
%dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1025); + vector usVec2 = buf.Load >(1536); + vector usVec3 = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2049); + vector iVec2 = buf.Load >(2560); + vector iVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3073); + vector uiVec2 = buf.Load >(3584); + vector uiVec3 = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] 
@dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4097); + vector lVec2 = buf.Load >(4608); + vector lVec3 = buf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5121); + vector ulVec2 = buf.Load >(5632); + vector ulVec3 = buf.Load >(6144); + + // Test simple matching type overloads. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]]) + vector sRes = FUNC(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]], <[[NUM]] x i16> [[usvec3]]) + vector usRes = FUNC(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]], <[[NUM]] x i32> [[ivec3]]) + vector iRes = FUNC(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]], <[[NUM]] x i32> [[uivec3]]) + vector uiRes = FUNC(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]], <[[NUM]] x i64> [[lvec3]]) + vector lRes = FUNC(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]], <[[NUM]] x i64> [[ulvec3]]) + vector ulRes = FUNC(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl new file mode 100644 index 
0000000000..91ab631a7e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -0,0 +1,83 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 
-enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take float-like parameters and +// are "trivial" in that they can be implemented with a single call +// instruction with the same
parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec = buf.Load >(1024); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec]]) + vector hRes = FUNC(hVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec]]) + vector fRes = FUNC(fVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(1024, fRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl new file mode 100644 index 0000000000..ef0b250745 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // Capture opcode number. 
+ // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec = buf.Load >(5120); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec]]) + vector sRes = FUNC(sVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: 
insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[usvec]]) + vector usRes = FUNC(usVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec]]) + vector iRes = FUNC(iVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[uivec]]) + vector uiRes = FUNC(uiVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec]]) + vector lRes = FUNC(lVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[ulvec]]) + vector ulRes = FUNC(ulVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..11d705305d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl @@ -0,0 +1,186 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=13 %s | FileCheck %s + +// Source for dxilgen test CodeGenDXIL/passes/longvec-intrinsics.ll. +// Some targeted FileCheck testing as an incidental.
+ +RWStructuredBuffer > hBuf; +RWStructuredBuffer > fBuf; +RWStructuredBuffer > dBuf; + +RWStructuredBuffer > bBuf; +RWStructuredBuffer > uBuf; +RWStructuredBuffer > lBuf; + +[numthreads(8,1,1)] +void main() { + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + vector fVec1 = fBuf[11]; + vector fVec2 = fBuf[12]; + vector fVec3 = fBuf[13]; + + // CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.binary.v13f32(i32 35, <13 x float> [[fvec1]], <13 x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <13 x float> @dx.op.binary.v13f32(i32 36, <13 x float> [[tmp]], <13 x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + vector hVec1 = hBuf[14]; + vector hVec2 = hBuf[15]; + vector hVec3 = hBuf[16]; + + // 
CHECK: [[tmp:%.*]] = fcmp fast olt <13 x half> [[hvec2]], [[hvec1]] + // CHECK: select <13 x i1> [[tmp]], <13 x half> zeroinitializer, <13 x half> hRes = step(hVec1, hVec2); + + // CHECK: [[tmp:%.*]] = fmul fast <13 x float> [[fvec1]], @dx.op.unary.v13f32(i32 21, <13 x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK: [[tmp:%.*]] = call <13 x half> @dx.op.unary.v13f16(i32 23, <13 x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <13 x half> [[tmp]], [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <13 x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <13 x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 7, <13 x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], [[sat]] + // CHECK: fmul fast <13 x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. 
+ + // CHECK: fmul fast <13 x float> [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <13 x float> [[fvec1]] to <13 x i32> + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], [[add]], [[shr]] to <13 x float> + // CHECK: [[sel:%.*]] = select <13 x i1> [[cmp]], <13 x float> [[i2f]], <13 x float> zeroinitializer + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK: [[tmp:%.*]] = fsub fast <13 x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <13 x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + // CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + // CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + vector uVec1 = uBuf[17]; + vector uVec2 = uBuf[18]; + + vector signs = 1; + // CHECK: [[cmp:%.*]] = icmp ne <13 x i32> [[uvec2]], zeroinitializer + // CHECK: zext <13 x i1> [[cmp]] to <13 x i32> + signs *= sign(uVec2); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + vector lVec1 = lBuf[19]; + vector lVec2 = lBuf[20]; + + // CHECK: [[gt:%.*]] = icmp sgt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <13 x i1> [[gt]] to <13 x i32> + // CHECK: 
[[ilt:%.*]] = zext <13 x i1> [[lt]] to <13 x i32> + // CHECK: sub nsw <13 x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + vector uRes = signs; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec1:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec2:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec3:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + vector bVec1 = bBuf[21]; + vector bVec2 = bBuf[22]; + vector bVec3 = bBuf[23]; + + // CHECK: [[bvec2:%.*]] = icmp ne <13 x i32> [[vec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <13 x i32> [[vec1]], zeroinitializer + // CHECK: or <13 x i1> [[bvec2]], [[bvec1]] + uRes += or(bVec1, bVec2); + + // CHECK: [[bvec3:%.*]] = icmp ne <13 x i32> [[vec3]], zeroinitializer + // CHECK: and <13 x i1> [[bvec3]], [[bvec2]] + uRes += and(bVec2, bVec3); + + // CHECK: select <13 x i1> [[bvec3]], <13 x i64> [[lvec1]], <13 x i64> [[lvec2]] + vector lRes = select(bVec3, lVec1, lVec2); + + // CHECK: [[el1:%.*]] = extractelement <13 x float> [[fvec1]] + // CHECK: [[el2:%.*]] = extractelement <13 x float> [[fvec2]] + // CHECK: [[mul:%.*]] = fmul fast float [[el2]], [[el1]] + // CHECK: 
[[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) + // CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad1]]) ; FMad(a,b,c) + // CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad2]]) ; FMad(a,b,c) + // CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad3]]) ; FMad(a,b,c) + // CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad4]]) ; FMad(a,b,c) + // CHECK: [[mad6:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad5]]) ; FMad(a,b,c) + // CHECK: [[mad7:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad6]]) ; FMad(a,b,c) + // CHECK: [[mad8:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad7]]) ; FMad(a,b,c) + // CHECK: [[mad9:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad8]]) ; FMad(a,b,c) + // CHECK: [[mad10:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad9]]) ; FMad(a,b,c) + // CHECK: [[mad11:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad10]]) ; FMad(a,b,c) + // CHECK: [[mad12:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad11]]) ; FMad(a,b,c) + fRes += dot(fVec1, fVec2); + + // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 17, <13 x float> [[fvec1]]) ; Atan(value) + fRes += atan(fVec1); + + // CHECK: call <13 x i32> @dx.op.binary.v13i32(i32 40, <13 x i32> [[uvec1]], <13 x i32> [[uvec2]]) ; UMin(a,b) + uRes += min(uVec1, uVec2); + + // CHECK: call <13 x float> @dx.op.tertiary.v13f32(i32 46, <13 x float> [[fvec1]], <13 x float> [[fvec2]], <13 x float> [[fvec3]]) ; FMad(a,b,c) + fRes += mad(fVec1, fVec2, fVec3); + + // CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + vector dVec1 = dBuf[24]; + vector dVec2 = dBuf[25]; + vector dVec3 = dBuf[26]; + + // CHECK: call <13 x double> @dx.op.tertiary.v13f64(i32 47, <13 x double> [[dvec1]], <13 x double> [[dvec2]], <13 x double> [[dvec3]]) + vector dRes = fma(dVec1, dVec2, dVec3); + + hBuf[0] = hRes; + fBuf[0] = fRes; + dBuf[0] = dRes; + uBuf[0] = uRes; + lBuf[0] = lRes; +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll new file mode 100644 index 0000000000..8f9dcbbdbc --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll @@ -0,0 +1,434 @@ +; RUN: %dxopt %s -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <7 x half> } +%"class.RWStructuredBuffer >" = type { <7 x float> } +%"class.RWStructuredBuffer >" = type { <7 x double> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i64> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" = external global %"class.RWStructuredBuffer >", align 2 
+@"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 + +; CHECK-LABEL: define void @main() +define void @main() #0 { +bb: + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + ; CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + ; CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + ; CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + + %exp = alloca <7 x float>, align 4 + %tmp = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:23 col:30 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp) ; line:23 col:30 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:23 col:30 + %tmp3 = call <7 x float>* 
@"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 11) ; line:23 col:30 + %tmp4 = load <7 x float>, <7 x float>* %tmp3 ; line:23 col:30 + %tmp5 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:24 col:30 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp5) ; line:24 col:30 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:24 col:30 + %tmp8 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp7, i32 12) ; line:24 col:30 + %tmp9 = load <7 x float>, <7 x float>* %tmp8 ; line:24 col:30 + %tmp10 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:25 col:30 + %tmp11 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp10) ; line:25 col:30 + %tmp12 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:25 col:30 + %tmp13 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp12, i32 13) ; line:25 col:30 + %tmp14 = load <7 x float>, <7 x float>* %tmp13 ; line:25 col:30 + + ; Clamp operation. 
+ ; CHECK: [[max:%.*]] = call <7 x float> @dx.op.binary.v7f32(i32 35, <7 x float> [[fvec1]], <7 x float> [[fvec2]]) + ; CHECK: call <7 x float> @dx.op.binary.v7f32(i32 36, <7 x float> [[max]], <7 x float> [[fvec3]]) + %tmp15 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 119, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:29 col:29 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + ; CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + ; CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + ; CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + %tmp16 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:37 col:34 + %tmp17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp16) ; line:37 col:34 + %tmp18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp17, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:37 col:34 + %tmp19 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp18, i32 14) ; line:37 col:34 + %tmp20 = load <7 x half>, <7 x half>* %tmp19 ; line:37 col:34 + %tmp21 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* 
@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:38 col:34 + %tmp22 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp21) ; line:38 col:34 + %tmp23 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp22, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:38 col:34 + %tmp24 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp23, i32 15) ; line:38 col:34 + %tmp25 = load <7 x half>, <7 x half>* %tmp24 ; line:38 col:34 + %tmp26 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:39 col:34 + %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp26) ; line:39 col:34 + %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:39 col:34 + %tmp29 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp28, i32 16) ; line:39 col:34 + %tmp30 = load <7 x half>, <7 x half>* %tmp29 ; line:39 col:34 + + ; Step operation. + ; CHECK: [[cmp:%.*]] = fcmp fast olt <7 x half> [[hvec2]], [[hvec1]] + ; CHECK: select <7 x i1> [[cmp]], <7 x half> zeroinitializer, <7 x half> + %tmp31 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32 192, <7 x half> %tmp20, <7 x half> %tmp25) ; line:43 col:33 + + ; Exp operation. 
+ ; CHECK: [[mul:%.*]] = fmul fast <7 x float> , [[fvec1]] + ; CHECK: call <7 x float> @dx.op.unary.v7f32(i32 21, <7 x float> [[mul]]) + %tmp32 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 139, <7 x float> %tmp4) ; line:47 col:11 + %tmp33 = fadd <7 x float> %tmp15, %tmp32 ; line:47 col:8 + + ; Log operation. + ; CHECK: [[log:%.*]] = call <7 x half> @dx.op.unary.v7f16(i32 23, <7 x half> [[hvec1]]) + ; CHECK: fmul fast <7 x half> , [[log]] + %tmp34 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32 159, <7 x half> %tmp20) ; line:51 col:11 + %tmp35 = fadd <7 x half> %tmp31, %tmp34 ; line:51 col:8 + + ; Smoothstep operation. + ; CHECK: [[sub1:%.*]] = fsub fast <7 x float> [[fvec2]], [[fvec1]] + ; CHECK: [[sub2:%.*]] = fsub fast <7 x float> [[fvec3]], [[fvec1]] + ; CHECK: [[div:%.*]] = fdiv fast <7 x float> [[sub2]], [[sub1]] + ; CHECK: [[sat:%.*]] = call <7 x float> @dx.op.unary.v7f32(i32 7, <7 x float> [[div]]) + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], + ; CHECK: [[sub:%.*]] = fsub fast <7 x float> , [[mul]] + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], [[sub]] + ; CHECK: fmul fast <7 x float> %Saturate, [[mul]] + %tmp36 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 189, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:61 col:11 + %tmp37 = fadd <7 x float> %tmp33, %tmp36 ; line:61 col:8 + + ; Radians operation. + ; CHECK: fmul fast <7 x float> , [[fvec3]] + %tmp38 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 176, <7 x float> %tmp14) ; line:66 col:11 + %tmp39 = fadd <7 x float> %tmp37, %tmp38 ; line:66 col:8 + store <7 x float> %tmp14, <7 x float>* %exp, align 4 ; line:77 col:22 + + ; Frexp operation. 
+ ; CHECK: [[cmp:%.*]] = fcmp fast une <7 x float> [[fvec1]], zeroinitializer + ; CHECK: [[ext:%.*]] = sext <7 x i1> [[cmp]] to <7 x i32> + ; CHECK: [[bct:%.*]] = bitcast <7 x float> [[fvec1]] to <7 x i32> + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[add:%.*]] = add <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[add]], [[ext]] + ; CHECK: [[shr:%.*]] = ashr <7 x i32> [[and]], + ; CHECK: [[i2f:%.*]] = sitofp <7 x i32> [[shr]] to <7 x float> + ; CHECK: store <7 x float> [[i2f]], <7 x float>* %exp + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[or:%.*]] = or <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[or]], [[ext]] + ; CHECK: bitcast <7 x i32> [[and]] to <7 x float> + %tmp41 = call <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32 150, <7 x float> %tmp4, <7 x float>* %exp) ; line:78 col:11 + %tmp42 = fadd <7 x float> %tmp39, %tmp41 ; line:78 col:8 + %tmp43 = load <7 x float>, <7 x float>* %exp, align 4 ; line:79 col:11 + %tmp44 = fadd <7 x float> %tmp42, %tmp43 ; line:79 col:8 + + ; Lerp operation. 
+ ; CHECK: [[sub:%.*]] = fsub fast <7 x half> [[hvec3]], [[hvec2]] + ; CHECK: fmul fast <7 x half> [[hvec1]], [[sub]] + %tmp45 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32 157, <7 x half> %tmp25, <7 x half> %tmp30, <7 x half> %tmp20) ; line:83 col:11 + %tmp46 = fadd <7 x half> %tmp35, %tmp45 ; line:83 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + ; CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + ; CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + %tmp47 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:90 col:29 + %tmp48 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp47) ; line:90 col:29 + %tmp49 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp48, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:90 col:29 + %tmp50 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp49, i32 17) ; line:90 col:29 + %tmp51 = load <7 x i32>, <7 x i32>* %tmp50 ; line:90 col:29 + %tmp52 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:91 col:29 + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp52) ; line:91 col:29 + %tmp54 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:91 col:29 + %tmp55 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 18) ; line:91 col:29 + %tmp56 = load <7 x i32>, <7 x i32>* %tmp55 ; line:91 col:29 + + ; Unsigned int sign operation. + ; CHECK: [[cmp:%.*]] = icmp ne <7 x i32> [[uvec2]], zeroinitializer + ; CHECK: zext <7 x i1> [[cmp]] to <7 x i32> + %tmp57 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32 355, <7 x i32> %tmp56) ; line:96 col:12 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + ; CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + ; CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + %tmp58 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:102 col:32 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp58) ; line:102 col:32 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:102 col:32 + %tmp61 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp60, i32 19) ; line:102 col:32 + %tmp62 = load <7 x 
i64>, <7 x i64>* %tmp61 ; line:102 col:32 + %tmp63 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:103 col:32 + %tmp64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp63) ; line:103 col:32 + %tmp65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp64, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:103 col:32 + %tmp66 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp65, i32 20) ; line:103 col:32 + %tmp67 = load <7 x i64>, <7 x i64>* %tmp66 ; line:103 col:32 + + ; Signed int sign operation. + ; CHECK: [[lt1:%.*]] = icmp slt <7 x i64> zeroinitializer, [[lvec2]] + ; CHECK: [[lt2:%.*]] = icmp slt <7 x i64> [[lvec2]], zeroinitializer + ; CHECK: [[ilt1:%.*]] = zext <7 x i1> [[lt1]] to <7 x i32> + ; CHECK: [[ilt2:%.*]] = zext <7 x i1> [[lt2]] to <7 x i32> + ; CHECK: sub <7 x i32> [[ilt1]], [[ilt2]] + %tmp68 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32 185, <7 x i64> %tmp67) ; line:110 col:12 + %tmp69 = mul <7 x i32> %tmp57, %tmp68 ; line:110 col:9 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec1:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp 
ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec2:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec3:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + %tmp70 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:126 col:29 + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp70) ; line:126 col:29 + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:126 col:29 + %tmp73 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 21) ; line:126 col:29 + %tmp74 = load <7 x i32>, <7 x i32>* %tmp73 ; line:126 col:29 + %tmp75 = icmp ne <7 x i32> %tmp74, zeroinitializer ; line:126 col:29 + %tmp76 = zext <7 x i1> %tmp75 to <7 x i32> ; line:126 col:21 + %tmp77 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:127 col:29 + %tmp78 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp77) ; line:127 col:29 + %tmp79 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp78, %dx.types.ResourceProperties { i32 
4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:127 col:29 + %tmp80 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp79, i32 22) ; line:127 col:29 + %tmp81 = load <7 x i32>, <7 x i32>* %tmp80 ; line:127 col:29 + %tmp82 = icmp ne <7 x i32> %tmp81, zeroinitializer ; line:127 col:29 + %tmp83 = zext <7 x i1> %tmp82 to <7 x i32> ; line:127 col:21 + %tmp84 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:128 col:29 + %tmp85 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp84) ; line:128 col:29 + %tmp86 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp85, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:128 col:29 + %tmp87 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp86, i32 23) ; line:128 col:29 + %tmp88 = load <7 x i32>, <7 x i32>* %tmp87 ; line:128 col:29 + %tmp89 = icmp ne <7 x i32> %tmp88, zeroinitializer ; line:128 col:29 + %tmp90 = zext <7 x i1> %tmp89 to <7 x i32> ; line:128 col:21 + + + ; Or() operation. 
+ ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: [[bvec1:%.*]] = icmp ne <7 x i32> [[vec1]], zeroinitializer + ; CHECK: or <7 x i1> [[bvec1]], [[bvec2]] + %tmp91 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:133 col:21 + %tmp92 = icmp ne <7 x i32> %tmp76, zeroinitializer ; line:133 col:14 + %tmp93 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 169, <7 x i1> %tmp92, <7 x i1> %tmp91) ; line:133 col:11 + %tmp94 = zext <7 x i1> %tmp93 to <7 x i32> ; line:133 col:11 + %tmp95 = add <7 x i32> %tmp69, %tmp94 ; line:133 col:8 + + ; And() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: and <7 x i1> [[bvec2]], [[bvec3]] + %tmp96 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:137 col:22 + %tmp97 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:137 col:15 + %tmp98 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 106, <7 x i1> %tmp97, <7 x i1> %tmp96) ; line:137 col:11 + %tmp99 = zext <7 x i1> %tmp98 to <7 x i32> ; line:137 col:11 + %tmp100 = add <7 x i32> %tmp95, %tmp99 ; line:137 col:8 + + ; Select() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: select <7 x i1> [[bvec3]], <7 x i64> [[lvec1]], <7 x i64> [[lvec2]] + %tmp101 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:140 col:38 + %tmp102 = call <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32 184, <7 x i1> %tmp101, <7 x i64> %tmp62, <7 x i64> %tmp67) ; line:140 col:31 + %tmp103 = call float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32 134, <7 x float> %tmp4, <7 x float> %tmp9) ; line:152 col:11 + + ; Dot operation. 
+ ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 0 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 0 + ; CHECK: [[mul:%.*]] = fmul fast float [[el1]], [[el2]] + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 1 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 1 + ; CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mul]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 2 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 2 + ; CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad1]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 3 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 3 + ; CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad2]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 4 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 4 + ; CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad3]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 5 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 5 + ; CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad4]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 6 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 6 + ; CHECK: call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad5]]) + %tmp104 = insertelement <7 x float> undef, float %tmp103, i32 0 ; line:152 col:11 + %tmp105 = shufflevector <7 x float> %tmp104, <7 x float> undef, <7 x i32> zeroinitializer ; line:152 col:11 + %tmp106 = fadd <7 x float> %tmp44, %tmp105 ; line:152 col:8 + + ; Atan operation. 
+ ; CHECK: call <7 x float> @dx.op.unary.v7f32(i32 17, <7 x float> [[fvec1]]) + %tmp107 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 116, <7 x float> %tmp4) ; line:155 col:11 + %tmp108 = fadd <7 x float> %tmp106, %tmp107 ; line:155 col:8 + + ; Min operation. + ; CHECK: call <7 x i32> @dx.op.binary.v7i32(i32 40, <7 x i32> [[uvec1]], <7 x i32> [[uvec2]]) + %tmp109 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32 353, <7 x i32> %tmp51, <7 x i32> %tmp56) ; line:158 col:11 + %tmp110 = add <7 x i32> %tmp100, %tmp109 ; line:158 col:8 + + ; Mad operation. + ; CHECK: call <7 x float> @dx.op.tertiary.v7f32(i32 46, <7 x float> [[fvec1]], <7 x float> [[fvec2]], <7 x float> [[fvec3]]) + %tmp111 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 162, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:161 col:11 + %tmp112 = fadd <7 x float> %tmp108, %tmp111 ; line:161 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + ; CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + ; CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + ; CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:169 col:31 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:169 col:31 + %tmp115 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:169 col:31 + %tmp116 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 24) ; line:169 col:31 + %tmp117 = load <7 x double>, <7 x double>* %tmp116 ; line:169 col:31 + %tmp118 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:170 col:31 + %tmp119 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp118) ; line:170 col:31 + %tmp120 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp119, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:170 col:31 + %tmp121 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp120, i32 25) ; line:170 col:31 + %tmp122 = load <7 x double>, <7 x double>* %tmp121 ; line:170 col:31 + %tmp123 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:171 col:31 + %tmp124 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp123) ; line:171 col:31 + %tmp125 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp124, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" 
zeroinitializer) ; line:171 col:31 + %tmp126 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp125, i32 26) ; line:171 col:31 + %tmp127 = load <7 x double>, <7 x double>* %tmp126 ; line:171 col:31 + + ; FMA operation. + ; CHECK: call <7 x double> @dx.op.tertiary.v7f64(i32 47, <7 x double> [[dvec1]], <7 x double> [[dvec2]], <7 x double> [[dvec3]]) + %tmp128 = call <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32 147, <7 x double> %tmp117, <7 x double> %tmp122, <7 x double> %tmp127) ; line:174 col:30 + %tmp129 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:176 col:3 + %tmp130 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp129) ; line:176 col:3 + %tmp131 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp130, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:176 col:3 + %tmp132 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp131, i32 0) ; line:176 col:3 + store <7 x half> %tmp46, <7 x half>* %tmp132 ; line:176 col:11 + %tmp133 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:177 col:3 + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp133) ; line:177 col:3 + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle 
%tmp134, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:177 col:3 + %tmp136 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp135, i32 0) ; line:177 col:3 + store <7 x float> %tmp112, <7 x float>* %tmp136 ; line:177 col:11 + %tmp137 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:178 col:3 + %tmp138 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp137) ; line:178 col:3 + %tmp139 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp138, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:178 col:3 + %tmp140 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp139, i32 0) ; line:178 col:3 + store <7 x double> %tmp128, <7 x double>* %tmp140 ; line:178 col:11 + %tmp141 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:179 col:3 + %tmp142 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp141) ; line:179 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp142, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:179 col:3 + %tmp144 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp143, i32 0) ; 
line:179 col:3 + store <7 x i32> %tmp110, <7 x i32>* %tmp144 ; line:179 col:11 + %tmp145 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:180 col:3 + %tmp146 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp145) ; line:180 col:3 + %tmp147 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp146, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:180 col:3 + %tmp148 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp147, i32 0) ; line:180 col:3 + store <7 x i64> %tmp102, <7 x i64>* %tmp148 ; line:180 col:11 + ret void ; line:181 col:1 +} + +declare <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>, <7 x float>) #1 +declare <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>) #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32, <7 x float>) #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32, <7 x half>) #1 +declare <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32, <7 x float>, <7 x float>*) #0 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>, <7 x half>) #1 +declare <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32, <7 x i32>) #1 +declare <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32, <7 x i64>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 
+declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32, <7 x i1>, <7 x i1>) #1 +declare <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32, <7 x i1>, <7 x i64>, <7 x i64>) #1 +declare float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>) #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32, <7 x i32>, <7 x i32>) #1 +declare <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32, <7 x double>, <7 x double>, <7 x double>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !36} +!dx.entryPoints = !{!40} +!dx.fnprops = !{!52} +!dx.options = !{!53, !54} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"cs", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer >" undef, !6, %"class.RWStructuredBuffer >" undef, !11, %"class.RWStructuredBuffer >" undef, !16, %"class.RWStructuredBuffer >" undef, !21, %"class.RWStructuredBuffer >" undef, !26, %"class.RWStructuredBuffer >" undef, !31} +!6 = !{i32 14, !7, !8} +!7 = !{i32 6, 
!"h", i32 3, i32 0, i32 7, i32 8, i32 13, i32 7} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, <7 x half> undef} +!11 = !{i32 28, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9, i32 13, i32 7} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, <7 x float> undef} +!16 = !{i32 56, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 10, i32 13, i32 7} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, <7 x double> undef} +!21 = !{i32 28, !22, !23} +!22 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 1, i32 13, i32 7} +!23 = !{i32 0, !24} +!24 = !{!25} +!25 = !{i32 0, <7 x i1> undef} +!26 = !{i32 28, !27, !28} +!27 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5, i32 13, i32 7} +!28 = !{i32 0, !29} +!29 = !{!30} +!30 = !{i32 0, <7 x i32> undef} +!31 = !{i32 56, !32, !33} +!32 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 6, i32 13, i32 7} +!33 = !{i32 0, !34} +!34 = !{!35} +!35 = !{i32 0, <7 x i64> undef} +!36 = !{i32 1, void ()* @main, !37} +!37 = !{!38} +!38 = !{i32 1, !39, !39} +!39 = !{} +!40 = !{void ()* @main, !"main", null, !41, null} +!41 = !{null, !42, null, null} +!42 = !{!43, !45, !47, !49, !50, !51} +!43 = !{i32 0, %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A", !"hBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !44} +!44 = !{i32 1, i32 14} +!45 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A", !"fBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!46 = !{i32 1, i32 28} +!47 = !{i32 2, %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A", !"dBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!48 = !{i32 1, i32 56} +!49 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A", !"bBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!50 = !{i32 4, %"class.RWStructuredBuffer >"* 
@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A", !"uBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!51 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A", !"lBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!52 = !{void ()* @main, i32 5, i32 8, i32 1, i32 1} +!53 = !{i32 0} +!54 = !{i32 -1} +!59 = !{!60, !60, i64 0} +!60 = !{!"omnipotent char", !61, i64 0} +!61 = !{!"Simple C/C++ TBAA"} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 0008b752b1..a6cc52df1a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1503,7 +1503,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1537,7 +1537,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hf", + "hf<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1554,7 +1554,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the reverse bit pattern of the input value", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1601,7 +1601,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1619,7 +1619,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1674,7 +1674,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "hfd", + "hfd<", "rn", [ db_dxil_param( @@ -1691,7 +1691,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "d", + "d<", "rn", [ db_dxil_param( @@ -1715,7 +1715,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs an integral " + 
i, - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "the operation result"), @@ -2608,7 +2608,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2626,7 +2626,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2644,7 +2644,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2662,7 +2662,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param(