From 39f80672e71dc8b3610eb30002ce46ed2e278f51 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Wed, 7 May 2025 23:40:13 -0700 Subject: [PATCH 01/16] Refactor udt intrinsic arg copy to before SROA Intrinsics that take UDT arguments need copy-in/copy-out. Other aggregate args are flattened for intrinsic calls. Previously, these operations were intermingled, driven by SROA on alloca/GV values. There were RayDesc arguments that weren't treated consistently, leading to problems. They should be flattened into the intrinsic arguments, but TraceRay calls didn't do this. Doing this for TraceRay would cause an issue where the subsequent Payload UDT arg could be in a different position depending on whether it was processed before or after flattening the RayDesc. This change is in preparation for flattening RayDesc args. It separates the UDT copy-in/copy-out into a separate operation before SROA. When doing the copy-in/copy-out separately, we don't expand the memcpy operations until they are handled separately during SROA. This causes a change in some IR shape, which had to be adjusted for in some tests. There are a couple remaining PIX pass tests that still need adjustment. These will likely need further changes when flattening RayDesc, so maybe they should be revisited after that. TODO: - Fix the remaining tests - Flatten the RayDesc parameters - Adjust tests accordingly --- include/dxc/HLSL/HLOperations.h | 19 +- lib/HLSL/HLOperationLower.cpp | 2 +- .../Scalar/ScalarReplAggregatesHLSL.cpp | 218 +++++++++++------- .../hitobject_fromrayquery_scalarrepl.ll | 8 +- ...raytracing_intersection_geometryIndex.hlsl | 8 +- 5 files changed, 159 insertions(+), 96 deletions(-) diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index c75318da99..55843ffb67 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -396,7 +396,10 @@ const unsigned kAnnotateHandleResourceTypeOpIdx = 3; // TraceRay. 
const unsigned kTraceRayRayDescOpIdx = 7; -const unsigned kTraceRayPayLoadOpIdx = 8; +// kTraceRayPayloadPreOpIdx is before flattening the RayDesc +const unsigned kTraceRayPayloadPreOpIdx = 8; +// kTraceRayPayloadOpIdx is after flattening the RayDesc +const unsigned kTraceRayPayloadOpIdx = 8; // TODO: Update after flattening // AllocateRayQuery const unsigned kAllocateRayQueryRayFlagsIdx = 1; @@ -434,12 +437,22 @@ const unsigned kAnnotateNodeHandleNodePropIdx = 2; const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; // HitObject::MakeMiss -const unsigned kHitObjectMakeMiss_NumOp = 8; +const unsigned kHitObjectMakeMiss_PreNumOp = 8; +const unsigned kHitObjectMakeMiss_NumOp = 8; // TODO: Update after flattening const unsigned kHitObjectMakeMissRayDescOpIdx = 4; // HitObject::TraceRay const unsigned kHitObjectTraceRay_RayDescOpIdx = 8; -const unsigned kHitObjectTraceRay_NumOp = 10; +// kHitObjectTraceRay_PayloadPreOpIdx is before flattening the RayDesc +const unsigned kHitObjectTraceRay_PayloadPreOpIdx = 9; +// kHitObjectTraceRay_PayloadOpIdx is after flattening the RayDesc +const unsigned kHitObjectTraceRay_PayloadOpIdx = 9; // TODO: Update after + // flattening +const unsigned kHitObjectTraceRay_PreNumOp = 10; +const unsigned kHitObjectTraceRay_NumOp = 10; // TODO: Update after flattening + +// HitObject::Invoke +const unsigned kHitObjectInvoke_PayloadOpIdx = 2; // HitObject::FromRayQuery const unsigned kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx = 4; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 18d003a764..7db160e86f 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -5760,7 +5760,7 @@ Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, hlsl::OP *OP = &Helper.hlslOP; Value *RayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx); - Value *PayLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx); + Value *PayLoad = 
CI->getArgOperand(HLOperandIndex::kTraceRayPayloadOpIdx); Value *Args[DXIL::OperandIndex::kTraceRayNumOp]; Args[0] = OP->GetU32Const(static_cast(OpCode)); diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 8bd78dd9a6..5a0bf5392d 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -129,7 +129,6 @@ class SROA_Helper { void RewriteMemIntrin(MemIntrinsic *MI, Value *OldV); void RewriteCall(CallInst *CI); void RewriteBitCast(BitCastInst *BCI); - void RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, bool bOut); }; } // namespace @@ -1478,6 +1477,46 @@ void isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, } } +static bool isUDTIntrinsicArg(CallInst *CI, unsigned OpIdx) { + if (HLOpcodeGroup::HLIntrinsic != GetHLOpcodeGroup(CI->getCalledFunction())) + return false; + switch (static_cast(GetHLOpcode(CI))) { + case IntrinsicOp::IOP_TraceRay: + if (OpIdx == HLOperandIndex::kTraceRayRayDescOpIdx) + return true; + if (OpIdx == HLOperandIndex::kTraceRayPayloadPreOpIdx) + return true; + break; + case IntrinsicOp::IOP_ReportHit: + if (OpIdx == HLOperandIndex::kReportIntersectionAttributeOpIdx) + return true; + break; + case IntrinsicOp::IOP_CallShader: + if (OpIdx == HLOperandIndex::kCallShaderPayloadOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_FromRayQuery: + if (OpIdx == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_TraceRay: + // TODO: Remove RayDesc for flattening + if (OpIdx == HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx) + return true; + if (OpIdx == HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_Invoke: + if (OpIdx == HLOperandIndex::kHitObjectInvoke_PayloadOpIdx) + return true; + break; + default: + break; + } + return false; +} + /// 
isSafeForScalarRepl - Check if instruction I is a safe use with regard to /// performing scalar replacement of alloca AI. The results are flagged in /// the Info parameter. Offset indicates the position within AI that is @@ -1536,15 +1575,9 @@ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info) { if (HLOpcodeGroup::NotHL == group) return MarkUnsafe(Info, User); else if (HLOpcodeGroup::HLIntrinsic == group) { - // TODO: should we check HL parameter type for UDT overload instead of - // basing on IOP? - IntrinsicOp opcode = static_cast(GetHLOpcode(CI)); - if (IntrinsicOp::IOP_TraceRay == opcode || - IntrinsicOp::MOP_DxHitObject_TraceRay == opcode || - IntrinsicOp::MOP_DxHitObject_Invoke == opcode || - IntrinsicOp::IOP_ReportHit == opcode || - IntrinsicOp::IOP_CallShader == opcode) { - return MarkUnsafe(Info, User); + for (unsigned OpIdx = 0; OpIdx < CI->getNumArgOperands(); OpIdx++) { + if (CI->getArgOperand(OpIdx) == I && isUDTIntrinsicArg(CI, OpIdx)) + return MarkUnsafe(Info, User); } } } else { @@ -2666,8 +2699,7 @@ void SROA_Helper::RewriteBitCast(BitCastInst *BCI) { /// replace OldVal with alloca and /// copy in copy out data between alloca and flattened NewElts /// in CallInst. 
-void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, - bool bOut) { +static void RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, bool bOut) { Function *F = CI->getParent()->getParent(); IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F)); const DataLayout &DL = F->getParent()->getDataLayout(); @@ -2678,16 +2710,60 @@ void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, Value *Alloca = AllocaBuilder.CreateAlloca(userTyElt); IRBuilder<> Builder(CI); if (bIn) { - MemCpyInst *cpy = cast(Builder.CreateMemCpy( - Alloca, userTyV, DL.getTypeAllocSize(userTyElt), false)); - RewriteMemIntrin(cpy, cpy->getRawSource()); + Builder.CreateMemCpy(Alloca, userTyV, DL.getTypeAllocSize(userTyElt), + false); } CI->setArgOperand(ArgIdx, Alloca); if (bOut) { Builder.SetInsertPoint(CI->getNextNode()); - MemCpyInst *cpy = cast(Builder.CreateMemCpy( - userTyV, Alloca, DL.getTypeAllocSize(userTyElt), false)); - RewriteMemIntrin(cpy, cpy->getRawSource()); + Builder.CreateMemCpy(userTyV, Alloca, DL.getTypeAllocSize(userTyElt), + false); + } +} + +static void copyIntrinsicUDTArgs(HLModule &HLM) { + // Iterate HLIntrinsic function users + // For specific intrinsics, use RewriteCallArg on UDT args + for (Function &F : HLM.GetModule()->functions()) { + if (F.isIntrinsic() || !F.isDeclaration()) + continue; + if (GetHLOpcodeGroup(&F) != HLOpcodeGroup::HLIntrinsic) + continue; + // Iterate users + for (User *U : F.users()) { + if (CallInst *CI = dyn_cast(U)) { + switch (static_cast(GetHLOpcode(CI))) { + case IntrinsicOp::IOP_TraceRay: + // TODO: Remove RayDesc for flattening + RewriteCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx, + /*bIn*/ true, /*bOut*/ false); + RewriteCallArg(CI, HLOperandIndex::kTraceRayPayloadPreOpIdx, + /*bIn*/ true, /*bOut*/ true); + break; + case IntrinsicOp::IOP_ReportHit: + RewriteCallArg(CI, HLOperandIndex::kReportIntersectionAttributeOpIdx, + /*bIn*/ true, /*bOut*/ false); + break; + case 
IntrinsicOp::IOP_CallShader: + RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, + /*bIn*/ true, /*bOut*/ true); + break; + case IntrinsicOp::MOP_DxHitObject_FromRayQuery: + if (CI->getNumArgOperands() == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) { + RewriteCallArg( + CI, + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx, + /*bIn*/ true, /*bOut*/ false); + } + break; + case IntrinsicOp::MOP_DxHitObject_TraceRay: + RewriteCallArg(CI, HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx, + /*bIn*/ true, /*bOut*/ true); + break; + } + } + } } } @@ -2741,10 +2817,23 @@ static CallInst *RewriteWithFlattenedHLIntrinsicCall(CallInst *CI, /// RewriteCall - Replace OldVal with flattened NewElts in CallInst. void SROA_Helper::RewriteCall(CallInst *CI) { - HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); - if (group != HLOpcodeGroup::NotHL) { + HLOpcodeGroup Group = GetHLOpcodeGroupByName(CI->getCalledFunction()); + if (Group != HLOpcodeGroup::NotHL) { unsigned opcode = GetHLOpcode(CI); - if (group == HLOpcodeGroup::HLIntrinsic) { + if (Group == HLOpcodeGroup::HLIntrinsic) { + // RayQuery this pointer replacement. + if (OldVal->getType()->isPointerTy() && + dxilutil::IsHLSLRayQueryType( + OldVal->getType()->getPointerElementType())) { + // For RayQuery methods, we want to replace the RayQuery this pointer + // with a load and use of the underlying handle value. + // This will allow elimination of RayQuery types earlier. 
+ RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + return; + } + IntrinsicOp IOP = static_cast(opcode); switch (IOP) { case IntrinsicOp::MOP_Append: { @@ -2756,36 +2845,33 @@ void SROA_Helper::RewriteCall(CallInst *CI) { /*loadElts*/ false); DeadInsts.push_back(CI); } break; - case IntrinsicOp::IOP_TraceRay: { - if (OldVal == - CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) { - RewriteCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx, - /*bIn*/ true, /*bOut*/ false); - } else { - DXASSERT(OldVal == - CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx), - "else invalid TraceRay"); - RewriteCallArg(CI, HLOperandIndex::kTraceRayPayLoadOpIdx, - /*bIn*/ true, /*bOut*/ true); - } - } break; - case IntrinsicOp::IOP_ReportHit: { - RewriteCallArg(CI, HLOperandIndex::kReportIntersectionAttributeOpIdx, - /*bIn*/ true, /*bOut*/ false); - } break; - case IntrinsicOp::IOP_CallShader: { - RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, - /*bIn*/ true, /*bOut*/ true); - } break; - case IntrinsicOp::MOP_DxHitObject_MakeMiss: { + //case IntrinsicOp::IOP_TraceRay: + // if (OldVal == + // CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) { + // // TODO: flatten RayDesc + // RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + // /*loadElts*/ true); + // DeadInsts.push_back(CI); + // } + //break; + //case IntrinsicOp::MOP_DxHitObject_TraceRay: + // if (OldVal == + // CI->getArgOperand(HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx)) { + // // TODO: flatten RayDesc + // RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + // /*loadElts*/ true); + // DeadInsts.push_back(CI); + // } + // break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: if (OldVal == CI->getArgOperand(HLOperandIndex::kHitObjectMakeMissRayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); } - } break; - case IntrinsicOp::MOP_TraceRayInline: { + 
break; + case IntrinsicOp::MOP_TraceRayInline: if (OldVal == CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, @@ -2793,47 +2879,8 @@ void SROA_Helper::RewriteCall(CallInst *CI) { DeadInsts.push_back(CI); break; } - } LLVM_FALLTHROUGH; - case IntrinsicOp::MOP_DxHitObject_FromRayQuery: { - const bool IsWithAttrs = - CI->getNumArgOperands() == - HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp; - if (IsWithAttrs && - (OldVal == - CI->getArgOperand( - HLOperandIndex:: - kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx))) { - RewriteCallArg( - CI, - HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx, - /*bIn*/ true, /*bOut*/ false); - break; - } - - // For RayQuery methods, we want to replace the RayQuery this pointer - // with a load and use of the underlying handle value. - // This will allow elimination of RayQuery types earlier. - RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, - /*loadElts*/ true); - DeadInsts.push_back(CI); - break; - } default: - // RayQuery this pointer replacement. - if (OldVal->getType()->isPointerTy() && - CI->getNumArgOperands() >= HLOperandIndex::kHandleOpIdx && - OldVal == CI->getArgOperand(HLOperandIndex::kHandleOpIdx) && - dxilutil::IsHLSLRayQueryType( - OldVal->getType()->getPointerElementType())) { - // For RayQuery methods, we want to replace the RayQuery this pointer - // with a load and use of the underlying handle value. - // This will allow elimination of RayQuery types earlier. - RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, - /*loadElts*/ true); - DeadInsts.push_back(CI); - break; - } DXASSERT(0, "cannot flatten hlsl intrinsic."); } } @@ -4416,6 +4463,9 @@ class SROA_Parameter_HLSL : public ModulePass { F->eraseFromParent(); } + // Expand flattened copy-in/copy-out for intrinsic UDT args: + copyIntrinsicUDTArgs(*m_pHLModule); + // SROA globals and allocas. 
SROAGlobalAndAllocas(*m_pHLModule, m_HasDbgInfo); diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll index 5afd30b524..85c3a34eb9 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll @@ -95,10 +95,10 @@ target triple = "dxil-ms-dx" @"$Globals" = external constant %ConstantBuffer ; CHECK: %[[RQA:[^ ]+]] = alloca i32 -; CHECK: %[[ATTRA0:[^ ]+]] = alloca %struct.CustomAttrs -; CHECK: %[[ATTRA1:[^ ]+]] = alloca %struct.CustomAttrs ; CHECK: %[[XATTRA:[^ ]+]] = alloca float ; CHECK: %[[YATTRA:[^ ]+]] = alloca float +; CHECK: %[[ATTRA0:[^ ]+]] = alloca %struct.CustomAttrs +; CHECK: %[[ATTRA1:[^ ]+]] = alloca %struct.CustomAttrs ; COM: Check same query handle used for TraceRayInline and the FromRayQuery calls ; CHECK: %[[RQH:[^ ]+]] = load i32, i32* %[[RQA]] @@ -122,7 +122,7 @@ target triple = "dxil-ms-dx" ; CHECK: store float %[[XF1]], float* %[[XPTR0]] ; CHECK: %[[YPTR0:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA0]], i32 0, i32 1 ; CHECK: %[[YF1:[^ ]+]] = load float, float* %[[YATTRA]] -; CHECK: store float %[[YF1]], float* %[[YPTR0]], align 4 +; CHECK: store float %[[YF1]], float* %[[YPTR0]] ; CHECK: %[[RQH1:[^ ]+]] = load i32, i32* %[[RQA]] ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH1]], i32 16, %struct.CustomAttrs* %[[ATTRA0]]) @@ -140,7 +140,7 @@ target triple = "dxil-ms-dx" ; CHECK: store float %[[XF2]], float* %[[XPTR1]] ; CHECK: %[[YPTR1:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA1]], i32 0, i32 1 ; CHECK: %[[YF2:[^ ]+]] = load float, float* %[[YATTRA]] -; CHECK: store float %[[YF2]], float* %[[YPTR1]], align 4 +; CHECK: store float 
%[[YF2]], float* %[[YPTR1]] ; CHECK: %[[RQH2:[^ ]+]] = load i32, i32* %[[RQA]] ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH2]], i32 17, %struct.CustomAttrs* %[[ATTRA1]]) diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl index 12df1ecbcf..98997a52b1 100644 --- a/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl +++ b/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl @@ -1,10 +1,10 @@ // RUN: %dxc -T lib_6_5 -auto-binding-space 11 %s | FileCheck %s // CHECK: define void [[intersection1:@"\\01\?intersection1@[^\"]+"]]() #0 { -// CHECK: [[rayTCurrent:%[^ ]+]] = call float @dx.op.rayTCurrent.f32(i32 154) -// CHECK: [[GeometryIndex:%[^ ]+]] = call i32 @dx.op.geometryIndex.i32(i32 213) -// CHECK: icmp eq i32 [[GeometryIndex]], 0 -// CHECK: call i1 @dx.op.reportHit.struct.MyAttributes(i32 158, float [[rayTCurrent]], i32 0, %struct.MyAttributes* nonnull {{.*}}) +// CHECK-DAG: [[rayTCurrent:%[^ ]+]] = call float @dx.op.rayTCurrent.f32(i32 154) +// CHECK-DAG: [[GeometryIndex:%[^ ]+]] = call i32 @dx.op.geometryIndex.i32(i32 213) +// CHECK-DAG: icmp eq i32 [[GeometryIndex]], 0 +// CHECK-DAG: call i1 @dx.op.reportHit.struct.MyAttributes(i32 158, float [[rayTCurrent]], i32 0, %struct.MyAttributes* nonnull {{.*}}) // CHECK: ret void struct MyAttributes { From 39f76a398e999f266f1483c9885fbe9ae0e003d8 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 8 May 2025 13:42:55 -0700 Subject: [PATCH 02/16] Copy RayDesc args in too, copyIntrinsicUDTArgs -> copyIntrinsicAggArgs This renames: - copyIntrinsicUDTArgs -> copyIntrinsicAggArgs - RewriteCallArg -> memcpyAggCallArg. 
Updated to copy the RayDesc aggregate arguments as well. This will fix the issue with a RayDesc argument provided directly from a cbuffer, since the incoming argument pointer will no longer be skipped by SROA. Expected IR for tests will need to be updated after flattening RayDesc, so holding off on updating tests until then. --- .../Scalar/ScalarReplAggregatesHLSL.cpp | 60 ++++++++++++------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 5a0bf5392d..04ab7edc21 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -2695,11 +2695,11 @@ void SROA_Helper::RewriteBitCast(BitCastInst *BCI) { RewriteForGEP(cast(GEP), GEPBuilder); } -/// RewriteCallArg - For Functions which don't flat, -/// replace OldVal with alloca and -/// copy in copy out data between alloca and flattened NewElts -/// in CallInst. -static void RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, bool bOut) { +/// memcpyAggCallArg - For an aggregate call argument, this replaces the +/// argument with an alloca and inserts a memcpy for input (if CopyIn) and +/// output (if CopyOut). 
+static void memcpyAggCallArg(CallInst *CI, unsigned ArgIdx, bool CopyIn, + bool CopyOut) { Function *F = CI->getParent()->getParent(); IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F)); const DataLayout &DL = F->getParent()->getDataLayout(); @@ -2709,21 +2709,24 @@ static void RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, bool bOut) { Type *userTyElt = userTy->getElementType(); Value *Alloca = AllocaBuilder.CreateAlloca(userTyElt); IRBuilder<> Builder(CI); - if (bIn) { + if (CopyIn) { Builder.CreateMemCpy(Alloca, userTyV, DL.getTypeAllocSize(userTyElt), false); } CI->setArgOperand(ArgIdx, Alloca); - if (bOut) { + if (CopyOut) { Builder.SetInsertPoint(CI->getNextNode()); Builder.CreateMemCpy(userTyV, Alloca, DL.getTypeAllocSize(userTyElt), false); } } -static void copyIntrinsicUDTArgs(HLModule &HLM) { +static void copyIntrinsicAggArgs(HLModule &HLM) { // Iterate HLIntrinsic function users - // For specific intrinsics, use RewriteCallArg on UDT args + // For specific intrinsics, use memcpyAggCallArg on aggregate args + // This ensures that the call does not directly use the pointer supplied, + // allowing certain arguments to be flattened, and UDT args to be correctly + // lowered. 
for (Function &F : HLM.GetModule()->functions()) { if (F.isIntrinsic() || !F.isDeclaration()) continue; @@ -2734,32 +2737,43 @@ static void copyIntrinsicUDTArgs(HLModule &HLM) { if (CallInst *CI = dyn_cast(U)) { switch (static_cast(GetHLOpcode(CI))) { case IntrinsicOp::IOP_TraceRay: - // TODO: Remove RayDesc for flattening - RewriteCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx, - /*bIn*/ true, /*bOut*/ false); - RewriteCallArg(CI, HLOperandIndex::kTraceRayPayloadPreOpIdx, - /*bIn*/ true, /*bOut*/ true); + memcpyAggCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + memcpyAggCallArg(CI, HLOperandIndex::kTraceRayPayloadPreOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); break; case IntrinsicOp::IOP_ReportHit: - RewriteCallArg(CI, HLOperandIndex::kReportIntersectionAttributeOpIdx, - /*bIn*/ true, /*bOut*/ false); + memcpyAggCallArg(CI, + HLOperandIndex::kReportIntersectionAttributeOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); break; case IntrinsicOp::IOP_CallShader: - RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, - /*bIn*/ true, /*bOut*/ true); + memcpyAggCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + case IntrinsicOp::MOP_TraceRayInline: + memcpyAggCallArg(CI, HLOperandIndex::kTraceRayInlineRayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); break; case IntrinsicOp::MOP_DxHitObject_FromRayQuery: if (CI->getNumArgOperands() == HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) { - RewriteCallArg( + memcpyAggCallArg( CI, HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx, - /*bIn*/ true, /*bOut*/ false); + /*CopyIn*/ true, /*CopyOut*/ false); } break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectMakeMissRayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + break; case IntrinsicOp::MOP_DxHitObject_TraceRay: - RewriteCallArg(CI, HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx, - /*bIn*/ true, 
/*bOut*/ true); + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + memcpyAggCallArg(CI, + HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); break; } } @@ -4464,7 +4478,7 @@ class SROA_Parameter_HLSL : public ModulePass { } // Expand flattened copy-in/copy-out for intrinsic UDT args: - copyIntrinsicUDTArgs(*m_pHLModule); + copyIntrinsicAggArgs(*m_pHLModule); // SROA globals and allocas. SROAGlobalAndAllocas(*m_pHLModule, m_HasDbgInfo); From 291b36a74942bab0c4b31a1f69ba7a94be2b35f8 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Fri, 9 May 2025 00:08:09 -0700 Subject: [PATCH 03/16] Flatten RayDesc, update tests - Enable RayDesc flattening in SROA - Update lowering code accordingly - Separated MakeNop and MakeMiss lowering into two functions - Updated tests TODO: Update PIX pass tests --- include/dxc/DXIL/DxilConstants.h | 4 + include/dxc/HLSL/HLOperations.h | 18 +- lib/HLSL/HLOperationLower.cpp | 192 +++++++---------- .../Scalar/ScalarReplAggregatesHLSL.cpp | 79 ++++--- .../DxilGen/hitobject_traceinvoke_dxilgen.ll | 201 +++++++----------- .../hitobject_make_scalarrepl.ll | 13 +- 6 files changed, 233 insertions(+), 274 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index fe32c06f63..a65b4bc149 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -1583,6 +1583,10 @@ const unsigned kMSStoreOutputColOpIdx = 3; const unsigned kMSStoreOutputVIdxOpIdx = 4; const unsigned kMSStoreOutputValOpIdx = 5; +// HitObject::MakeMiss +const unsigned kHitObjectMakeMiss_RayDescOpIdx = 3; +const unsigned kHitObjectMakeMiss_NumOp = 11; + // HitObject::TraceRay const unsigned kHitObjectTraceRay_RayDescOpIdx = 7; const unsigned kHitObjectTraceRay_PayloadOpIdx = 15; diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index 55843ffb67..0da9804ecb 100644 --- 
a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -399,7 +399,9 @@ const unsigned kTraceRayRayDescOpIdx = 7; // kTraceRayPayloadPreOpIdx is before flattening the RayDesc const unsigned kTraceRayPayloadPreOpIdx = 8; // kTraceRayPayloadOpIdx is after flattening the RayDesc -const unsigned kTraceRayPayloadOpIdx = 8; // TODO: Update after flattening +const unsigned kTraceRayPayloadOpIdx = 11; +const unsigned kTraceRay_PreNumOp = 9; +const unsigned kTraceRay_NumOp = 12; // AllocateRayQuery const unsigned kAllocateRayQueryRayFlagsIdx = 1; @@ -410,6 +412,10 @@ const unsigned kCallShaderPayloadOpIdx = 2; // TraceRayInline. const unsigned kTraceRayInlineRayDescOpIdx = 5; +// kTraceRayInlinePayloadPreOpIdx is before flattening the RayDesc +const unsigned kTraceRayInlinePayloadPreOpIdx = 6; +// kTraceRayInlinePayloadOpIdx is after flattening the RayDesc +const unsigned kTraceRayInlinePayloadOpIdx = 9; // ReportIntersection. const unsigned kReportIntersectionAttributeOpIdx = 3; @@ -437,19 +443,17 @@ const unsigned kAnnotateNodeHandleNodePropIdx = 2; const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; // HitObject::MakeMiss -const unsigned kHitObjectMakeMiss_PreNumOp = 8; -const unsigned kHitObjectMakeMiss_NumOp = 8; // TODO: Update after flattening -const unsigned kHitObjectMakeMissRayDescOpIdx = 4; +const unsigned kHitObjectMakeMiss_NumOp = 8; +const unsigned kHitObjectMakeMiss_RayDescOpIdx = 4; // HitObject::TraceRay const unsigned kHitObjectTraceRay_RayDescOpIdx = 8; // kHitObjectTraceRay_PayloadPreOpIdx is before flattening the RayDesc const unsigned kHitObjectTraceRay_PayloadPreOpIdx = 9; // kHitObjectTraceRay_PayloadOpIdx is after flattening the RayDesc -const unsigned kHitObjectTraceRay_PayloadOpIdx = 9; // TODO: Update after - // flattening +const unsigned kHitObjectTraceRay_PayloadOpIdx = 12; const unsigned kHitObjectTraceRay_PreNumOp = 10; -const unsigned kHitObjectTraceRay_NumOp = 10; // TODO: Update after flattening +const 
unsigned kHitObjectTraceRay_NumOp = 13; // HitObject::Invoke const unsigned kHitObjectInvoke_PayloadOpIdx = 2; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 7db160e86f..8115ad0d5d 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -5720,37 +5720,24 @@ Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter}); } -static unsigned LoadRayDescElementsIntoArgs(Value **Args, hlsl::OP *OP, - IRBuilder<> &Builder, - Value *RayDescPtr, unsigned Index) { - // struct RayDesc - //{ - // float3 Origin; - // float TMin; - // float3 Direction; - // float TMax; - //}; - Value *ZeroIdx = OP->GetU32Const(0); - Value *Origin = Builder.CreateGEP(RayDescPtr, {ZeroIdx, ZeroIdx}); - Origin = Builder.CreateLoad(Origin); - Args[Index++] = Builder.CreateExtractElement(Origin, (uint64_t)0); - Args[Index++] = Builder.CreateExtractElement(Origin, 1); - Args[Index++] = Builder.CreateExtractElement(Origin, 2); - - Value *TMinPtr = Builder.CreateGEP(RayDescPtr, {ZeroIdx, OP->GetU32Const(1)}); - Args[Index++] = Builder.CreateLoad(TMinPtr); - - Value *DirectionPtr = - Builder.CreateGEP(RayDescPtr, {ZeroIdx, OP->GetU32Const(2)}); - Value *Direction = Builder.CreateLoad(DirectionPtr); - - Args[Index++] = Builder.CreateExtractElement(Direction, (uint64_t)0); - Args[Index++] = Builder.CreateExtractElement(Direction, 1); - Args[Index++] = Builder.CreateExtractElement(Direction, 2); - - Value *TMaxPtr = Builder.CreateGEP(RayDescPtr, {ZeroIdx, OP->GetU32Const(3)}); - Args[Index++] = Builder.CreateLoad(TMaxPtr); - return Index; +static void TransferRayDescArgs(Value **Args, hlsl::OP *OP, + IRBuilder<> &Builder, CallInst *CI, + unsigned &Index, unsigned &HLIndex) { + // Extract elements from flattened ray desc arguments in HL op. 
+ // float3 Origin; + Value *origin = CI->getArgOperand(HLIndex++); + Args[Index++] = Builder.CreateExtractElement(origin, (uint64_t)0); + Args[Index++] = Builder.CreateExtractElement(origin, 1); + Args[Index++] = Builder.CreateExtractElement(origin, 2); + // float TMin; + Args[Index++] = CI->getArgOperand(HLIndex++); + // float3 Direction; + Value *direction = CI->getArgOperand(HLIndex++); + Args[Index++] = Builder.CreateExtractElement(direction, (uint64_t)0); + Args[Index++] = Builder.CreateExtractElement(direction, 1); + Args[Index++] = Builder.CreateExtractElement(direction, 2); + // float TMax; + Args[Index++] = CI->getArgOperand(HLIndex++); } Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, @@ -5759,21 +5746,24 @@ Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, bool &Translated) { hlsl::OP *OP = &Helper.hlslOP; - Value *RayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx); - Value *PayLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayloadOpIdx); - Value *Args[DXIL::OperandIndex::kTraceRayNumOp]; Args[0] = OP->GetU32Const(static_cast(OpCode)); - for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) - Args[i] = CI->getArgOperand(i); + unsigned Index = 1, HLIndex = 1; + while (HLIndex < HLOperandIndex::kTraceRayRayDescOpIdx) + Args[Index++] = CI->getArgOperand(HLIndex++); IRBuilder<> Builder(CI); - LoadRayDescElementsIntoArgs(Args, OP, Builder, RayDesc, - DXIL::OperandIndex::kTraceRayRayDescOpIdx); + TransferRayDescArgs(Args, OP, Builder, CI, Index, HLIndex); + DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands() - 1); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayPayloadOpIdx); + + Value *Payload = CI->getArgOperand(HLIndex++); + Args[Index++] = Payload; - Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = PayLoad; + DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayNumOp); - Type *Ty = PayLoad->getType(); + Type *Ty 
= Payload->getType(); Function *F = OP->GetOpFunc(OpCode, Ty); return Builder.CreateCall(F, Args); @@ -5817,33 +5807,16 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp]; Args[0] = opArg; - for (unsigned i = 1; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) { - Args[i] = CI->getArgOperand(i); - } + unsigned Index = 1, HLIndex = 1; + while (HLIndex < HLOperandIndex::kTraceRayInlineRayDescOpIdx) + Args[Index++] = CI->getArgOperand(HLIndex++); IRBuilder<> Builder(CI); - unsigned hlIndex = HLOperandIndex::kTraceRayInlineRayDescOpIdx; - unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx; - - // struct RayDesc - //{ - // float3 Origin; - Value *origin = CI->getArgOperand(hlIndex++); - Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(origin, 1); - Args[index++] = Builder.CreateExtractElement(origin, 2); - // float TMin; - Args[index++] = CI->getArgOperand(hlIndex++); - // float3 Direction; - Value *direction = CI->getArgOperand(hlIndex++); - Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(direction, 1); - Args[index++] = Builder.CreateExtractElement(direction, 2); - // float TMax; - Args[index++] = CI->getArgOperand(hlIndex++); - //}; - - DXASSERT_NOMSG(index == DXIL::OperandIndex::kTraceRayInlineNumOp); + DXASSERT_NOMSG(HLIndex == HLOperandIndex::kTraceRayInlineRayDescOpIdx); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx); + TransferRayDescArgs(Args, hlslOP, Builder, CI, Index, HLIndex); + DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineNumOp); Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy()); @@ -6197,55 +6170,47 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, // Shader Execution Reordering. 
namespace { -Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, +Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { hlsl::OP *HlslOP = &Helper.hlslOP; IRBuilder<> Builder(CI); - unsigned SrcIdx = 1; - Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); - if (Opcode == OP::OpCode::HitObject_MakeNop) { - Value *HitObject = TrivialDxilOperation( - Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); - Builder.CreateStore(HitObject, HitObjectPtr); - DXASSERT( - CI->use_empty(), - "Default ctor return type is a Clang artifact. Value must not be used"); - return nullptr; - } + Value *HitObjectPtr = CI->getArgOperand(1); + Value *HitObject = TrivialDxilOperation( + Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); + Builder.CreateStore(HitObject, HitObjectPtr); + DXASSERT( + CI->use_empty(), + "Default ctor return type is a Clang artifact. 
Value must not be used"); + return nullptr; +} +Value *TranslateHitObjectMakeMiss(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { DXASSERT_NOMSG(CI->getNumArgOperands() == HLOperandIndex::kHitObjectMakeMiss_NumOp); - Value *RayFlags = CI->getArgOperand(SrcIdx++); - Value *MissShaderIdx = CI->getArgOperand(SrcIdx++); - DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMissRayDescOpIdx); - Value *RayDescOrigin = CI->getArgOperand(SrcIdx++); - Value *RayDescOriginX = - Builder.CreateExtractElement(RayDescOrigin, (uint64_t)0); - Value *RayDescOriginY = - Builder.CreateExtractElement(RayDescOrigin, (uint64_t)1); - Value *RayDescOriginZ = - Builder.CreateExtractElement(RayDescOrigin, (uint64_t)2); - - Value *RayDescTMin = CI->getArgOperand(SrcIdx++); - Value *RayDescDirection = CI->getArgOperand(SrcIdx++); - Value *RayDescDirectionX = - Builder.CreateExtractElement(RayDescDirection, (uint64_t)0); - Value *RayDescDirectionY = - Builder.CreateExtractElement(RayDescDirection, (uint64_t)1); - Value *RayDescDirectionZ = - Builder.CreateExtractElement(RayDescDirection, (uint64_t)2); - - Value *RayDescTMax = CI->getArgOperand(SrcIdx++); + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + Value *Args[DXIL::OperandIndex::kHitObjectMakeMiss_NumOp]; + Args[0] = nullptr; // Filled in by TrivialDxilOperation + + unsigned DestIdx = 1, SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // RayFlags + Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // MissShaderIdx + + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx); + DXASSERT_NOMSG(DestIdx == + DXIL::OperandIndex::kHitObjectMakeMiss_RayDescOpIdx); + TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectMakeMiss_NumOp); - 
Value *OutHitObject = TrivialDxilOperation( - Opcode, - {nullptr, RayFlags, MissShaderIdx, RayDescOriginX, RayDescOriginY, - RayDescOriginZ, RayDescTMin, RayDescDirectionX, RayDescDirectionY, - RayDescDirectionZ, RayDescTMax}, - Helper.voidTy, CI, HlslOP); + Value *OutHitObject = + TrivialDxilOperation(Opcode, Args, Helper.voidTy, CI, OP); Builder.CreateStore(OutHitObject, HitObjectPtr); return nullptr; } @@ -6348,10 +6313,9 @@ Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); - const unsigned DxilNumArgs = DxilInst_HitObject_TraceRay::arg_payload + 1; DXASSERT_NOMSG(CI->getNumArgOperands() == HLOperandIndex::kHitObjectTraceRay_NumOp); - Value *Args[DxilNumArgs]; + Value *Args[DXIL::OperandIndex::kHitObjectTraceRay_NumOp]; Value *OpArg = OP->GetU32Const(static_cast(OpCode)); Args[0] = OpArg; @@ -6363,13 +6327,19 @@ Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, Args[DestIdx] = CI->getArgOperand(SrcIdx); } - Value *RayDescPtr = CI->getArgOperand(SrcIdx++); - DestIdx = LoadRayDescElementsIntoArgs(Args, OP, Builder, RayDescPtr, DestIdx); + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx); + DXASSERT_NOMSG(DestIdx == + DXIL::OperandIndex::kHitObjectTraceRay_RayDescOpIdx); + TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands() - 1); + DXASSERT_NOMSG(DestIdx == + DXIL::OperandIndex::kHitObjectTraceRay_PayloadOpIdx); + Value *Payload = CI->getArgOperand(SrcIdx++); Args[DestIdx++] = Payload; DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); - DXASSERT_NOMSG(DestIdx == DxilNumArgs); + DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectTraceRay_NumOp); Function *F = OP->GetOpFunc(OpCode, Payload->getType()); @@ -7402,7 +7372,7 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, - 
{IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMake, + {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMakeNop, DXIL::OpCode::HitObject_MakeNop}, {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, DXIL::OpCode::MaybeReorderThread}, @@ -7462,7 +7432,7 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant, TranslateHitObjectLoadLocalRootTableConstant, DXIL::OpCode::HitObject_LoadLocalRootTableConstant}, - {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMake, + {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMakeMiss, DXIL::OpCode::HitObject_MakeMiss}, {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex, TranslateHitObjectSetShaderTableIndex, diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 04ab7edc21..3b9edb131e 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -1480,11 +1480,14 @@ void isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, static bool isUDTIntrinsicArg(CallInst *CI, unsigned OpIdx) { if (HLOpcodeGroup::HLIntrinsic != GetHLOpcodeGroup(CI->getCalledFunction())) return false; + const unsigned NumOps = CI->getNumArgOperands(); switch (static_cast(GetHLOpcode(CI))) { case IntrinsicOp::IOP_TraceRay: - if (OpIdx == HLOperandIndex::kTraceRayRayDescOpIdx) + if (NumOps == HLOperandIndex::kTraceRay_PreNumOp && + OpIdx == HLOperandIndex::kTraceRayPayloadPreOpIdx) return true; - if (OpIdx == HLOperandIndex::kTraceRayPayloadPreOpIdx) + else if (NumOps == HLOperandIndex::kTraceRay_NumOp && + OpIdx == HLOperandIndex::kTraceRayPayloadOpIdx) return true; break; case IntrinsicOp::IOP_ReportHit: @@ -1496,15 +1499,17 @@ static bool isUDTIntrinsicArg(CallInst *CI, unsigned OpIdx) { return true; break; case IntrinsicOp::MOP_DxHitObject_FromRayQuery: - if (OpIdx == - 
HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx) + if (NumOps == HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp && + OpIdx == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx) return true; break; case IntrinsicOp::MOP_DxHitObject_TraceRay: - // TODO: Remove RayDesc for flattening - if (OpIdx == HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx) + if (NumOps == HLOperandIndex::kHitObjectTraceRay_PreNumOp && + OpIdx == HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx) return true; - if (OpIdx == HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx) + else if (NumOps == HLOperandIndex::kHitObjectTraceRay_NumOp && + OpIdx == HLOperandIndex::kHitObjectTraceRay_PayloadOpIdx) return true; break; case IntrinsicOp::MOP_DxHitObject_Invoke: @@ -2765,7 +2770,7 @@ static void copyIntrinsicAggArgs(HLModule &HLM) { } break; case IntrinsicOp::MOP_DxHitObject_MakeMiss: - memcpyAggCallArg(CI, HLOperandIndex::kHitObjectMakeMissRayDescOpIdx, + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx, /*CopyIn*/ true, /*CopyOut*/ false); break; case IntrinsicOp::MOP_DxHitObject_TraceRay: @@ -2775,6 +2780,12 @@ static void copyIntrinsicAggArgs(HLModule &HLM) { HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx, /*CopyIn*/ true, /*CopyOut*/ true); break; + case IntrinsicOp::MOP_DxHitObject_Invoke: + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectInvoke_PayloadOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + default: + break; } } } @@ -2850,7 +2861,7 @@ void SROA_Helper::RewriteCall(CallInst *CI) { IntrinsicOp IOP = static_cast(opcode); switch (IOP) { - case IntrinsicOp::MOP_Append: { + case IntrinsicOp::MOP_Append: // Buffer Append already expand in code gen. // Must be OutputStream Append here. // Every Elt has a pointer type. 
@@ -2858,31 +2869,34 @@ void SROA_Helper::RewriteCall(CallInst *CI) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ false); DeadInsts.push_back(CI); - } break; - //case IntrinsicOp::IOP_TraceRay: - // if (OldVal == - // CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) { - // // TODO: flatten RayDesc - // RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, - // /*loadElts*/ true); - // DeadInsts.push_back(CI); - // } - //break; - //case IntrinsicOp::MOP_DxHitObject_TraceRay: - // if (OldVal == - // CI->getArgOperand(HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx)) { - // // TODO: flatten RayDesc - // RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, - // /*loadElts*/ true); - // DeadInsts.push_back(CI); - // } - // break; + return; + case IntrinsicOp::IOP_TraceRay: + if (OldVal == + CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) { + // TODO: flatten RayDesc + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + return; + } + break; + case IntrinsicOp::MOP_DxHitObject_TraceRay: + if (OldVal == CI->getArgOperand( + HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx)) { + // TODO: flatten RayDesc + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + return; + } + break; case IntrinsicOp::MOP_DxHitObject_MakeMiss: if (OldVal == - CI->getArgOperand(HLOperandIndex::kHitObjectMakeMissRayDescOpIdx)) { + CI->getArgOperand(HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); + return; } break; case IntrinsicOp::MOP_TraceRayInline: @@ -2891,12 +2905,13 @@ void SROA_Helper::RewriteCall(CallInst *CI) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); - break; + return; } - LLVM_FALLTHROUGH; + break; default: - DXASSERT(0, "cannot flatten hlsl intrinsic."); + break; 
} + DXASSERT(0, "cannot flatten hlsl intrinsic."); } // TODO: check other high level dx operations if need to. } else { diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll index 6f364a0161..03bb0716ce 100644 --- a/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll @@ -1,26 +1,16 @@ ; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s ; REQUIRES: dxil-1-9 -; -; Buffer Definitions: -; -; -; Resource Bindings: -; -; Name Type Format Dim ID HLSL Bind Count -; ------------------------------ ---------- ------- ----------- ------- -------------- ------ -; RTAS texture i32 ras T0t4294967295,space4294967295 1 -; target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" target triple = "dxil-ms-dx" %struct.RaytracingAccelerationStructure = type { i32 } -%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } %struct.Payload = type { <3 x float> } %dx.types.HitObject = type { i8* } %dx.types.Handle = type { i8* } %dx.types.ResourceProperties = type { i32, i32 } %"class.RWStructuredBuffer" = type { float } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } %"class.dx::HitObject" = type { i32 } @"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 @@ -28,55 +18,37 @@ target triple = "dxil-ms-dx" ; Function Attrs: nounwind define void @"\01?main@@YAXXZ"() #0 { entry: - %rayDesc = alloca %struct.RayDesc, align 4 - %pld = alloca %struct.Payload, align 4 + %pld_invoke = alloca %struct.Payload + %pld_trace = alloca %struct.Payload %hit = alloca %dx.types.HitObject, align 4 - %0 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !31 ; line:80 col:3 - call void @llvm.lifetime.start(i64 32, i8* %0) #0, !dbg !31 ; line:80 col:3 - %Origin = getelementptr 
inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 0, !dbg !35 ; line:81 col:11 - store <3 x float> , <3 x float>* %Origin, align 4, !dbg !36, !tbaa !37 ; line:81 col:18 - %TMin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 1, !dbg !40 ; line:82 col:11 - store float 3.000000e+00, float* %TMin, align 4, !dbg !41, !tbaa !42 ; line:82 col:16 - %Direction = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 2, !dbg !44 ; line:83 col:11 - store <3 x float> , <3 x float>* %Direction, align 4, !dbg !45, !tbaa !37 ; line:83 col:21 - %TMax = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 3, !dbg !46 ; line:84 col:11 - store float 7.000000e+00, float* %TMax, align 4, !dbg !47, !tbaa !42 ; line:84 col:16 - %1 = bitcast %struct.Payload* %pld to i8*, !dbg !48 ; line:86 col:3 - call void @llvm.lifetime.start(i64 12, i8* %1) #0, !dbg !48 ; line:86 col:3 - %dummy = getelementptr inbounds %struct.Payload, %struct.Payload* %pld, i32 0, i32 0, !dbg !49 ; line:87 col:7 - store <3 x float> , <3 x float>* %dummy, align 4, !dbg !50, !tbaa !37 ; line:87 col:13 - %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !51 ; line:89 col:3 - call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !51 ; line:89 col:3 - %3 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !52 ; line:89 col:23 - %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %3), !dbg !52 ; line:89 col:23 - %5 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !52 ; line:89 
col:23 - ; CHECK: %[[ORIGINPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR:[^ ]+]], i32 0, i32 0 - ; CHECK: %[[ORIGIN:[^ ]+]] = load <3 x float>, <3 x float>* %[[ORIGINPTR]] - ; CHECK: %[[O0:[^ ]+]] = extractelement <3 x float> %[[ORIGIN]], i64 0 - ; CHECK: %[[O1:[^ ]+]] = extractelement <3 x float> %[[ORIGIN]], i64 1 - ; CHECK: %[[O2:[^ ]+]] = extractelement <3 x float> %[[ORIGIN]], i64 2 - ; CHECK: %[[TMINPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR]], i32 0, i32 1 - ; CHECK: %[[TMIN:[^ ]+]] = load float, float* %[[TMINPTR]] - ; CHECK: %[[DIRPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR]], i32 0, i32 2 - ; CHECK: %[[DIR:[^ ]+]] = load <3 x float>, <3 x float>* %[[DIRPTR]] - ; CHECK: %[[D0:[^ ]+]] = extractelement <3 x float> %[[DIR]], i64 0 - ; CHECK: %[[D1:[^ ]+]] = extractelement <3 x float> %[[DIR]], i64 1 - ; CHECK: %[[D2:[^ ]+]] = extractelement <3 x float> %[[DIR]], i64 2 - ; CHECK: %[[TMAXPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR]], i32 0, i32 3 - ; CHECK: %[[TMAX:[^ ]+]] = load float, float* %[[TMAXPTR]] - ; CHECK: %[[TRACEHO:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, float %[[O0]], float %[[O1]], float %[[O2]], float %[[TMIN]], float %[[D0]], float %[[D1]], float %[[D2]], float %[[TMAX]], %struct.Payload* %pld) - call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, %struct.RayDesc* %rayDesc, %struct.Payload* %pld), !dbg !52 ; line:89 col:23 - ; CHECK: store %dx.types.HitObject %[[TRACEHO]], %dx.types.HitObject* %[[HOPTR:[^ ]+]] - ; CHECK: %[[INVOKEHO:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %[[HOPTR]] - ; CHECK: call void 
@dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %[[INVOKEHO]], %struct.Payload* %pld) - call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld), !dbg !53 ; line:99 col:3 - %6 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !54 ; line:100 col:1 - call void @llvm.lifetime.end(i64 4, i8* %6) #0, !dbg !54 ; line:100 col:1 - %7 = bitcast %struct.Payload* %pld to i8*, !dbg !54 ; line:100 col:1 - call void @llvm.lifetime.end(i64 12, i8* %7) #0, !dbg !54 ; line:100 col:1 - %8 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !54 ; line:100 col:1 - call void @llvm.lifetime.end(i64 32, i8* %8) #0, !dbg !54 ; line:100 col:1 - ret void, !dbg !54 ; line:100 col:1 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !32 ; line:91 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !32 ; line:91 col:3 + %1 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !36 ; line:91 col:23 + %rtas = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %1), !dbg !36 ; line:91 col:23 + + ; Capture the handle for the RTAS + ; CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) + %2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %rtas, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !36 ; line:91 col:23 + + %3 = getelementptr inbounds %struct.Payload, %struct.Payload* %pld_trace, i32 0, i32 0, !dbg !36 ; line:91 col:23 + store <3 x float> , <3 x float>* %3, !dbg !36 ; line:91 col:23 + + ; CHECK: 
%[[TRACEHO:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* %pld_trace), !dbg !3 ; line:91 col:23 + ; CHECK: store %dx.types.HitObject %[[TRACEHO]], %dx.types.HitObject* %hit + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %2, i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> , float 3.000000e+00, <3 x float> , float 7.000000e+00, %struct.Payload* %pld_trace), !dbg !36 ; line:91 col:23 + + %4 = getelementptr inbounds %struct.Payload, %struct.Payload* %pld_trace, i32 0, i32 0, !dbg !37 ; line:101 col:3 + %5 = load <3 x float>, <3 x float>* %4, !dbg !37 ; line:101 col:3 + %6 = getelementptr inbounds %struct.Payload, %struct.Payload* %pld_invoke, i32 0, i32 0, !dbg !37 ; line:101 col:3 + store <3 x float> %5, <3 x float>* %6, !dbg !37 ; line:101 col:3 + + ; CHECK: %[[INVOKEHO:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %hit + ; CHECK: call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %[[INVOKEHO]], %struct.Payload* %pld_invoke) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld_invoke), !dbg !37 ; line:101 col:3 + + %7 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !38 ; line:102 col:1 + call void @llvm.lifetime.end(i64 4, i8* %7) #0, !dbg !38 ; line:102 col:1 + ret void, !dbg !38 ; line:102 col:1 } ; Function Attrs: nounwind @@ -85,9 +57,6 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; Function Attrs: nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) #0 -; Function Attrs: nounwind 
-declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*) #0 - ; Function Attrs: nounwind readnone declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 @@ -97,71 +66,59 @@ declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.type ; Function Attrs: nounwind declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32, %dx.types.HitObject*, %struct.Payload*) #0 +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*) #0 + attributes #0 = { nounwind } attributes #1 = { nounwind readnone } !llvm.module.flags = !{!0} !pauseresume = !{!1} -!dx.version = !{!2} -!dx.valver = !{!2} -!dx.shaderModel = !{!3} -!dx.typeAnnotations = !{!4, !19} -!dx.entryPoints = !{!23} -!dx.fnprops = !{!28} -!dx.options = !{!29, !30} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!29} +!dx.options = !{!30, !31} !0 = !{i32 2, !"Debug Info Version", i32 3} !1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} -!2 = !{i32 1, i32 9} -!3 = !{!"lib", i32 6, i32 9} -!4 = !{i32 0, %"class.RWStructuredBuffer" undef, !5, %struct.RayDesc undef, !10, %struct.Payload undef, !15, %"class.dx::HitObject" undef, !17} -!5 = !{i32 4, !6, !7} -!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} -!7 = !{i32 0, !8} -!8 = !{!9} -!9 = !{i32 0, float undef} -!10 = !{i32 32, !11, !12, !13, !14} -!11 = !{i32 6, !"Origin", i32 3, i32 0, 
i32 7, i32 9, i32 13, i32 3} -!12 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} -!13 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} -!14 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} -!15 = !{i32 12, !16} -!16 = !{i32 6, !"dummy", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} -!17 = !{i32 4, !18} -!18 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} -!19 = !{i32 1, void ()* @"\01?main@@YAXXZ", !20} -!20 = !{!21} -!21 = !{i32 1, !22, !22} -!22 = !{} -!23 = !{null, !"", null, !24, null} -!24 = !{!25, null, null, null} -!25 = !{!26} -!26 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !27} -!27 = !{i32 0, i32 4} -!28 = !{void ()* @"\01?main@@YAXXZ", i32 7} -!29 = !{i32 -2147483584} -!30 = !{i32 -1} -!31 = !DILocation(line: 80, column: 3, scope: !32) -!32 = !DISubprogram(name: "main", scope: !33, file: !33, line: 79, type: !34, isLocal: false, isDefinition: true, scopeLine: 79, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") -!33 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl", directory: "") -!34 = !DISubroutineType(types: !22) -!35 = !DILocation(line: 81, column: 11, scope: !32) -!36 = !DILocation(line: 81, column: 18, scope: !32) -!37 = !{!38, !38, i64 0} -!38 = !{!"omnipotent char", !39, i64 0} -!39 = !{!"Simple C/C++ TBAA"} -!40 = !DILocation(line: 82, column: 11, scope: !32) -!41 = !DILocation(line: 82, column: 16, scope: !32) -!42 = !{!43, !43, i64 0} -!43 = !{!"float", !38, i64 0} -!44 = !DILocation(line: 83, column: 11, scope: !32) -!45 = !DILocation(line: 83, column: 21, scope: !32) -!46 = !DILocation(line: 84, column: 11, scope: !32) -!47 = !DILocation(line: 84, column: 16, scope: !32) -!48 = !DILocation(line: 86, column: 3, scope: !32) -!49 = !DILocation(line: 87, column: 7, scope: !32) -!50 = !DILocation(line: 87, column: 13, scope: !32) -!51 = 
!DILocation(line: 89, column: 3, scope: !32) -!52 = !DILocation(line: 89, column: 23, scope: !32) -!53 = !DILocation(line: 99, column: 3, scope: !32) -!54 = !DILocation(line: 100, column: 1, scope: !32) +!2 = !{!"dxc(private) 1.8.0.4928 (ser_hlslattributes_patch, 937c16cc6)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer" undef, !6, %struct.RayDesc undef, !11, %struct.Payload undef, !16, %"class.dx::HitObject" undef, !18} +!6 = !{i32 4, !7, !8} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, float undef} +!11 = !{i32 32, !12, !13, !14, !15} +!12 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!13 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!14 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!15 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!16 = !{i32 12, !17} +!17 = !{i32 6, !"dummy", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!18 = !{i32 4, !19} +!19 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!20 = !{i32 1, void ()* @"\01?main@@YAXXZ", !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{null, !"", null, !25, null} +!25 = !{!26, null, null, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!30 = !{i32 -2147483584} +!31 = !{i32 -1} +!32 = !DILocation(line: 91, column: 3, scope: !33) +!33 = !DISubprogram(name: "main", scope: !34, file: !34, line: 81, type: !35, isLocal: false, isDefinition: true, scopeLine: 81, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!34 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl", directory: "") +!35 = !DISubroutineType(types: !23) +!36 = !DILocation(line: 91, column: 23, scope: !33) +!37 = 
!DILocation(line: 101, column: 3, scope: !33) +!38 = !DILocation(line: 102, column: 1, scope: !33) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll index 89ee886c2e..78f7271e94 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll @@ -33,7 +33,7 @@ entry: %hit = alloca %dx.types.HitObject, align 4 %tmp = alloca %dx.types.HitObject, align 4 %ray = alloca %struct.RayDesc, align 4 -; CHECK-NOT: %{{[^ ]+}} = alloca %struct.RayDesc +; CHECK-NOT: alloca %struct.RayDesc %tmp2 = alloca %dx.types.HitObject, align 4 ; CHECK: %[[HIT0:[^ ]+]] = alloca %dx.types.HitObject, align 4 ; CHECK: %[[HIT1:[^ ]+]] = alloca %dx.types.HitObject, align 4 @@ -69,7 +69,16 @@ entry: ; CHECK-DAG: %[[RDTMIN:[^ ]+]] = load float, float* %[[pRDTMIN]], ; CHECK-DAG: %[[RDD:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD]], ; CHECK-DAG: %[[RDTMAX:[^ ]+]] = load float, float* %[[pRDTMAX]], -; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 0, i32 1, <3 x float> %[[RDO]], float %[[RDTMIN]], <3 x float> %[[RDD]], float %[[RDTMAX]]) +; Copy introduced for RayDesc argument +; CHECK-DAG: store <3 x float> %[[RDO]], <3 x float>* %[[pRDO2:[^ ]+]], +; CHECK-DAG: store float %[[RDTMIN]], float* %[[pRDTMIN2:[^ ]+]], +; CHECK-DAG: store <3 x float> %[[RDD]], <3 x float>* %[[pRDD2:[^ ]+]], +; CHECK-DAG: store float %[[RDTMAX]], float* %[[pRDTMAX2:[^ ]+]], +; CHECK-DAG: %[[RDO2:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDO2]], +; CHECK-DAG: %[[RDTMIN2:[^ ]+]] = load float, float* %[[pRDTMIN2]], +; CHECK-DAG: %[[RDD2:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD2]], +; CHECK-DAG: %[[RDTMAX2:[^ ]+]] = load float, float* %[[pRDTMAX2]], +; CHECK: call void @"dx.hl.op..void (i32, 
%dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 0, i32 1, <3 x float> %[[RDO2]], float %[[RDTMIN2]], <3 x float> %[[RDD2]], float %[[RDTMAX2]]) call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %tmp2, i32 0, i32 1, %struct.RayDesc* %ray), !dbg !31 ; line:45 col:3 %10 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 call void @llvm.lifetime.end(i64 4, i8* %10) #0, !dbg !31 ; line:45 col:3 From 8ded3e0b3e2bffa0fd1b0fbe0ee4a99378062899 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Fri, 9 May 2025 00:45:43 -0700 Subject: [PATCH 04/16] clang-format --- lib/HLSL/HLOperationLower.cpp | 18 ++++++++++-------- .../Scalar/ScalarReplAggregatesHLSL.cpp | 4 ++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 8115ad0d5d..58c1de3941 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6170,10 +6170,11 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, // Shader Execution Reordering. 
namespace { -Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, - HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *ObjHelper, - bool &Translated) { +Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP, + OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { hlsl::OP *HlslOP = &Helper.hlslOP; IRBuilder<> Builder(CI); Value *HitObjectPtr = CI->getArgOperand(1); @@ -6186,10 +6187,11 @@ Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcod return nullptr; } -Value *TranslateHitObjectMakeMiss(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, - HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *ObjHelper, - bool &Translated) { +Value *TranslateHitObjectMakeMiss(CallInst *CI, IntrinsicOp IOP, + OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { DXASSERT_NOMSG(CI->getNumArgOperands() == HLOperandIndex::kHitObjectMakeMiss_NumOp); hlsl::OP *OP = &Helper.hlslOP; diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 3b9edb131e..82cd59f375 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -2891,8 +2891,8 @@ void SROA_Helper::RewriteCall(CallInst *CI) { } break; case IntrinsicOp::MOP_DxHitObject_MakeMiss: - if (OldVal == - CI->getArgOperand(HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx)) { + if (OldVal == CI->getArgOperand( + HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); From ec4134476cf9a8ff3d925b6d95a37e4f0a1939a5 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Mon, 12 May 2025 17:22:32 -0700 Subject: [PATCH 05/16] Remove/Update PIX tests checking for old RayDesc IR pattern The PIX test or portion of 
test that relies on the old compiler behavior of alloca for RayDesc was removed. Since these tests were made specifically to verify this old pattern, it was ok to remove them. --- .../pix/AnnotateVirtualRegs-Raygen.hlsl | 36 ------- tools/clang/unittests/HLSL/PixTest.cpp | 100 ------------------ 2 files changed, 136 deletions(-) delete mode 100644 tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl diff --git a/tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl b/tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl deleted file mode 100644 index b9670bdaba..0000000000 --- a/tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl +++ /dev/null @@ -1,36 +0,0 @@ -// RUN: %dxc -Od -T lib_6_6 %s | %opt -S -dxil-annotate-with-virtual-regs | FileCheck %s - - -/* To run locally run: -%dxc -Od -T lib_6_6 %s -Fc %t.ll -%opt %t.ll -S -dxil-annotate-with-virtual-regs | FileCheck %s -*/ - -RaytracingAccelerationStructure scene : register(t0); - -struct RayPayload -{ - int3 color; -}; - -[shader("raygeneration")] -void ENTRY() -{ - RayDesc ray = {{0,0,0}, {0,0,1}, 0.05, 1000.0}; - RayPayload pld; - TraceRay(scene, 0 /*rayFlags*/, 0xFF /*rayMask*/, 0 /*sbtRecordOffset*/, 1 /*sbtRecordStride*/, 0 /*missIndex*/, ray, pld); -} - -// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 0, !pix-dxil-reg [[RDGEP:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 1, !pix-dxil-reg [[RDGEP2:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP2:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc,
%struct.RayDesc* {{.*}}, i32 0, i32 2, !pix-dxil-reg [[RDGEP3:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP3:![0-9]+]], !pix-dxil-inst-num {{.*}} - -// CHECK-DAG: [[RDGEP]] = !{i32 0, i32 0} -// CHECK-DAG: [[NothGEP]] = !{i32 0, i32 11} -// CHECK-DAG: [[RDGEP2]] = !{i32 0, i32 3} -// CHECK-DAG: [[NothGEP2]] = !{i32 0, i32 12} -// CHECK-DAG: [[RDGEP3]] = !{i32 0, i32 4} -// CHECK-DAG: [[NothGEP3]] = !{i32 0, i32 13} diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index e337d2951c..af7801c7bf 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -119,7 +119,6 @@ class PixTest : public ::testing::Test { TEST_METHOD(AccessTracking_ModificationReport_SM66) TEST_METHOD(PixStructAnnotation_Lib_DualRaygen) - TEST_METHOD(PixStructAnnotation_Lib_RaygenAllocaStructAlignment) TEST_METHOD(PixStructAnnotation_Simple) TEST_METHOD(PixStructAnnotation_CopiedStruct) @@ -1455,100 +1454,6 @@ void Raygen1() } } -TEST_F(PixTest, PixStructAnnotation_Lib_RaygenAllocaStructAlignment) { - if (m_ver.SkipDxilVersion(1, 5)) - return; - - const char *hlsl = R"( - -RaytracingAccelerationStructure Scene : register(t0, space0); -RWTexture2D RenderTarget : register(u0); - -struct SceneConstantBuffer -{ - float4x4 projectionToWorld; - float4 cameraPosition; - float4 lightPosition; - float4 lightAmbientColor; - float4 lightDiffuseColor; -}; - -ConstantBuffer g_sceneCB : register(b0); - -struct RayPayload -{ - float4 color; -}; - -inline void GenerateCameraRay(uint2 index, out float3 origin, out float3 direction) -{ - float2 xy = index + 0.5f; // center in the middle of the pixel. - float2 screenPos = xy;// / DispatchRaysDimensions().xy * 2.0 - 1.0; - - // Invert Y for DirectX-style coordinates. - screenPos.y = -screenPos.y; - - // Unproject the pixel coordinate into a ray. 
- float4 world = /*mul(*/float4(screenPos, 0, 1)/*, g_sceneCB.projectionToWorld)*/; - - //world.xyz /= world.w; - origin = world.xyz; //g_sceneCB.cameraPosition.xyz; - direction = float3(1,0,0);//normalize(world.xyz - origin); -} - -void RaygenCommon() -{ - float3 rayDir; - float3 origin; - - // Generate a ray for a camera pixel corresponding to an index from the dispatched 2D grid. - GenerateCameraRay(DispatchRaysIndex().xy, origin, rayDir); - - // Trace the ray. - // Set the ray's extents. - RayDesc ray; - ray.Origin = origin; - ray.Direction = rayDir; - // Set TMin to a non-zero small value to avoid aliasing issues due to floating - point errors. - // TMin should be kept small to prevent missing geometry at close contact areas. - ray.TMin = 0.001; - ray.TMax = 10000.0; - RayPayload payload = { float4(0, 0, 0, 0) }; - TraceRay(Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, ~0, 0, 1, 0, ray, payload); - - // Write the raytraced color to the output texture. - // RenderTarget[DispatchRaysIndex().xy] = payload.color; -} - -[shader("raygeneration")] -void Raygen() -{ - RaygenCommon(); -} -)"; - - auto Testables = TestStructAnnotationCase(hlsl, L"-Od", true, L"lib_6_6"); - - // Built-in type "RayDesc" has this structure: struct { float3 Origin; float - // TMin; float3 Direction; float TMax; } This is 8 floats, with members at - // offsets 0,3,4,7 respectively. 
- - auto FindAtLeastOneOf = [=](char const *name, uint32_t index) { - VERIFY_IS_TRUE(std::find_if(Testables.AllocaWrites.begin(), - Testables.AllocaWrites.end(), - [&name, &index](AllocaWrite const &aw) { - return 0 == strcmp(aw.memberName.c_str(), - name) && - aw.index == index; - }) != Testables.AllocaWrites.end()); - }; - - FindAtLeastOneOf("Origin.x", 0); - FindAtLeastOneOf("TMin", 3); - FindAtLeastOneOf("Direction.x", 4); - FindAtLeastOneOf("TMax", 7); -} - TEST_F(PixTest, PixStructAnnotation_Simple) { if (m_ver.SkipDxilVersion(1, 5)) return; @@ -3441,7 +3346,6 @@ void RaygenInternalName() // check that there are alloca writes that cover all of them. RayPayload // has four elements, and RayDesc has eight. std::array RayPayloadElementCoverage; - std::array RayDescElementCoverage; for (auto const &write : metaDataKeyToValue.allocaWrites) { // the whole point of the changes with this test is to separate vector @@ -3452,14 +3356,10 @@ void RaygenInternalName() if (findAlloca != metaDataKeyToValue.allocaDefinitions.end()) { if (findAlloca->second.count == 4) { RayPayloadElementCoverage[write.second.offset] = true; - } else if (findAlloca->second.count == 8) { - RayDescElementCoverage[write.second.offset] = true; } } } // Check that coverage for every element was emitted: for (auto const &b : RayPayloadElementCoverage) VERIFY_IS_TRUE(b); - for (auto const &b : RayDescElementCoverage) - VERIFY_IS_TRUE(b); } From 3720504f955ffe6c6d0c2de97991c0f7ae1a98c5 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Mon, 12 May 2025 17:44:03 -0700 Subject: [PATCH 06/16] Remove leftover TODOs in SROA --- lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 82cd59f375..7cdd2b7e20 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -2873,7 +2873,6 @@ void 
SROA_Helper::RewriteCall(CallInst *CI) { case IntrinsicOp::IOP_TraceRay: if (OldVal == CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) { - // TODO: flatten RayDesc RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); @@ -2883,7 +2882,6 @@ void SROA_Helper::RewriteCall(CallInst *CI) { case IntrinsicOp::MOP_DxHitObject_TraceRay: if (OldVal == CI->getArgOperand( HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx)) { - // TODO: flatten RayDesc RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); From c1a2b125b14b895729296c7e566d7587630c8f5f Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Mon, 12 May 2025 19:02:13 -0700 Subject: [PATCH 07/16] Address feedback. --- lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 7cdd2b7e20..0063cdcefb 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -1477,6 +1477,8 @@ void isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, } } +// Returns whether the `OpIdx` argument of HL intrinsic call `CI` is expected to +// be a user-defined-type. static bool isUDTIntrinsicArg(CallInst *CI, unsigned OpIdx) { if (HLOpcodeGroup::HLIntrinsic != GetHLOpcodeGroup(CI->getCalledFunction())) return false; @@ -1579,12 +1581,9 @@ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info) { // Most HL functions are safe for scalar repl. 
if (HLOpcodeGroup::NotHL == group) return MarkUnsafe(Info, User); - else if (HLOpcodeGroup::HLIntrinsic == group) { - for (unsigned OpIdx = 0; OpIdx < CI->getNumArgOperands(); OpIdx++) { - if (CI->getArgOperand(OpIdx) == I && isUDTIntrinsicArg(CI, OpIdx)) - return MarkUnsafe(Info, User); - } - } + else if (HLOpcodeGroup::HLIntrinsic == group && + isUDTIntrinsicArg(CI, U.getOperandNo())) + return MarkUnsafe(Info, User); } else { return MarkUnsafe(Info, User); } @@ -2714,10 +2713,9 @@ static void memcpyAggCallArg(CallInst *CI, unsigned ArgIdx, bool CopyIn, Type *userTyElt = userTy->getElementType(); Value *Alloca = AllocaBuilder.CreateAlloca(userTyElt); IRBuilder<> Builder(CI); - if (CopyIn) { + if (CopyIn) Builder.CreateMemCpy(Alloca, userTyV, DL.getTypeAllocSize(userTyElt), false); - } CI->setArgOperand(ArgIdx, Alloca); if (CopyOut) { Builder.SetInsertPoint(CI->getNextNode()); From c1976792a137c5c4b121b00a16c1761d771ad1a7 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Mon, 12 May 2025 20:07:38 -0700 Subject: [PATCH 08/16] Add Trace and Invoke SROA pass test --- .../hitobject_traceinvoke_scalarrepl.ll | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll new file mode 100644 index 0000000000..bf3394936b --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll @@ -0,0 +1,199 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; COM: Based on tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl + +; CHECK: %[[HITOBJ:[^ ]+]] = alloca %dx.types.HitObject, align 4 + +; COM: Init RayDesc. 
+; CHECK-DAG: store <3 x float> , <3 x float>* %[[ORIGIN_P0:[^ ]+]], align 4 +; CHECK-DAG: store float 3.000000e+00, float* %[[TMIN_P0:[^ ]+]], align 4 +; CHECK-DAG: store <3 x float> , <3 x float>* %[[DIRECTION_P0:[^ ]+]], align 4 +; CHECK-DAG: store float 7.000000e+00, float* %[[TMAX_P0:[^ ]+]], align 4 + +; CHECK-DAG: %[[RTAS:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + +; COM: Copy RayDesc. +; CHECK-DAG: %[[ORIGIN_L0:[^ ]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P0]] +; CHECK-DAG: store <3 x float> %[[ORIGIN_L0]], <3 x float>* %[[ORIGIN_P1:[^ ]+]] +; CHECK-DAG: %[[TMIN_L0:[^ ]+]] = load float, float* %[[TMIN_P0]] +; CHECK-DAG: store float %[[TMIN_L0]], float* %[[TMIN_P1:[^ ]+]] +; CHECK-DAG: %[[DIRECTION_L0:[^ ]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] +; CHECK-DAG: store <3 x float> %[[DIRECTION_L0]], <3 x float>* %[[DIRECTION_P1:[^ ]+]] +; CHECK-DAG: %[[TMAX_L0:[^ ]+]] = load float, float* %[[TMAX_P0]] +; CHECK-DAG: store float %[[TMAX_L0]], float* %[[TMAX_P1:[^ ]+]] + +; COM: Load RayDesc. +; CHECK-DAG: %[[ORIGIN_L1:[^ ]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P1]] +; CHECK-DAG: %[[TMIN_L1:[^ ]+]] = load float, float* %[[TMIN_P1]] +; CHECK-DAG: %[[DIRECTION_L1:[^ ]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P1]] +; CHECK-DAG: %[[TMAX_L1:[^ ]+]] = load float, float* %[[TMAX_P1]] + +; COM: RayDesc is scalar replaced in HL op for dx::HitObject::TraceRay. 
+; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %[[HITOBJ]], %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> %[[ORIGIN_L1]], float %[[TMIN_L1]], <3 x float> %[[DIRECTION_L1]], float %[[TMAX_L1]], %struct.Payload* %[[PLD_P0:[^ ]+]]) + +; COM: Copy payload. +; CHECK: %[[GEP_PLD_P0:[^ ]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 0 +; CHECK: %[[PLD_L0:[^ ]+]] = load <3 x float>, <3 x float>* %[[GEP_PLD_P0]] +; CHECK: store <3 x float> %[[PLD_L0]], <3 x float>* %[[PLD_M0_P0:[^ ]+]] +; CHECK: %[[GEP_PLD_P1:[^ ]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P1:[^ ]+]], i32 0, i32 0 +; CHECK: [[PLD_L1:[^ ]+]] = load <3 x float>, <3 x float>* %[[PLD_M0_P0]] +; CHECK: store <3 x float> [[PLD_L1]], <3 x float>* %[[GEP_PLD_P1]] + +; COM: dx::HitObject::Invoke +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %[[HITOBJ]], %struct.Payload* %[[PLD_P1]]) + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"class.RWStructuredBuffer" = type { float } +%ConstantBuffer = type opaque +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%struct.Payload = type { <3 x float> } +%dx.types.HitObject = type { i8* } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.dx::HitObject" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?UAV@@3V?$RWStructuredBuffer@M@@A" = external global %"class.RWStructuredBuffer", align 4 +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define 
void @"\01?main@@YAXXZ"() #0 { +entry: + %rayDesc = alloca %struct.RayDesc, align 4 + %pld = alloca %struct.Payload, align 4 + %hit = alloca %dx.types.HitObject, align 4 + %0 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !37 ; line:82 col:3 + call void @llvm.lifetime.start(i64 32, i8* %0) #0, !dbg !37 ; line:82 col:3 + + ; COM: Init RayDesc. + %Origin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 0, !dbg !41 ; line:83 col:11 + store <3 x float> , <3 x float>* %Origin, align 4, !dbg !42, !tbaa !43 ; line:83 col:18 + %TMin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 1, !dbg !46 ; line:84 col:11 + store float 3.000000e+00, float* %TMin, align 4, !dbg !47, !tbaa !48 ; line:84 col:16 + %Direction = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 2, !dbg !50 ; line:85 col:11 + store <3 x float> , <3 x float>* %Direction, align 4, !dbg !51, !tbaa !43 ; line:85 col:21 + %TMax = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 3, !dbg !52 ; line:86 col:11 + store float 7.000000e+00, float* %TMax, align 4, !dbg !53, !tbaa !48 ; line:86 col:16 + + %1 = bitcast %struct.Payload* %pld to i8*, !dbg !54 ; line:88 col:3 + call void @llvm.lifetime.start(i64 12, i8* %1) #0, !dbg !54 ; line:88 col:3 + %dummy = getelementptr inbounds %struct.Payload, %struct.Payload* %pld, i32 0, i32 0, !dbg !55 ; line:89 col:7 + store <3 x float> , <3 x float>* %dummy, align 4, !dbg !56, !tbaa !43 ; line:89 col:13 + %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !57 ; line:91 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !57 ; line:91 col:3 + %3 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !58 ; line:91 col:23 + %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, 
%struct.RaytracingAccelerationStructure %3), !dbg !58 ; line:91 col:23 + %5 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !58 ; line:91 col:23 + + ; COM: dx::HitObject::TraceRay + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, %struct.RayDesc* %rayDesc, %struct.Payload* %pld), !dbg !58 ; line:91 col:23 + + ; COM: dx::HitObject::Invoke + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld), !dbg !59 ; line:101 col:3 + + %6 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !60 ; line:102 col:1 + call void @llvm.lifetime.end(i64 4, i8* %6) #0, !dbg !60 ; line:102 col:1 + %7 = bitcast %struct.Payload* %pld to i8*, !dbg !60 ; line:102 col:1 + call void @llvm.lifetime.end(i64 12, i8* %7) #0, !dbg !60 ; line:102 col:1 + %8 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !60 ; line:102 col:1 + call void @llvm.lifetime.end(i64 32, i8* %8) #0, !dbg !60 ; line:102 col:1 + ret void, !dbg !60 ; line:102 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32, %dx.types.HitObject*, %struct.Payload*) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!34} +!dx.options = !{!35, !36} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4928 (ser_hlslattributes_patch, 937c16cc6)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer" undef, !6, %struct.RayDesc undef, !11, %struct.Payload undef, !16, %"class.dx::HitObject" undef, !18} +!6 = !{i32 4, !7, !8} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, float undef} +!11 = !{i32 32, !12, !13, !14, !15} +!12 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!13 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!14 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!15 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!16 = !{i32 12, !17} +!17 = !{i32 6, !"dummy", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!18 = !{i32 4, !19} +!19 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!20 = !{i32 1, void ()* @"\01?main@@YAXXZ", !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{null, !"", null, !25, null} +!25 = !{!26, !29, !32, null} +!26 = !{!27} +!27 = 
!{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{!30} +!30 = !{i32 0, %"class.RWStructuredBuffer"* @"\01?UAV@@3V?$RWStructuredBuffer@M@@A", !"UAV", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !31} +!31 = !{i32 1, i32 4} +!32 = !{!33} +!33 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!34 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!35 = !{i32 -2147483584} +!36 = !{i32 -1} +!37 = !DILocation(line: 82, column: 3, scope: !38) +!38 = !DISubprogram(name: "main", scope: !39, file: !39, line: 81, type: !40, isLocal: false, isDefinition: true, scopeLine: 81, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!39 = !DIFile(filename: "D:\5Cgit\5Cdxc\5Cmain\5Ctools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cobjects\5CHitObject\5Chitobject_traceinvoke.hlsl", directory: "") +!40 = !DISubroutineType(types: !23) +!41 = !DILocation(line: 83, column: 11, scope: !38) +!42 = !DILocation(line: 83, column: 18, scope: !38) +!43 = !{!44, !44, i64 0} +!44 = !{!"omnipotent char", !45, i64 0} +!45 = !{!"Simple C/C++ TBAA"} +!46 = !DILocation(line: 84, column: 11, scope: !38) +!47 = !DILocation(line: 84, column: 16, scope: !38) +!48 = !{!49, !49, i64 0} +!49 = !{!"float", !44, i64 0} +!50 = !DILocation(line: 85, column: 11, scope: !38) +!51 = !DILocation(line: 85, column: 21, scope: !38) +!52 = !DILocation(line: 86, column: 11, scope: !38) +!53 = !DILocation(line: 86, column: 16, scope: !38) +!54 = !DILocation(line: 88, column: 3, scope: !38) +!55 = !DILocation(line: 89, column: 7, scope: !38) +!56 = !DILocation(line: 89, column: 13, scope: !38) +!57 = !DILocation(line: 91, column: 3, scope: !38) +!58 = !DILocation(line: 91, column: 23, scope: !38) +!59 = !DILocation(line: 101, column: 3, scope: !38) +!60 = !DILocation(line: 102, column: 1, scope: !38) From 
b5bdcc707d1d15bf101b4bf2a79dc3cd44682623 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Mon, 12 May 2025 21:35:50 -0700 Subject: [PATCH 09/16] Add TraceRay SROA pass test --- .../ScalarReplHLSL/traceray_scalarrepl.ll | 180 ++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll new file mode 100644 index 0000000000..5c256f9af7 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll @@ -0,0 +1,180 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; CHECK: define <4 x float> @" +; CHECK-SAME: ?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z"(<2 x float>* noalias dereferenceable(8) %f2, %struct.RayDesc* %Ray, %struct.Payload* noalias %p) + +; Copy Payload fields (PLD_F0, PLD_F1) to local allocas: +; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 0, i32 0 +; CHECK: %[[LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[GEP]] +; CHECK: store <2 x float> %[[LOAD]], <2 x float>* %[[PLD_F0:[^ ,]+]] +; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 0, i32 1 +; CHECK: %[[LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[GEP]] +; CHECK: store <3 x i32> %[[LOAD]], <3 x i32>* %[[PLD_F1:[^ ,]+]] + +; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %21, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + +; Copy RayDesc fields (Origin, TMin, Direction, TMax) to local allocas: +; CHECK: %[[RAY_ORIGIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 0 
+; CHECK: %[[RAY_ORIGIN_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_GEP]] +; CHECK: store <3 x float> %[[RAY_ORIGIN_LOAD]], <3 x float>* %[[RAY_ORIGIN_P0:[^ ,]+]] +; CHECK: %[[TMIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 1 +; CHECK: %[[TMIN_LOAD:[^ ,]+]] = load float, float* %[[TMIN_GEP]] +; CHECK: store float %[[TMIN_LOAD]], float* %[[TMIN_P0:[^ ,]+]] +; CHECK: %[[DIRECTION_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 2 +; CHECK: %[[DIRECTION_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_GEP]] +; CHECK: store <3 x float> %[[DIRECTION_LOAD]], <3 x float>* %[[DIRECTION_P0:[^ ,]+]] +; CHECK: %[[TMAX_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 3 +; CHECK: %[[TMAX_LOAD:[^ ,]+]] = load float, float* %[[TMAX_GEP]] +; CHECK: store float %[[TMAX_LOAD]], float* %[[TMAX_P0:[^ ,]+]] + +; COM: Copy Payload fields into payload struct for call: +; CHECK: %[[PLD_F0_GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0:[^ ,]+]], i32 0, i32 0 +; CHECK: %[[PLD_F0_LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[PLD_F0]] +; CHECK: store <2 x float> %[[PLD_F0_LOAD]], <2 x float>* %[[PLD_F0_GEP]] +; CHECK: %[[PLD_F1_GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 1 +; CHECK: %[[PLD_F1_LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[PLD_F1]] +; CHECK: store <3 x i32> %[[PLD_F1_LOAD]], <3 x i32>* %[[PLD_F1_GEP]] + +; COM: Load RayDesc fields: +; CHECK: %[[RAY_ORIGIN_LOAD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_P0]] +; CHECK: %[[TMIN_LOAD2:[^ ,]+]] = load float, float* %[[TMIN_P0]] +; CHECK: %[[DIRECTION_LOAD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] +; CHECK: %[[TMAX_LOAD2:[^ ,]+]] = load float, float* %[[TMAX_P0]] + +; call TraceRay with the local allocas: +; CHECK: call void @"dx.hl.op..void (i32, 
%dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 69, %dx.types.Handle %[[RTAS]], i32 %19, i32 %18, i32 %17, i32 %16, i32 %15, <3 x float> %[[RAY_ORIGIN_LOAD2]], float %[[TMIN_LOAD2]], <3 x float> %[[DIRECTION_LOAD2]], float %[[TMAX_LOAD2]], %struct.Payload* %[[PLD_P0]]) + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"$Globals" = type { i32, i32, i32, i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%struct.Payload = type { <2 x float>, <3 x i32> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?Acc@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?RayFlags@@3IB" = external constant i32, align 4 +@"\01?InstanceInclusionMask@@3IB" = external constant i32, align 4 +@"\01?RayContributionToHitGroupIndex@@3IB" = external constant i32, align 4 +@"\01?MultiplierForGeometryContributionToHitGroupIndex@@3IB" = external constant i32, align 4 +@"\01?MissShaderIndex@@3IB" = external constant i32, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define <4 x float> @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z"(<2 x float>* noalias dereferenceable(8) %f2, %struct.RayDesc* %Ray, %struct.Payload* noalias %p) #0 { +entry: + %0 = alloca %struct.RayDesc, !dbg !39 ; line:22 col:61 + %1 = bitcast %struct.RayDesc* %0 to i8*, !dbg !39 ; line:22 col:61 + %2 = bitcast %struct.RayDesc* %Ray to i8*, !dbg !39 ; line:22 col:61 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 32, i32 1, i1 false), !dbg !39 ; line:22 col:61 + %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0), !dbg !39 ; line:22 col:61 + 
%4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 13, i32 20 }, %"$Globals" undef), !dbg !39 ; line:22 col:61 + %5 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %4, i32 0), !dbg !39 ; line:22 col:61 + %6 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 0, !dbg !39 ; line:22 col:61 + %7 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 1, !dbg !39 ; line:22 col:61 + %8 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 2, !dbg !39 ; line:22 col:61 + %9 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 3, !dbg !39 ; line:22 col:61 + %10 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 4, !dbg !39 ; line:22 col:61 + %11 = load i32, i32* %10, align 4, !dbg !39, !tbaa !43 ; line:22 col:61 + %12 = load i32, i32* %9, align 4, !dbg !47, !tbaa !43 ; line:22 col:12 + %13 = load i32, i32* %8, align 4, !dbg !48, !tbaa !43 ; line:21 col:12 + %14 = load i32, i32* %7, align 4, !dbg !49, !tbaa !43 ; line:20 col:25 + %15 = load i32, i32* %6, align 4, !dbg !50, !tbaa !43 ; line:20 col:16 + %16 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?Acc@@3URaytracingAccelerationStructure@@A", !dbg !51 ; line:20 col:3 + %17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %16), !dbg !51 ; line:20 col:3 + %18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %17, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !51 ; line:20 col:3 + call void 
@"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 69, %dx.types.Handle %18, i32 %15, i32 %14, i32 %13, i32 %12, i32 %11, %struct.RayDesc* %0, %struct.Payload* %p), !dbg !51 ; line:20 col:3 + ret <4 x float> , !dbg !52 ; line:24 col:4 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !21} +!dx.entryPoints = !{!30} 
+!dx.fnprops = !{} +!dx.options = !{!37, !38} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4928 (ser_hlslattributes_patch, 937c16cc6)"} +!3 = !{i32 1, i32 3} +!4 = !{i32 1, i32 9} +!5 = !{!"lib", i32 6, i32 3} +!6 = !{i32 0, %struct.RayDesc undef, !7, %struct.Payload undef, !12, %"$Globals" undef, !15} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 28, !13, !14} +!13 = !{i32 6, !"t", i32 3, i32 0, i32 7, i32 9} +!14 = !{i32 6, !"t2", i32 3, i32 16, i32 7, i32 4} +!15 = !{i32 20, !16, !17, !18, !19, !20} +!16 = !{i32 6, !"RayFlags", i32 3, i32 0, i32 7, i32 5} +!17 = !{i32 6, !"InstanceInclusionMask", i32 3, i32 4, i32 7, i32 5} +!18 = !{i32 6, !"RayContributionToHitGroupIndex", i32 3, i32 8, i32 7, i32 5} +!19 = !{i32 6, !"MultiplierForGeometryContributionToHitGroupIndex", i32 3, i32 12, i32 7, i32 5} +!20 = !{i32 6, !"MissShaderIndex", i32 3, i32 16, i32 7, i32 5} +!21 = !{i32 1, <4 x float> (<2 x float>*, %struct.RayDesc*, %struct.Payload*)* @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z", !22} +!22 = !{!23, !26, !27, !29} +!23 = !{i32 1, !24, !25} +!24 = !{i32 7, i32 9} +!25 = !{} +!26 = !{i32 2, !24, !25} +!27 = !{i32 0, !28, !25} +!28 = !{i32 4, !"R"} +!29 = !{i32 2, !25, !25} +!30 = !{null, !"", null, !31, null} +!31 = !{!32, null, !35, null} +!32 = !{!33} +!33 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?Acc@@3URaytracingAccelerationStructure@@A", !"Acc", i32 -1, i32 -1, i32 1, i32 16, i32 0, !34} +!34 = !{i32 0, i32 4} +!35 = !{!36} +!36 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 20, null} +!37 = !{i32 -2147483584} +!38 = !{i32 11} +!39 = !DILocation(line: 22, column: 61, scope: !40) +!40 = 
!DISubprogram(name: "emit", scope: !41, file: !41, line: 19, type: !42, isLocal: false, isDefinition: true, scopeLine: 19, flags: DIFlagPrototyped, isOptimized: false, function: <4 x float> (<2 x float>*, %struct.RayDesc*, %struct.Payload*)* @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z") +!41 = !DIFile(filename: "D:\5Cgit\5Cdxc\5Cmain\5Ctools\5Cclang\5Ctest\5CHLSLFileCheck\5Cshader_targets\5Craytracing\5Craytracing_traceray.hlsl", directory: "") +!42 = !DISubroutineType(types: !25) +!43 = !{!44, !44, i64 0} +!44 = !{!"int", !45, i64 0} +!45 = !{!"omnipotent char", !46, i64 0} +!46 = !{!"Simple C/C++ TBAA"} +!47 = !DILocation(line: 22, column: 12, scope: !40) +!48 = !DILocation(line: 21, column: 12, scope: !40) +!49 = !DILocation(line: 20, column: 25, scope: !40) +!50 = !DILocation(line: 20, column: 16, scope: !40) +!51 = !DILocation(line: 20, column: 3, scope: !40) +!52 = !DILocation(line: 24, column: 4, scope: !40) From e6b8799aa986d5de17fc42becb3e2139790d6ede Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 13 May 2025 20:25:51 -0700 Subject: [PATCH 10/16] Add TraceRayInline tests, including for RayDesc from cbuffer --- .../hlsl/objects/RayQuery/tracerayinline.hlsl | 0 .../RayQuery/tracerayinline_cb_raydesc.hlsl | 14 ++ .../tracerayinline_cb_raydesc_dxilgen.ll | 157 ++++++++++++++++++ .../Passes/DxilGen/tracerayinline_dxilgen.ll | 132 +++++++++++++++ .../tracerayinline_cb_raydesc_scalarrepl.ll | 144 ++++++++++++++++ .../tracerayinline_scalarrepl.hlsl | 143 ++++++++++++++++ .../tracerayinline_scalarrepl.ll | 152 +++++++++++++++++ 7 files changed, 742 insertions(+) rename tools/clang/test/{HLSLFileCheck => CodeGenDXIL}/hlsl/objects/RayQuery/tracerayinline.hlsl (100%) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl create mode 100644 tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll create mode 100644 
tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.hlsl create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/tracerayinline.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl similarity index 100% rename from tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/tracerayinline.hlsl rename to tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl new file mode 100644 index 0000000000..256b6a04e8 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s + +// CHECK-DAG: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false) +// CHECK-DAG: %[[RQ:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) +// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, + +RaytracingAccelerationStructure RTAS; + +RayDesc rayDesc; + +void main() { + RayQuery rayQuery; + rayQuery.TraceRayInline(RTAS, 1, 2, rayDesc); +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll new file mode 100644 index 0000000000..18acc28ddb --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll @@ -0,0 +1,157 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +; Based on 
tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl + +; Capture CB, RTAS, and RayQuery +; CHECK-DAG: %[[CB:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %"$Globals", %dx.types.ResourceProperties { i32 13, i32 32 }) +; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) +; CHECK-DAG: %[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) + +; Load RayDesc.Origin +; CHECK: %[[ORIG_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) +; CHECK: %[[ORIG_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 0 +; CHECK: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIG_EX0]], i64 0 +; CHECK: %[[ORIG_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 1 +; CHECK: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIG_EX1]], i64 1 +; CHECK: %[[ORIG_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 2 +; CHECK: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIG_EX2]], i64 2 + +; Load RayDesc.TMin +; CHECK: %[[TMIN_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) +; CHECK: %[[TMIN:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[TMIN_CB_LD]], 3 + +; Load RayDesc.Direction +; CHECK: %[[DIR_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) +; CHECK: %[[DIR_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 0 +; CHECK: %[[DIR_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[DIR_EX0]], i64 0 +; CHECK: %[[DIR_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 1 +; CHECK: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIR_EX1]], i64 1 +; 
CHECK: %[[DIR_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 2 +; CHECK: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[DIR_VXY]], float %[[DIR_EX2]], i64 2 + +; Load RayDesc.TMax +; CHECK: %[[TMAX_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) +; CHECK: %[[TMAX:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[TMAX_CB_LD]], 3 + +; Extract RayDesc vector fields +; CHECK: %[[ORIGX:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 0 +; CHECK: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 1 +; CHECK: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 +; CHECK: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 +; CHECK: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 +; CHECK: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 + +; Call TraceRayInline +; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %[[TMAX]]) + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"$Globals" = type { %struct.RayDesc } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RayQuery<513, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define void @main() #0 { +entry: + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) + %1 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !34 ; line:12 col:71 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !38 ; line:13 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !38 ; line:13 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !38 ; line:13 col:3 + %7 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 0, !dbg !38 ; line:13 col:3 + %8 = load <3 x float>, <3 x float>* %7, !dbg !38 ; line:13 col:3 + %9 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 1, !dbg !38 ; line:13 col:3 + %10 = load float, float* %9, !dbg !38 ; line:13 col:3 + %11 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 2, !dbg !38 ; line:13 col:3 + %12 = load <3 x float>, <3 x float>* %11, !dbg !38 ; line:13 col:3 + %13 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 3, !dbg !38 ; line:13 col:3 + %14 = load float, float* %13, !dbg !38 ; line:13 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, 
float)"(i32 325, i32 %rayQuery1, %dx.types.Handle %6, i32 1, i32 2, <3 x float> %8, float %10, <3 x float> %12, float %14), !dbg !38 ; line:13 col:3 + ret void, !dbg !39 ; line:14 col:1 +} + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!31} +!dx.options = !{!32, !33} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = 
!{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12, %"$Globals" undef, !18} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 32, !19} +!19 = !{i32 6, !"rayDesc", i32 3, i32 0} +!20 = !{i32 1, void ()* @main, !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{void ()* @main, !"main", null, !25, null} +!25 = !{!26, null, !29, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{!30} +!30 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 32, null} +!31 = !{void ()* @main, i32 1} +!32 = !{i32 64} +!33 = !{i32 -1} +!34 = !DILocation(line: 12, column: 71, scope: !35) +!35 = !DISubprogram(name: "main", scope: !36, file: !36, line: 11, type: !37, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @main) +!36 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl", directory: "") +!37 = !DISubroutineType(types: !23) +!38 = !DILocation(line: 13, column: 3, scope: !35) +!39 = !DILocation(line: 14, column: 1, scope: !35) diff --git a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll new file mode 100644 index 0000000000..6ed69a1a25 --- /dev/null +++ 
b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll @@ -0,0 +1,132 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl, +; with call to DoTrace commented out. + +; Load RayDesc fields from input +; CHECK-DAG: %[[ORIGX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) +; CHECK-DAG: %[[ORIGY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef) +; CHECK-DAG: %[[ORIGZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef) +; CHECK-DAG: %[[TMIN:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef) +; CHECK-DAG: %[[DIRX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 undef) +; CHECK-DAG: %[[DIRY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 undef) +; CHECK-DAG: %[[DIRZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 undef) +; CHECK-DAG: %[[TMAX:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 0, i32 undef) +; CHECK-DAG: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIGX_LI]], i64 0 +; CHECK-DAG: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIGY_LI]], i64 1 +; CHECK-DAG: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIGZ_LI]], i64 2 +; CHECK-DAG: %[[DIR_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[DIRX_LI]], i64 0 +; CHECK-DAG: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIRY_LI]], i64 1 +; CHECK-DAG: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[DIR_VXY]], float %[[DIRZ_LI]], i64 2 + +; Capture RayQuery and RTAS +; CHECK-DAG: %[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) +; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, 
%dx.types.ResourceProperties { i32 16, i32 0 }) + +; Extract RayDesc vector fields +; CHECK-DAG: %[[ORIGX:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 0 +; CHECK-DAG: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 1 +; CHECK-DAG: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 +; CHECK-DAG: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 +; CHECK-DAG: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 +; CHECK-DAG: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 + +; Call TraceRayInline +; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %[[TMAX]]) + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<513, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #0 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #1 + +; Function Attrs: nounwind +define void 
@main(float* noalias, <3 x float>, float, <3 x float>, float) #1 { +entry: + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !41 ; line:15 col:71 + %5 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !45 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %5), !dbg !45 ; line:17 col:3 + %7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !45 ; line:17 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery1, %dx.types.Handle %7, i32 1, i32 2, <3 x float> %1, float %2, <3 x float> %3, float %4), !dbg !45 ; line:17 col:3 + store float 0.000000e+00, float* %0, !dbg !46 ; line:18 col:3 + ret void, !dbg !46 ; line:18 col:3 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !18} +!dx.entryPoints = !{!33} +!dx.fnprops = !{!38} +!dx.options = !{!39, !40} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, 
%struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 1, void (float*, <3 x float>, float, <3 x float>, float)* @main, !19} +!19 = !{!20, !22, !25, !27, !29, !31} +!20 = !{i32 0, !21, !21} +!21 = !{} +!22 = !{i32 1, !23, !24} +!23 = !{i32 4, !"OUT", i32 7, i32 9} +!24 = !{i32 0} +!25 = !{i32 0, !26, !24} +!26 = !{i32 4, !"RAYDESC", i32 7, i32 9} +!27 = !{i32 0, !26, !28} +!28 = !{i32 1} +!29 = !{i32 0, !26, !30} +!30 = !{i32 2} +!31 = !{i32 0, !26, !32} +!32 = !{i32 3} +!33 = !{void (float*, <3 x float>, float, <3 x float>, float)* @main, !"main", null, !34, null} +!34 = !{!35, null, null, null} +!35 = !{!36} +!36 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !37} +!37 = !{i32 0, i32 4} +!38 = !{void (float*, <3 x float>, float, <3 x float>, float)* @main, i32 1} +!39 = !{i32 64} +!40 = !{i32 -1} +!41 = !DILocation(line: 15, column: 71, scope: !42) +!42 = !DISubprogram(name: "main", scope: !43, file: !43, line: 14, type: !44, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false, function: void (float*, <3 x float>, float, <3 x float>, float)* @main) +!43 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl", directory: "") +!44 = !DISubroutineType(types: !21) +!45 = !DILocation(line: 17, column: 3, scope: !42) +!46 = !DILocation(line: 18, column: 3, scope: !42) diff --git 
a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll new file mode 100644 index 0000000000..b9c0125dc3 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll @@ -0,0 +1,144 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl + +; Capture CB, RayDesc ptr from CB, RTAS, and init RayQuery +; CHECK-DAG: %[[CB_H:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) +; CHECK-DAG: %[[CB_PTR:[^ ,]+]] = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %[[CB_H]], i32 0) +; CHECK-DAG: %[[RAYDESC_PTR:[^ ,]+]] = getelementptr inbounds %"$Globals", %"$Globals"* %[[CB_PTR]], i32 0, i32 0 +; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) +; CHECK-DAG: %[[RQ0:[^ ,]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) +; CHECK-DAG: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] + +; Load RayDesc fields from CB to local copy +; CHECK-DAG: %[[ORIG_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 0 +; CHECK-DAG: %[[ORIG_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_CBP]] +; CHECK-DAG: store <3 x float> %[[ORIG_LD_CB]], <3 x float>* %[[ORIG_P0:[^ ,]+]] +; CHECK-DAG: %[[TMIN_CBP:[^ ,]+]] = getelementptr 
inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 1 +; CHECK-DAG: %[[TMIN_LD_CB:[^ ,]+]] = load float, float* %[[TMIN_CBP]] +; CHECK-DAG: store float %[[TMIN_LD_CB]], float* %[[TMIN_P0:[^ ,]+]] +; CHECK-DAG: %[[DIR_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 2 +; CHECK-DAG: %[[DIR_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_CBP]] +; CHECK-DAG: store <3 x float> %[[DIR_LD_CB]], <3 x float>* %[[DIR_P0:[^ ,]+]] +; CHECK-DAG: %[[TMAX_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 3 +; CHECK-DAG: %[[TMAX_LD_CB:[^ ,]+]] = load float, float* %[[TMAX_CBP]] +; CHECK-DAG: store float %[[TMAX_LD_CB]], float* %[[TMAX_P0:[^ ,]+]] + +; Load RayDesc fields from local copy +; CHECK-DAG: %[[ORIG:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_P0]] +; CHECK-DAG: %[[TMIN:[^ ,]+]] = load float, float* %[[TMIN_P0]] +; CHECK-DAG: %[[DIR:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_P0]] +; CHECK-DAG: %[[TMAX:[^ ,]+]] = load float, float* %[[TMAX_P0]] +; CHECK-DAG: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] + +; Call TraceRayInline +; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[ORIG]], float %[[TMIN]], <3 x float> %[[DIR]], float %[[TMAX]]) + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"$Globals" = type { %struct.RayDesc } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RayQuery<513, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 
+@"\01?rayDesc@@3URayDesc@@B" = external constant %struct.RayDesc, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define void @main() #0 { +entry: + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) + %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0 + %rayQuery = alloca %"class.RayQuery<513, 0>", align 4 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !34 ; line:12 col:71 + %4 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !34 ; line:12 col:71 + store i32 %rayQuery1, i32* %4, !dbg !34 ; line:12 col:71 + %5 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !38 ; line:13 col:3 + %6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %5), !dbg !38 ; line:13 col:3 + %7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !38 ; line:13 col:3 + call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %7, i32 1, i32 2, 
%struct.RayDesc* %3), !dbg !38 ; line:13 col:3 + ret void, !dbg !39 ; line:14 col:1 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<513, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!31} +!dx.options = !{!32, !33} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = 
!{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12, %"$Globals" undef, !18} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 32, !19} +!19 = !{i32 6, !"rayDesc", i32 3, i32 0} +!20 = !{i32 1, void ()* @main, !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{void ()* @main, !"main", null, !25, null} +!25 = !{!26, null, !29, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{!30} +!30 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 32, null} +!31 = !{void ()* @main, i32 1} +!32 = !{i32 64} +!33 = !{i32 -1} +!34 = !DILocation(line: 12, column: 71, scope: !35) +!35 = !DISubprogram(name: "main", scope: !36, file: !36, line: 11, type: !37, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @main) +!36 = !DIFile(filename: "/home/texr/git/dxc/main/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl", directory: "") +!37 = !DISubroutineType(types: !23) +!38 = !DILocation(line: 13, column: 3, scope: !35) +!39 = !DILocation(line: 14, column: 1, scope: !35) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.hlsl b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.hlsl new file mode 100644 index 0000000000..b42e4f8d70 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.hlsl 
@@ -0,0 +1,143 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; CHECK: define void @main(float* noalias, <3 x float>, float, <3 x float>, float) + +; Copy flattened RayDesc input to main function +; RayDesc fields: %1: Origin, %2: TMin, %3: Direction, %4: TMax +; CHECK: store float %4, float* %[[RD3_P0:[^ ,]+]] +; CHECK: store <3 x float> %3, <3 x float>* %[[RD2_P0:[^ ,]+]] +; CHECK: store float %2, float* %[[RD1_P0:[^ ,]+]] +; CHECK: store <3 x float> %1, <3 x float>* %[[RD0_P0:[^ ,]+]] + +; CHECK: %[[RQ0:[^ ]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) +; CHECK: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] + +; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P0]] +; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P1:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P0]] +; CHECK: store float %[[LOAD]], float* %[[RD1_P1:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P0]] +; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P1:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P0]] +; CHECK: store float %[[LOAD]], float* %[[RD3_P1:[^ ,]+]] + +; CHECK: %19 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %18, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + +; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P1]] +; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P2:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P1]] +; CHECK: store float %[[LOAD]], float* %[[RD1_P2:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P1]] +; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P2:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P1]] +; CHECK: store float 
%[[LOAD]], float* %[[RD3_P2:[^ ,]+]] +; CHECK: %[[RD0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P2]] +; CHECK: %[[RD1:[^ ,]+]] = load float, float* %[[RD1_P2]] +; CHECK: %[[RD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P2]] +; CHECK: %[[RD3:[^ ,]+]] = load float, float* %[[RD3_P2]] + +; CHECK: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] + +; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %19, i32 1, i32 2, <3 x float> %[[RD0]], float %[[RD1]], <3 x float> %[[RD2]], float %[[RD3]]) + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%ConstantBuffer = type opaque +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<513, 0>" = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define float @main(%struct.RayDesc* %rayDesc) #0 { +entry: + %0 = alloca %struct.RayDesc + %1 = bitcast %struct.RayDesc* %0 to i8* + %2 = bitcast %struct.RayDesc* %rayDesc to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 32, i32 1, i1 false) + %rayQuery = alloca %"class.RayQuery<513, 0>", align 4 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !35 ; line:15 col:71 + %3 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !35 ; line:15 col:71 + store i32 %rayQuery1, i32* %3, !dbg !35 ; line:15 col:71 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !39 ; 
line:17 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !39 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !39 ; line:17 col:3 + call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %6, i32 1, i32 2, %struct.RayDesc* %0), !dbg !39 ; line:17 col:3 + ret float 0.000000e+00, !dbg !40 ; line:18 col:3 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<513, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations 
= !{!6, !18} +!dx.entryPoints = !{!25} +!dx.fnprops = !{!32} +!dx.options = !{!33, !34} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 1, float (%struct.RayDesc*)* @main, !19} +!19 = !{!20, !23} +!20 = !{i32 1, !21, !22} +!21 = !{i32 4, !"OUT", i32 7, i32 9} +!22 = !{} +!23 = !{i32 0, !24, !22} +!24 = !{i32 4, !"RAYDESC"} +!25 = !{float (%struct.RayDesc*)* @main, !"main", null, !26, null} +!26 = !{!27, null, !30, null} +!27 = !{!28} +!28 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !29} +!29 = !{i32 0, i32 4} +!30 = !{!31} +!31 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!32 = !{float (%struct.RayDesc*)* @main, i32 1} +!33 = !{i32 64} +!34 = !{i32 -1} +!35 = !DILocation(line: 15, column: 71, scope: !36) +!36 = !DISubprogram(name: "main", scope: !37, file: !37, line: 14, type: !38, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false, function: float (%struct.RayDesc*)* @main) +!37 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/tracerayinline.hlsl", directory: "") +!38 = !DISubroutineType(types: !22) +!39 = !DILocation(line: 17, column: 3, scope: !36) +!40 = !DILocation(line: 18, 
column: 3, scope: !36) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll new file mode 100644 index 0000000000..4ed2504b53 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll @@ -0,0 +1,152 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl, +; with call to DoTrace commented out. + +; CHECK: define void @main(float* noalias, <3 x float>, float, <3 x float>, float) + +; Copy flattened RayDesc input to main function +; RayDesc fields: %1: Origin, %2: TMin, %3: Direction, %4: TMax +; CHECK: store float %4, float* %[[RD3_P0:[^ ,]+]] +; CHECK: store <3 x float> %3, <3 x float>* %[[RD2_P0:[^ ,]+]] +; CHECK: store float %2, float* %[[RD1_P0:[^ ,]+]] +; CHECK: store <3 x float> %1, <3 x float>* %[[RD0_P0:[^ ,]+]] + +; Copy RayDesc fields again +; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P0]] +; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P1:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P0]] +; CHECK: store float %[[LOAD]], float* %[[RD1_P1:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P0]] +; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P1:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P0]] +; CHECK: store float %[[LOAD]], float* %[[RD3_P1:[^ ,]+]] + +; Capture RayQuery ptr and RTAS handle +; CHECK: %[[RQ0:[^ ]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) +; CHECK: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] +; CHECK: %19 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %18, %dx.types.ResourceProperties { i32 16, i32 0 }, 
%struct.RaytracingAccelerationStructure undef) + +; Copy RayDesc fields again +; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P1]] +; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P2:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P1]] +; CHECK: store float %[[LOAD]], float* %[[RD1_P2:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P1]] +; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P2:[^ ,]+]] +; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P1]] +; CHECK: store float %[[LOAD]], float* %[[RD3_P2:[^ ,]+]] + +; Load RayDesc fields for TraceRayInline +; CHECK: %[[RD0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P2]] +; CHECK: %[[RD1:[^ ,]+]] = load float, float* %[[RD1_P2]] +; CHECK: %[[RD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P2]] +; CHECK: %[[RD3:[^ ,]+]] = load float, float* %[[RD3_P2]] + +; Load RayQuery +; CHECK: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] + +; TraceRayInline call +; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %19, i32 1, i32 2, <3 x float> %[[RD0]], float %[[RD1]], <3 x float> %[[RD2]], float %[[RD3]]) + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%ConstantBuffer = type opaque +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<513, 0>" = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define float @main(%struct.RayDesc* %rayDesc) #0 { +entry: + %0 = alloca %struct.RayDesc + %1 = bitcast %struct.RayDesc* %0 to 
i8* + %2 = bitcast %struct.RayDesc* %rayDesc to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 32, i32 1, i1 false) + %rayQuery = alloca %"class.RayQuery<513, 0>", align 4 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !35 ; line:15 col:71 + %3 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !35 ; line:15 col:71 + store i32 %rayQuery1, i32* %3, !dbg !35 ; line:15 col:71 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !39 ; line:17 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !39 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !39 ; line:17 col:3 + call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %6, i32 1, i32 2, %struct.RayDesc* %0), !dbg !39 ; line:17 col:3 + ret float 0.000000e+00, !dbg !40 ; line:18 col:3 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<513, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle 
(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !18} +!dx.entryPoints = !{!25} +!dx.fnprops = !{!32} +!dx.options = !{!33, !34} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 1, float (%struct.RayDesc*)* @main, !19} +!19 = !{!20, !23} +!20 = !{i32 1, !21, !22} +!21 = !{i32 4, !"OUT", i32 7, i32 9} +!22 = !{} +!23 = !{i32 0, !24, !22} +!24 = !{i32 4, !"RAYDESC"} +!25 = !{float (%struct.RayDesc*)* @main, !"main", null, !26, null} +!26 = !{!27, null, !30, null} +!27 = !{!28} +!28 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !29} +!29 = !{i32 0, i32 4} +!30 = !{!31} +!31 = !{i32 0, %ConstantBuffer* 
@"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!32 = !{float (%struct.RayDesc*)* @main, i32 1} +!33 = !{i32 64} +!34 = !{i32 -1} +!35 = !DILocation(line: 15, column: 71, scope: !36) +!36 = !DISubprogram(name: "main", scope: !37, file: !37, line: 14, type: !38, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false, function: float (%struct.RayDesc*)* @main) +!37 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl", directory: "") +!38 = !DISubroutineType(types: !22) +!39 = !DILocation(line: 17, column: 3, scope: !36) +!40 = !DILocation(line: 18, column: 3, scope: !36) From 1a3064fa396e7054ec98368c40ed368f1bce5c6a Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 13 May 2025 20:45:24 -0700 Subject: [PATCH 11/16] Fix a couple missing variable substitutions --- .../DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll index 4ed2504b53..7fb918c299 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll @@ -25,7 +25,7 @@ ; Capture RayQuery ptr and RTAS handle ; CHECK: %[[RQ0:[^ ]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) ; CHECK: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] -; CHECK: %19 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %18, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) +; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) ; Copy RayDesc fields again ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P1]] @@ -47,7 +47,7 @@ ; CHECK: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] ; TraceRayInline call -; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %19, i32 1, i32 2, <3 x float> %[[RD0]], float %[[RD1]], <3 x float> %[[RD2]], float %[[RD3]]) +; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[RD0]], float %[[RD1]], <3 x float> %[[RD2]], float %[[RD3]]) target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" target triple = "dxil-ms-dx" From 23b49a0632942fcaf39c0b8f89ba2d59498be8a1 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 13 May 2025 22:56:50 -0700 Subject: [PATCH 12/16] Remove accidental add of bad test file --- .../tracerayinline_scalarrepl.hlsl | 143 ------------------ 1 file changed, 143 deletions(-) delete mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.hlsl diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.hlsl b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.hlsl deleted file mode 100644 index b42e4f8d70..0000000000 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.hlsl +++ /dev/null @@ -1,143 +0,0 @@ -; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s - -; CHECK: define void @main(float* noalias, <3 x float>, float, <3 x float>, float) - -; Copy flattened RayDesc input to main function -; RayDesc fields: %1: Origin, %2: TMin, %3: Direction, %4: TMax -; 
CHECK: store float %4, float* %[[RD3_P0:[^ ,]+]] -; CHECK: store <3 x float> %3, <3 x float>* %[[RD2_P0:[^ ,]+]] -; CHECK: store float %2, float* %[[RD1_P0:[^ ,]+]] -; CHECK: store <3 x float> %1, <3 x float>* %[[RD0_P0:[^ ,]+]] - -; CHECK: %[[RQ0:[^ ]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) -; CHECK: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] - -; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P0]] -; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P1:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P0]] -; CHECK: store float %[[LOAD]], float* %[[RD1_P1:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P0]] -; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P1:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P0]] -; CHECK: store float %[[LOAD]], float* %[[RD3_P1:[^ ,]+]] - -; CHECK: %19 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %18, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) - -; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P1]] -; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P2:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P1]] -; CHECK: store float %[[LOAD]], float* %[[RD1_P2:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P1]] -; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P2:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P1]] -; CHECK: store float %[[LOAD]], float* %[[RD3_P2:[^ ,]+]] -; CHECK: %[[RD0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P2]] -; CHECK: %[[RD1:[^ ,]+]] = load float, float* %[[RD1_P2]] -; CHECK: %[[RD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P2]] -; CHECK: %[[RD3:[^ ,]+]] = load float, float* %[[RD3_P2]] - -; 
CHECK: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] - -; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %19, i32 1, i32 2, <3 x float> %[[RD0]], float %[[RD1]], <3 x float> %[[RD2]], float %[[RD3]]) - -target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" -target triple = "dxil-ms-dx" - -%struct.RaytracingAccelerationStructure = type { i32 } -%ConstantBuffer = type opaque -%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } -%"class.RayQuery<513, 0>" = type { i32 } -%dx.types.Handle = type { i8* } -%dx.types.ResourceProperties = type { i32, i32 } - -@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 -@"$Globals" = external constant %ConstantBuffer - -; Function Attrs: nounwind -define float @main(%struct.RayDesc* %rayDesc) #0 { -entry: - %0 = alloca %struct.RayDesc - %1 = bitcast %struct.RayDesc* %0 to i8* - %2 = bitcast %struct.RayDesc* %rayDesc to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 32, i32 1, i1 false) - %rayQuery = alloca %"class.RayQuery<513, 0>", align 4 - %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !35 ; line:15 col:71 - %3 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !35 ; line:15 col:71 - store i32 %rayQuery1, i32* %3, !dbg !35 ; line:15 col:71 - %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !39 ; line:17 col:3 - %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !39 ; line:17 col:3 - %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !39 ; line:17 col:3 - call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %6, i32 1, i32 2, %struct.RayDesc* %0), !dbg !39 ; line:17 col:3 - ret float 0.000000e+00, !dbg !40 ; line:18 col:3 -} - -; Function Attrs: nounwind -declare void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<513, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 - -; Function Attrs: nounwind readnone -declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 - -; Function Attrs: nounwind readnone -declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 - -; Function Attrs: nounwind -declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 - -; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } - -!llvm.module.flags = !{!0} -!pauseresume = !{!1} -!llvm.ident = !{!2} -!dx.version = !{!3} -!dx.valver = !{!4} -!dx.shaderModel = !{!5} -!dx.typeAnnotations = !{!6, !18} -!dx.entryPoints = !{!25} -!dx.fnprops = !{!32} -!dx.options = !{!33, !34} - -!0 = !{i32 2, !"Debug Info Version", i32 3} -!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} -!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} -!3 = !{i32 1, i32 5} -!4 = !{i32 1, i32 9} -!5 = !{!"vs", i32 6, 
i32 5} -!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12} -!7 = !{i32 32, !8, !9, !10, !11} -!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} -!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} -!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} -!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} -!12 = !{i32 4, !13, !14} -!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} -!14 = !{i32 0, !15} -!15 = !{!16, !17} -!16 = !{i32 1, i64 513} -!17 = !{i32 1, i64 0} -!18 = !{i32 1, float (%struct.RayDesc*)* @main, !19} -!19 = !{!20, !23} -!20 = !{i32 1, !21, !22} -!21 = !{i32 4, !"OUT", i32 7, i32 9} -!22 = !{} -!23 = !{i32 0, !24, !22} -!24 = !{i32 4, !"RAYDESC"} -!25 = !{float (%struct.RayDesc*)* @main, !"main", null, !26, null} -!26 = !{!27, null, !30, null} -!27 = !{!28} -!28 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !29} -!29 = !{i32 0, i32 4} -!30 = !{!31} -!31 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} -!32 = !{float (%struct.RayDesc*)* @main, i32 1} -!33 = !{i32 64} -!34 = !{i32 -1} -!35 = !DILocation(line: 15, column: 71, scope: !36) -!36 = !DISubprogram(name: "main", scope: !37, file: !37, line: 14, type: !38, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false, function: float (%struct.RayDesc*)* @main) -!37 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/tracerayinline.hlsl", directory: "") -!38 = !DISubroutineType(types: !22) -!39 = !DILocation(line: 17, column: 3, scope: !36) -!40 = !DILocation(line: 18, column: 3, scope: !36) From b986d7920bd2ef39bcd64d3195e26a714cfcb9a8 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 13 May 2025 23:00:51 -0700 Subject: [PATCH 13/16] Remove extra braces --- lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 0063cdcefb..d8746862bc 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -2760,12 +2760,11 @@ static void copyIntrinsicAggArgs(HLModule &HLM) { break; case IntrinsicOp::MOP_DxHitObject_FromRayQuery: if (CI->getNumArgOperands() == - HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) { + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) memcpyAggCallArg( CI, HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx, /*CopyIn*/ true, /*CopyOut*/ false); - } break; case IntrinsicOp::MOP_DxHitObject_MakeMiss: memcpyAggCallArg(CI, HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx, From 28d0100c3ae10ba810341109d211d0c661547df4 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 13 May 2025 23:16:29 -0700 Subject: [PATCH 14/16] test robustness, fix missing value captures, exclude comma --- .../hitobject_traceinvoke_scalarrepl.ll | 48 +++++++++---------- .../ScalarReplHLSL/traceray_scalarrepl.ll | 4 +- .../tracerayinline_cb_raydesc_scalarrepl.ll | 2 +- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll index bf3394936b..6110b4743e 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll @@ -2,41 +2,41 @@ ; COM: Based on tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl -; CHECK: %[[HITOBJ:[^ ]+]] = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HITOBJ:[^ ,]+]] = alloca %dx.types.HitObject, align 4 ; COM: Init RayDesc. 
-; CHECK-DAG: store <3 x float> , <3 x float>* %[[ORIGIN_P0:[^ ]+]], align 4 -; CHECK-DAG: store float 3.000000e+00, float* %[[TMIN_P0:[^ ]+]], align 4 -; CHECK-DAG: store <3 x float> , <3 x float>* %[[DIRECTION_P0:[^ ]+]], align 4 -; CHECK-DAG: store float 7.000000e+00, float* %[[TMAX_P0:[^ ]+]], align 4 +; CHECK-DAG: store <3 x float> , <3 x float>* %[[ORIGIN_P0:[^ ,]+]], align 4 +; CHECK-DAG: store float 3.000000e+00, float* %[[TMIN_P0:[^ ,]+]], align 4 +; CHECK-DAG: store <3 x float> , <3 x float>* %[[DIRECTION_P0:[^ ,]+]], align 4 +; CHECK-DAG: store float 7.000000e+00, float* %[[TMAX_P0:[^ ,]+]], align 4 -; CHECK-DAG: %[[RTAS:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) +; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) ; COM: Copy RayDesc. 
-; CHECK-DAG: %[[ORIGIN_L0:[^ ]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P0]] -; CHECK-DAG: store <3 x float> %[[ORIGIN_L0]], <3 x float>* %[[ORIGIN_P1:[^ ]+]] -; CHECK-DAG: %[[TMIN_L0:[^ ]+]] = load float, float* %[[TMIN_P0]] -; CHECK-DAG: store float %[[TMIN_L0]], float* %[[TMIN_P1:[^ ]+]] -; CHECK-DAG: %[[DIRECTION_L0:[^ ]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] -; CHECK-DAG: store <3 x float> %[[DIRECTION_L0]], <3 x float>* %[[DIRECTION_P1:[^ ]+]] -; CHECK-DAG: %[[TMAX_L0:[^ ]+]] = load float, float* %[[TMAX_P0]] -; CHECK-DAG: store float %[[TMAX_L0]], float* %[[TMAX_P1:[^ ]+]] +; CHECK-DAG: %[[ORIGIN_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P0]] +; CHECK-DAG: store <3 x float> %[[ORIGIN_L0]], <3 x float>* %[[ORIGIN_P1:[^ ,]+]] +; CHECK-DAG: %[[TMIN_L0:[^ ,]+]] = load float, float* %[[TMIN_P0]] +; CHECK-DAG: store float %[[TMIN_L0]], float* %[[TMIN_P1:[^ ,]+]] +; CHECK-DAG: %[[DIRECTION_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] +; CHECK-DAG: store <3 x float> %[[DIRECTION_L0]], <3 x float>* %[[DIRECTION_P1:[^ ,]+]] +; CHECK-DAG: %[[TMAX_L0:[^ ,]+]] = load float, float* %[[TMAX_P0]] +; CHECK-DAG: store float %[[TMAX_L0]], float* %[[TMAX_P1:[^ ,]+]] ; COM: Load RayDesc. -; CHECK-DAG: %[[ORIGIN_L1:[^ ]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P1]] -; CHECK-DAG: %[[TMIN_L1:[^ ]+]] = load float, float* %[[TMIN_P1]] -; CHECK-DAG: %[[DIRECTION_L1:[^ ]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P1]] -; CHECK-DAG: %[[TMAX_L1:[^ ]+]] = load float, float* %[[TMAX_P1]] +; CHECK-DAG: %[[ORIGIN_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P1]] +; CHECK-DAG: %[[TMIN_L1:[^ ,]+]] = load float, float* %[[TMIN_P1]] +; CHECK-DAG: %[[DIRECTION_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P1]] +; CHECK-DAG: %[[TMAX_L1:[^ ,]+]] = load float, float* %[[TMAX_P1]] ; COM: RayDesc is scalar replaced in HL op for dx::HitObject::TraceRay. 
-; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %[[HITOBJ]], %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> %[[ORIGIN_L1]], float %[[TMIN_L1]], <3 x float> %[[DIRECTION_L1]], float %[[TMAX_L1]], %struct.Payload* %[[PLD_P0:[^ ]+]]) +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %[[HITOBJ]], %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> %[[ORIGIN_L1]], float %[[TMIN_L1]], <3 x float> %[[DIRECTION_L1]], float %[[TMAX_L1]], %struct.Payload* %[[PLD_P0:[^ ,]+]]) ; COM: Copy payload. -; CHECK: %[[GEP_PLD_P0:[^ ]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 0 -; CHECK: %[[PLD_L0:[^ ]+]] = load <3 x float>, <3 x float>* %[[GEP_PLD_P0]] -; CHECK: store <3 x float> %[[PLD_L0]], <3 x float>* %[[PLD_M0_P0:[^ ]+]] -; CHECK: %[[GEP_PLD_P1:[^ ]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P1:[^ ]+]], i32 0, i32 0 -; CHECK: [[PLD_L1:[^ ]+]] = load <3 x float>, <3 x float>* %[[PLD_M0_P0]] +; CHECK: %[[GEP_PLD_P0:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 0 +; CHECK: %[[PLD_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[GEP_PLD_P0]] +; CHECK: store <3 x float> %[[PLD_L0]], <3 x float>* %[[PLD_M0_P0:[^ ,]+]] +; CHECK: %[[GEP_PLD_P1:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P1:[^ ,]+]], i32 0, i32 0 +; CHECK: [[PLD_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[PLD_M0_P0]] ; CHECK: store <3 x float> [[PLD_L1]], <3 x float>* %[[GEP_PLD_P1]] ; COM: dx::HitObject::Invoke diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll 
b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll index 5c256f9af7..9b767c0a78 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll @@ -11,7 +11,7 @@ ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[GEP]] ; CHECK: store <3 x i32> %[[LOAD]], <3 x i32>* %[[PLD_F1:[^ ,]+]] -; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %21, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) +; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+]}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) ; Copy RayDesc fields (Origin, TMin, Direction, TMax) to local allocas: ; CHECK: %[[RAY_ORIGIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 0 @@ -42,7 +42,7 @@ ; CHECK: %[[TMAX_LOAD2:[^ ,]+]] = load float, float* %[[TMAX_P0]] ; call TraceRay with the local allocas: -; CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 69, %dx.types.Handle %[[RTAS]], i32 %19, i32 %18, i32 %17, i32 %16, i32 %15, <3 x float> %[[RAY_ORIGIN_LOAD2]], float %[[TMIN_LOAD2]], <3 x float> %[[DIRECTION_LOAD2]], float %[[TMAX_LOAD2]], %struct.Payload* %[[PLD_P0]]) +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 69, %dx.types.Handle %[[RTAS]], i32 %{{[^ ,]+]}}, i32 %{{[^ ,]+]}}, i32 %{{[^ ,]+]}}, i32 %{{[^ ,]+]}}, i32 %{{[^ ,]+]}}, <3 x float> 
%[[RAY_ORIGIN_LOAD2]], float %[[TMIN_LOAD2]], <3 x float> %[[DIRECTION_LOAD2]], float %[[TMAX_LOAD2]], %struct.Payload* %[[PLD_P0]]) target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll index b9c0125dc3..f7e8030c8b 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll @@ -6,7 +6,7 @@ ; CHECK-DAG: %[[CB_H:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) ; CHECK-DAG: %[[CB_PTR:[^ ,]+]] = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %[[CB_H]], i32 0) ; CHECK-DAG: %[[RAYDESC_PTR:[^ ,]+]] = getelementptr inbounds %"$Globals", %"$Globals"* %[[CB_PTR]], i32 0, i32 0 -; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) +; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+]}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) ; CHECK-DAG: %[[RQ0:[^ ,]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) ; CHECK-DAG: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] From 
fc2b54f83fcfa3d3e284a1f80dd8188c09f99e69 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Wed, 14 May 2025 09:05:00 -0700 Subject: [PATCH 15/16] Fix bad CHECK expressions in tests --- .../test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll | 4 ++-- .../ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll index 9b767c0a78..33f90be816 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll @@ -11,7 +11,7 @@ ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[GEP]] ; CHECK: store <3 x i32> %[[LOAD]], <3 x i32>* %[[PLD_F1:[^ ,]+]] -; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+]}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) +; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) ; Copy RayDesc fields (Origin, TMin, Direction, TMax) to local allocas: ; CHECK: %[[RAY_ORIGIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 0 @@ -42,7 +42,7 @@ ; CHECK: %[[TMAX_LOAD2:[^ ,]+]] = load float, float* %[[TMAX_P0]] ; call TraceRay with the local allocas: -; CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 69, %dx.types.Handle %[[RTAS]], i32 %{{[^ ,]+]}}, i32 
%{{[^ ,]+]}}, i32 %{{[^ ,]+]}}, i32 %{{[^ ,]+]}}, i32 %{{[^ ,]+]}}, <3 x float> %[[RAY_ORIGIN_LOAD2]], float %[[TMIN_LOAD2]], <3 x float> %[[DIRECTION_LOAD2]], float %[[TMAX_LOAD2]], %struct.Payload* %[[PLD_P0]]) +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 69, %dx.types.Handle %[[RTAS]], i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, <3 x float> %[[RAY_ORIGIN_LOAD2]], float %[[TMIN_LOAD2]], <3 x float> %[[DIRECTION_LOAD2]], float %[[TMAX_LOAD2]], %struct.Payload* %[[PLD_P0]]) target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll index f7e8030c8b..404eee8e77 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll @@ -6,7 +6,7 @@ ; CHECK-DAG: %[[CB_H:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) ; CHECK-DAG: %[[CB_PTR:[^ ,]+]] = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %[[CB_H]], i32 0) ; CHECK-DAG: %[[RAYDESC_PTR:[^ ,]+]] = getelementptr inbounds %"$Globals", %"$Globals"* %[[CB_PTR]], i32 0, i32 0 -; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+]}}, %dx.types.ResourceProperties { i32 16, i32 0 }, 
%struct.RaytracingAccelerationStructure undef) +; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) ; CHECK-DAG: %[[RQ0:[^ ,]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) ; CHECK-DAG: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] From a307db7011177c3f7715a62120b8463f7170b8ea Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 15 May 2025 00:07:13 -0700 Subject: [PATCH 16/16] feedback: interleave CHECKs and remove 'COM:' --- .../tracerayinline_cb_raydesc_dxilgen.ll | 87 +++++++++-------- .../Passes/DxilGen/tracerayinline_dxilgen.ll | 64 +++++++------ .../hitobject_traceinvoke_scalarrepl.ll | 89 +++++++++-------- .../ScalarReplHLSL/traceray_scalarrepl.ll | 92 +++++++++--------- .../tracerayinline_cb_raydesc_scalarrepl.ll | 76 ++++++++------- .../tracerayinline_scalarrepl.ll | 95 ++++++++++--------- 6 files changed, 261 insertions(+), 242 deletions(-) diff --git a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll index 18acc28ddb..b969a63f12 100644 --- a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll @@ -2,48 +2,6 @@ ; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl -; Capture CB, RTAS, and RayQuery -; CHECK-DAG: %[[CB:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %"$Globals", %dx.types.ResourceProperties { i32 13, i32 32 }) -; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) -; CHECK-DAG: 
%[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) - -; Load RayDesc.Origin -; CHECK: %[[ORIG_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) -; CHECK: %[[ORIG_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 0 -; CHECK: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIG_EX0]], i64 0 -; CHECK: %[[ORIG_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 1 -; CHECK: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIG_EX1]], i64 1 -; CHECK: %[[ORIG_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 2 -; CHECK: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIG_EX2]], i64 2 - -; Load RayDesc.TMin -; CHECK: %[[TMIN_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) -; CHECK: %[[TMIN:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[TMIN_CB_LD]], 3 - -; Load RayDesc.Direction -; CHECK: %[[DIR_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) -; CHECK: %[[DIR_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 0 -; CHECK: %[[DIR_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[DIR_EX0]], i64 0 -; CHECK: %[[DIR_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 1 -; CHECK: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIR_EX1]], i64 1 -; CHECK: %[[DIR_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 2 -; CHECK: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[DIR_VXY]], float %[[DIR_EX2]], i64 2 - -; Load RayDesc.TMax -; CHECK: %21 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) -; CHECK: %22 = extractvalue %dx.types.CBufRet.f32 %21, 3 - -; Extract RayDesc vector fields -; CHECK: %[[ORIGX:[^ ,]+]] = extractelement <3 x 
float> %[[ORIG_VXYZ]], i64 0 -; CHECK: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 1 -; CHECK: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 -; CHECK: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 -; CHECK: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 -; CHECK: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 - -; Call TraceRayInline -; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %22) - target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" target triple = "dxil-ms-dx" @@ -60,6 +18,12 @@ target triple = "dxil-ms-dx" ; Function Attrs: nounwind define void @main() #0 { entry: + + ; Capture CB, RTAS, and RayQuery + ; CHECK-DAG: %[[CB:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %"$Globals", %dx.types.ResourceProperties { i32 13, i32 32 }) + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) + ; CHECK-DAG: %[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) @@ -68,6 +32,41 @@ entry: %4 = load %struct.RaytracingAccelerationStructure, 
%struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !38 ; line:13 col:3 %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !38 ; line:13 col:3 %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !38 ; line:13 col:3 + + ; Load RayDesc.Origin + ; CHECK: %[[ORIG_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) + ; CHECK: %[[ORIG_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 0 + ; CHECK: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIG_EX0]], i64 0 + ; CHECK: %[[ORIG_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 1 + ; CHECK: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIG_EX1]], i64 1 + ; CHECK: %[[ORIG_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 2 + ; CHECK: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIG_EX2]], i64 2 + + ; Load RayDesc.TMin + ; CHECK: %[[TMIN_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) + ; CHECK: %[[TMIN:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[TMIN_CB_LD]], 3 + + ; Load RayDesc.Direction + ; CHECK: %[[DIR_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) + ; CHECK: %[[DIR_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 0 + ; CHECK: %[[DIR_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[DIR_EX0]], i64 0 + ; CHECK: %[[DIR_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 
%[[DIR_CB_LD]], 1 + ; CHECK: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIR_EX1]], i64 1 + ; CHECK: %[[DIR_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 2 + ; CHECK: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[DIR_VXY]], float %[[DIR_EX2]], i64 2 + + ; Load RayDesc.TMax + ; CHECK: %[[TMAX_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) + ; CHECK: %[[TMAX:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[TMAX_CB_LD]], 3 + + ; Extract RayDesc vector fields + ; CHECK: %[[ORIGX:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 0 + ; CHECK: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 1 + ; CHECK: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 + ; CHECK: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 + ; CHECK: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 + ; CHECK: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 + %7 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 0, !dbg !38 ; line:13 col:3 %8 = load <3 x float>, <3 x float>* %7, !dbg !38 ; line:13 col:3 %9 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 1, !dbg !38 ; line:13 col:3 @@ -76,6 +75,10 @@ entry: %12 = load <3 x float>, <3 x float>* %11, !dbg !38 ; line:13 col:3 %13 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 3, !dbg !38 ; line:13 col:3 %14 = load float, float* %13, !dbg !38 ; line:13 col:3 + + ; Call TraceRayInline + ; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %[[TMAX]]) + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery1, %dx.types.Handle %6, i32 1, i32 2, <3 x float> %8,
float %10, <3 x float> %12, float %14), !dbg !38 ; line:13 col:3 ret void, !dbg !39 ; line:14 col:1 } diff --git a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll index 6ed69a1a25..0d97d8782d 100644 --- a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll @@ -3,37 +3,6 @@ ; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl, ; with call to DoTrace commented out. -; Load RayDesc fields from input -; CHECK-DAG: %[[ORIGX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) -; CHECK-DAG: %[[ORIGY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef) -; CHECK-DAG: %[[ORIGZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef) -; CHECK-DAG: %[[TMIN:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef) -; CHECK-DAG: %[[DIRX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 undef) -; CHECK-DAG: %[[DIRY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 undef) -; CHECK-DAG: %[[DIRZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 undef) -; CHECK-DAG: %[[TMAX:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 0, i32 undef) -; CHECK-DAG: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIGX_LI]], i64 0 -; CHECK-DAG: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIGY_LI]], i64 1 -; CHECK-DAG: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIGZ_LI]], i64 2 -; CHECK-DAG: %[[DIR_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[DIRX_LI]], i64 0 -; CHECK-DAG: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIRY_LI]], i64 1 -; CHECK-DAG: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> 
%[[DIR_VXY]], float %[[DIRZ_LI]], i64 2 - -; Capture RayQuery and RTAS -; CHECK-DAG: %[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) -; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) - -; Extract RayDesc vector fields -; CHECK-DAG: %[[ORIGX:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 0 -; CHECK-DAG: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 1 -; CHECK-DAG: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 -; CHECK-DAG: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 -; CHECK-DAG: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 -; CHECK-DAG: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 - -; Call TraceRayInline -; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %[[TMAX]]) - target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" target triple = "dxil-ms-dx" @@ -57,10 +26,43 @@ declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #1 ; Function Attrs: nounwind define void @main(float* noalias, <3 x float>, float, <3 x float>, float) #1 { entry: + + ; Load RayDesc fields from input + ; CHECK-DAG: %[[ORIGX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[ORIGY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef) + ; CHECK-DAG: %[[ORIGZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef) + ; CHECK-DAG: %[[TMIN:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[DIRX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, 
i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[DIRY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 undef) + ; CHECK-DAG: %[[DIRZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 undef) + ; CHECK-DAG: %[[TMAX:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIGX_LI]], i64 0 + ; CHECK-DAG: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIGY_LI]], i64 1 + ; CHECK-DAG: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIGZ_LI]], i64 2 + ; CHECK-DAG: %[[DIR_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[DIRX_LI]], i64 0 + ; CHECK-DAG: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIRY_LI]], i64 1 + ; CHECK-DAG: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[DIR_VXY]], float %[[DIRZ_LI]], i64 2 + + ; Capture RayQuery and RTAS + ; CHECK-DAG: %[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !41 ; line:15 col:71 %5 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !45 ; line:17 col:3 %6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %5), !dbg !45 ; line:17 col:3 %7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), 
!dbg !45 ; line:17 col:3 + + ; Extract RayDesc vector fields + ; CHECK-DAG: %[[ORIGX:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 0 + ; CHECK-DAG: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 1 + ; CHECK-DAG: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 + ; CHECK-DAG: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 + ; CHECK-DAG: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 + ; CHECK-DAG: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 + + ; Call TraceRayInline + ; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %[[TMAX]]) + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery1, %dx.types.Handle %7, i32 1, i32 2, <3 x float> %1, float %2, <3 x float> %3, float %4), !dbg !45 ; line:17 col:3 store float 0.000000e+00, float* %0, !dbg !46 ; line:18 col:3 ret void, !dbg !46 ; line:18 col:3 diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll index 6110b4743e..fa22ee5744 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll @@ -1,47 +1,6 @@ ; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s -; COM: Based on tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl - -; CHECK: %[[HITOBJ:[^ ,]+]] = alloca %dx.types.HitObject, align 4 - -; COM: Init RayDesc. 
-; CHECK-DAG: store <3 x float> , <3 x float>* %[[ORIGIN_P0:[^ ,]+]], align 4 -; CHECK-DAG: store float 3.000000e+00, float* %[[TMIN_P0:[^ ,]+]], align 4 -; CHECK-DAG: store <3 x float> , <3 x float>* %[[DIRECTION_P0:[^ ,]+]], align 4 -; CHECK-DAG: store float 7.000000e+00, float* %[[TMAX_P0:[^ ,]+]], align 4 - -; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) - -; COM: Copy RayDesc. -; CHECK-DAG: %[[ORIGIN_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P0]] -; CHECK-DAG: store <3 x float> %[[ORIGIN_L0]], <3 x float>* %[[ORIGIN_P1:[^ ,]+]] -; CHECK-DAG: %[[TMIN_L0:[^ ,]+]] = load float, float* %[[TMIN_P0]] -; CHECK-DAG: store float %[[TMIN_L0]], float* %[[TMIN_P1:[^ ,]+]] -; CHECK-DAG: %[[DIRECTION_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] -; CHECK-DAG: store <3 x float> %[[DIRECTION_L0]], <3 x float>* %[[DIRECTION_P1:[^ ,]+]] -; CHECK-DAG: %[[TMAX_L0:[^ ,]+]] = load float, float* %[[TMAX_P0]] -; CHECK-DAG: store float %[[TMAX_L0]], float* %[[TMAX_P1:[^ ,]+]] - -; COM: Load RayDesc. -; CHECK-DAG: %[[ORIGIN_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P1]] -; CHECK-DAG: %[[TMIN_L1:[^ ,]+]] = load float, float* %[[TMIN_P1]] -; CHECK-DAG: %[[DIRECTION_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P1]] -; CHECK-DAG: %[[TMAX_L1:[^ ,]+]] = load float, float* %[[TMAX_P1]] - -; COM: RayDesc is scalar replaced in HL op for dx::HitObject::TraceRay. 
-; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %[[HITOBJ]], %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> %[[ORIGIN_L1]], float %[[TMIN_L1]], <3 x float> %[[DIRECTION_L1]], float %[[TMAX_L1]], %struct.Payload* %[[PLD_P0:[^ ,]+]]) - -; COM: Copy payload. -; CHECK: %[[GEP_PLD_P0:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 0 -; CHECK: %[[PLD_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[GEP_PLD_P0]] -; CHECK: store <3 x float> %[[PLD_L0]], <3 x float>* %[[PLD_M0_P0:[^ ,]+]] -; CHECK: %[[GEP_PLD_P1:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P1:[^ ,]+]], i32 0, i32 0 -; CHECK: [[PLD_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[PLD_M0_P0]] -; CHECK: store <3 x float> [[PLD_L1]], <3 x float>* %[[GEP_PLD_P1]] - -; COM: dx::HitObject::Invoke -; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %[[HITOBJ]], %struct.Payload* %[[PLD_P1]]) - +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" target triple = "dxil-ms-dx" @@ -65,11 +24,20 @@ define void @"\01?main@@YAXXZ"() #0 { entry: %rayDesc = alloca %struct.RayDesc, align 4 %pld = alloca %struct.Payload, align 4 + + ; CHECK: %[[HITOBJ:[^ ,]+]] = alloca %dx.types.HitObject, align 4 + %hit = alloca %dx.types.HitObject, align 4 + %0 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !37 ; line:82 col:3 call void @llvm.lifetime.start(i64 32, i8* %0) #0, !dbg !37 ; line:82 col:3 - ; COM: Init RayDesc. + ; Init RayDesc. 
+ ; CHECK-DAG: store <3 x float> , <3 x float>* %[[ORIGIN_P0:[^ ,]+]], align 4 + ; CHECK-DAG: store float 3.000000e+00, float* %[[TMIN_P0:[^ ,]+]], align 4 + ; CHECK-DAG: store <3 x float> , <3 x float>* %[[DIRECTION_P0:[^ ,]+]], align 4 + ; CHECK-DAG: store float 7.000000e+00, float* %[[TMAX_P0:[^ ,]+]], align 4 + %Origin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 0, !dbg !41 ; line:83 col:11 store <3 x float> , <3 x float>* %Origin, align 4, !dbg !42, !tbaa !43 ; line:83 col:18 %TMin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 1, !dbg !46 ; line:84 col:11 @@ -87,12 +55,43 @@ entry: call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !57 ; line:91 col:3 %3 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !58 ; line:91 col:23 %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %3), !dbg !58 ; line:91 col:23 + + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + %5 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !58 ; line:91 col:23 - ; COM: dx::HitObject::TraceRay + ; Copy RayDesc. 
+ ; CHECK-DAG: %[[ORIGIN_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P0]] + ; CHECK-DAG: store <3 x float> %[[ORIGIN_L0]], <3 x float>* %[[ORIGIN_P1:[^ ,]+]] + ; CHECK-DAG: %[[TMIN_L0:[^ ,]+]] = load float, float* %[[TMIN_P0]] + ; CHECK-DAG: store float %[[TMIN_L0]], float* %[[TMIN_P1:[^ ,]+]] + ; CHECK-DAG: %[[DIRECTION_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] + ; CHECK-DAG: store <3 x float> %[[DIRECTION_L0]], <3 x float>* %[[DIRECTION_P1:[^ ,]+]] + ; CHECK-DAG: %[[TMAX_L0:[^ ,]+]] = load float, float* %[[TMAX_P0]] + ; CHECK-DAG: store float %[[TMAX_L0]], float* %[[TMAX_P1:[^ ,]+]] + + ; Load RayDesc. + ; CHECK-DAG: %[[ORIGIN_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P1]] + ; CHECK-DAG: %[[TMIN_L1:[^ ,]+]] = load float, float* %[[TMIN_P1]] + ; CHECK-DAG: %[[DIRECTION_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P1]] + ; CHECK-DAG: %[[TMAX_L1:[^ ,]+]] = load float, float* %[[TMAX_P1]] + + ; RayDesc is scalar replaced in HL op for dx::HitObject::TraceRay. + ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %[[HITOBJ]], %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> %[[ORIGIN_L1]], float %[[TMIN_L1]], <3 x float> %[[DIRECTION_L1]], float %[[TMAX_L1]], %struct.Payload* %[[PLD_P0:[^ ,]+]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, %struct.RayDesc* %rayDesc, %struct.Payload* %pld), !dbg !58 ; line:91 col:23 - ; COM: dx::HitObject::Invoke + ; Copy payload. 
+ ; CHECK: %[[GEP_PLD_P0:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 0 + ; CHECK: %[[PLD_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[GEP_PLD_P0]] + ; CHECK: store <3 x float> %[[PLD_L0]], <3 x float>* %[[PLD_M0_P0:[^ ,]+]] + ; CHECK: %[[GEP_PLD_P1:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P1:[^ ,]+]], i32 0, i32 0 + ; CHECK: [[PLD_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[PLD_M0_P0]] + ; CHECK: store <3 x float> [[PLD_L1]], <3 x float>* %[[GEP_PLD_P1]] + + ; dx::HitObject::Invoke + ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %[[HITOBJ]], %struct.Payload* %[[PLD_P1]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld), !dbg !59 ; line:101 col:3 %6 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !60 ; line:102 col:1 diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll index 33f90be816..59551a7eb4 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll @@ -1,50 +1,5 @@ ; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s -; CHECK: define <4 x float> @" -; CHECK-SAME: ?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z"(<2 x float>* noalias dereferenceable(8) %f2, %struct.RayDesc* %Ray, %struct.Payload* noalias %p) - -; Copy Payload fields (PLD_F0, PLD_F1) to local allocas: -; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 0, i32 0 -; CHECK: %[[LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[GEP]] -; CHECK: store <2 x float> %[[LOAD]], <2 x float>* %[[PLD_F0:[^ ,]+]] -; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 
0, i32 1 -; CHECK: %[[LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[GEP]] -; CHECK: store <3 x i32> %[[LOAD]], <3 x i32>* %[[PLD_F1:[^ ,]+]] - -; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) - -; Copy RayDesc fields (Origin, TMin, Direction, TMax) to local allocas: -; CHECK: %[[RAY_ORIGIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 0 -; CHECK: %[[RAY_ORIGIN_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_GEP]] -; CHECK: store <3 x float> %[[RAY_ORIGIN_LOAD]], <3 x float>* %[[RAY_ORIGIN_P0:[^ ,]+]] -; CHECK: %[[TMIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 1 -; CHECK: %[[TMIN_LOAD:[^ ,]+]] = load float, float* %[[TMIN_GEP]] -; CHECK: store float %[[TMIN_LOAD]], float* %[[TMIN_P0:[^ ,]+]] -; CHECK: %[[DIRECTION_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 2 -; CHECK: %[[DIRECTION_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_GEP]] -; CHECK: store <3 x float> %[[DIRECTION_LOAD]], <3 x float>* %[[DIRECTION_P0:[^ ,]+]] -; CHECK: %[[TMAX_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 3 -; CHECK: %[[TMAX_LOAD:[^ ,]+]] = load float, float* %[[TMAX_GEP]] -; CHECK: store float %[[TMAX_LOAD]], float* %[[TMAX_P0:[^ ,]+]] - -; COM: Copy Payload fields into payload struct for call: -; CHECK: %[[PLD_F0_GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0:[^ ,]+]], i32 0, i32 0 -; CHECK: %[[PLD_F0_LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[PLD_F0]] -; CHECK: store <2 x float> %[[PLD_F0_LOAD]], <2 x float>* %[[PLD_F0_GEP]] -; CHECK: %[[PLD_F1_GEP:[^ ,]+]] = getelementptr 
inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 1 -; CHECK: %[[PLD_F1_LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[PLD_F1]] -; CHECK: store <3 x i32> %[[PLD_F1_LOAD]], <3 x i32>* %[[PLD_F1_GEP]] - -; COM: Load RayDesc fields: -; CHECK: %[[RAY_ORIGIN_LOAD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_P0]] -; CHECK: %[[TMIN_LOAD2:[^ ,]+]] = load float, float* %[[TMIN_P0]] -; CHECK: %[[DIRECTION_LOAD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] -; CHECK: %[[TMAX_LOAD2:[^ ,]+]] = load float, float* %[[TMAX_P0]] - -; call TraceRay with the local allocas: -; CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 69, %dx.types.Handle %[[RTAS]], i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, <3 x float> %[[RAY_ORIGIN_LOAD2]], float %[[TMIN_LOAD2]], <3 x float> %[[DIRECTION_LOAD2]], float %[[TMAX_LOAD2]], %struct.Payload* %[[PLD_P0]]) - - target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" target triple = "dxil-ms-dx" @@ -63,9 +18,21 @@ target triple = "dxil-ms-dx" @"\01?MissShaderIndex@@3IB" = external constant i32, align 4 @"$Globals" = external constant %"$Globals" +; CHECK: define <4 x float> @" +; CHECK-SAME: ?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z"(<2 x float>* noalias dereferenceable(8) %f2, %struct.RayDesc* %Ray, %struct.Payload* noalias %p) + ; Function Attrs: nounwind define <4 x float> @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z"(<2 x float>* noalias dereferenceable(8) %f2, %struct.RayDesc* %Ray, %struct.Payload* noalias %p) #0 { entry: + + ; Copy Payload fields (PLD_F0, PLD_F1) to local allocas: + ; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 0, i32 0 + ; CHECK: %[[LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[GEP]] + ; CHECK: store 
<2 x float> %[[LOAD]], <2 x float>* %[[PLD_F0:[^ ,]+]] + ; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 0, i32 1 + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[GEP]] + ; CHECK: store <3 x i32> %[[LOAD]], <3 x i32>* %[[PLD_F1:[^ ,]+]] + %0 = alloca %struct.RayDesc, !dbg !39 ; line:22 col:61 %1 = bitcast %struct.RayDesc* %0 to i8*, !dbg !39 ; line:22 col:61 %2 = bitcast %struct.RayDesc* %Ray to i8*, !dbg !39 ; line:22 col:61 @@ -85,8 +52,43 @@ entry: %15 = load i32, i32* %6, align 4, !dbg !50, !tbaa !43 ; line:20 col:16 %16 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?Acc@@3URaytracingAccelerationStructure@@A", !dbg !51 ; line:20 col:3 %17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %16), !dbg !51 ; line:20 col:3 + + ; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) %18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %17, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !51 ; line:20 col:3 + + ; Copy RayDesc fields (Origin, TMin, Direction, TMax) to local allocas: + ; CHECK: %[[RAY_ORIGIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 0 + ; CHECK: %[[RAY_ORIGIN_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_GEP]] + ; CHECK: store <3 x float> %[[RAY_ORIGIN_LOAD]], <3 x float>* %[[RAY_ORIGIN_P0:[^ ,]+]] + ; CHECK: %[[TMIN_GEP:[^ ,]+]] = 
getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 1 + ; CHECK: %[[TMIN_LOAD:[^ ,]+]] = load float, float* %[[TMIN_GEP]] + ; CHECK: store float %[[TMIN_LOAD]], float* %[[TMIN_P0:[^ ,]+]] + ; CHECK: %[[DIRECTION_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 2 + ; CHECK: %[[DIRECTION_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_GEP]] + ; CHECK: store <3 x float> %[[DIRECTION_LOAD]], <3 x float>* %[[DIRECTION_P0:[^ ,]+]] + ; CHECK: %[[TMAX_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 3 + ; CHECK: %[[TMAX_LOAD:[^ ,]+]] = load float, float* %[[TMAX_GEP]] + ; CHECK: store float %[[TMAX_LOAD]], float* %[[TMAX_P0:[^ ,]+]] + + ; Copy Payload fields into payload struct for call: + ; CHECK: %[[PLD_F0_GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0:[^ ,]+]], i32 0, i32 0 + ; CHECK: %[[PLD_F0_LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[PLD_F0]] + ; CHECK: store <2 x float> %[[PLD_F0_LOAD]], <2 x float>* %[[PLD_F0_GEP]] + ; CHECK: %[[PLD_F1_GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 1 + ; CHECK: %[[PLD_F1_LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[PLD_F1]] + ; CHECK: store <3 x i32> %[[PLD_F1_LOAD]], <3 x i32>* %[[PLD_F1_GEP]] + + ; Load RayDesc fields: + ; CHECK: %[[RAY_ORIGIN_LOAD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_P0]] + ; CHECK: %[[TMIN_LOAD2:[^ ,]+]] = load float, float* %[[TMIN_P0]] + ; CHECK: %[[DIRECTION_LOAD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] + ; CHECK: %[[TMAX_LOAD2:[^ ,]+]] = load float, float* %[[TMAX_P0]] + + ; call TraceRay with the local allocas: + ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 69, %dx.types.Handle %[[RTAS]], i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 
%{{[^ ,]+}}, <3 x float> %[[RAY_ORIGIN_LOAD2]], float %[[TMIN_LOAD2]], <3 x float> %[[DIRECTION_LOAD2]], float %[[TMAX_LOAD2]], %struct.Payload* %[[PLD_P0]]) + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 69, %dx.types.Handle %18, i32 %15, i32 %14, i32 %13, i32 %12, i32 %11, %struct.RayDesc* %0, %struct.Payload* %p), !dbg !51 ; line:20 col:3 + ret <4 x float> , !dbg !52 ; line:24 col:4 } diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll index 404eee8e77..c01ec797bb 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll @@ -2,39 +2,6 @@ ; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl -; Capture CB, RayDesc ptr from CB, RTAS, and init RayQuery -; CHECK-DAG: %[[CB_H:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) -; CHECK-DAG: %[[CB_PTR:[^ ,]+]] = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %[[CB_H]], i32 0) -; CHECK-DAG: %[[RAYDESC_PTR:[^ ,]+]] = getelementptr inbounds %"$Globals", %"$Globals"* %[[CB_PTR]], i32 0, i32 0 -; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) -; CHECK-DAG: %[[RQ0:[^ ,]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, 
i32 0) -; CHECK-DAG: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] - -; Load RayDesc fields from CB to local copy -; CHECK-DAG: %[[ORIG_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 0 -; CHECK-DAG: %[[ORIG_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_CBP]] -; CHECK-DAG: store <3 x float> %[[ORIG_LD_CB]], <3 x float>* %[[ORIG_P0:[^ ,]+]] -; CHECK-DAG: %[[TMIN_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 1 -; CHECK-DAG: %[[TMIN_LD_CB:[^ ,]+]] = load float, float* %[[TMIN_CBP]] -; CHECK-DAG: store float %[[TMIN_LD_CB]], float* %[[TMIN_P0:[^ ,]+]] -; CHECK-DAG: %[[DIR_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 2 -; CHECK-DAG: %[[DIR_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_CBP]] -; CHECK-DAG: store <3 x float> %[[DIR_LD_CB]], <3 x float>* %[[DIR_P0:[^ ,]+]] -; CHECK-DAG: %[[TMAX_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 3 -; CHECK-DAG: %[[TMAX_LD_CB:[^ ,]+]] = load float, float* %[[TMAX_CBP]] -; CHECK-DAG: store float %[[TMAX_LD_CB]], float* %[[TMAX_P0:[^ ,]+]] - -; Load RayDesc fields from local copy -; CHECK-DAG: %[[ORIG:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_P0]] -; CHECK-DAG: %[[TMIN:[^ ,]+]] = load float, float* %[[TMIN_P0]] -; CHECK-DAG: %[[DIR:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_P0]] -; CHECK-DAG: %[[TMAX:[^ ,]+]] = load float, float* %[[TMAX_P0]] -; CHECK-DAG: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] - -; Call TraceRayInline -; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[ORIG]], float %[[TMIN]], <3 x float> %[[DIR]], float %[[TMAX]]) - - target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" target 
triple = "dxil-ms-dx" @@ -53,16 +20,59 @@ target triple = "dxil-ms-dx" define void @main() #0 { entry: %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) + + ; Capture CB, RayDesc ptr from CB, RTAS, and init RayQuery + ; CHECK-DAG: %[[CB_H:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + + ; CHECK-DAG: %[[CB_PTR:[^ ,]+]] = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %[[CB_H]], i32 0) + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) + + ; CHECK-DAG: %[[RAYDESC_PTR:[^ ,]+]] = getelementptr inbounds %"$Globals", %"$Globals"* %[[CB_PTR]], i32 0, i32 0 + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0 + + ; CHECK-DAG: %[[RQ0:[^ ,]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) + ; CHECK-DAG: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] + %rayQuery = alloca %"class.RayQuery<513, 0>", align 4 %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !34 ; line:12 col:71 %4 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !34 ; line:12 col:71 store i32 %rayQuery1, i32* %4, !dbg !34 ; line:12 col:71 + %5 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !38 ; line:13 col:3 %6 = call 
%dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %5), !dbg !38 ; line:13 col:3 + + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + %7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !38 ; line:13 col:3 + + ; Load RayDesc fields from CB to local copy + ; CHECK-DAG: %[[ORIG_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 0 + ; CHECK-DAG: %[[ORIG_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_CBP]] + ; CHECK-DAG: store <3 x float> %[[ORIG_LD_CB]], <3 x float>* %[[ORIG_P0:[^ ,]+]] + ; CHECK-DAG: %[[TMIN_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 1 + ; CHECK-DAG: %[[TMIN_LD_CB:[^ ,]+]] = load float, float* %[[TMIN_CBP]] + ; CHECK-DAG: store float %[[TMIN_LD_CB]], float* %[[TMIN_P0:[^ ,]+]] + ; CHECK-DAG: %[[DIR_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 2 + ; CHECK-DAG: %[[DIR_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_CBP]] + ; CHECK-DAG: store <3 x float> %[[DIR_LD_CB]], <3 x float>* %[[DIR_P0:[^ ,]+]] + ; CHECK-DAG: %[[TMAX_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 3 + ; CHECK-DAG: %[[TMAX_LD_CB:[^ ,]+]] = load float, float* %[[TMAX_CBP]] + ; CHECK-DAG: store float %[[TMAX_LD_CB]], float* %[[TMAX_P0:[^ ,]+]] + + 
; Load RayDesc fields from local copy + ; CHECK-DAG: %[[ORIG:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_P0]] + ; CHECK-DAG: %[[TMIN:[^ ,]+]] = load float, float* %[[TMIN_P0]] + ; CHECK-DAG: %[[DIR:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_P0]] + ; CHECK-DAG: %[[TMAX:[^ ,]+]] = load float, float* %[[TMAX_P0]] + ; CHECK-DAG: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] + + ; Call TraceRayInline + ; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[ORIG]], float %[[TMIN]], <3 x float> %[[DIR]], float %[[TMAX]]) + call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %7, i32 1, i32 2, %struct.RayDesc* %3), !dbg !38 ; line:13 col:3 ret void, !dbg !39 ; line:14 col:1 } diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll index 7fb918c299..ee76872441 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll @@ -3,52 +3,6 @@ ; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl, ; with call to DoTrace commented out. 
-; CHECK: define void @main(float* noalias, <3 x float>, float, <3 x float>, float) - -; Copy flattened RayDesc input to main function -; RayDesc fields: %1: Origin, %2: TMin, %3: Direction, %4: TMax -; CHECK: store float %4, float* %[[RD3_P0:[^ ,]+]] -; CHECK: store <3 x float> %3, <3 x float>* %[[RD2_P0:[^ ,]+]] -; CHECK: store float %2, float* %[[RD1_P0:[^ ,]+]] -; CHECK: store <3 x float> %1, <3 x float>* %[[RD0_P0:[^ ,]+]] - -; Copy RayDesc fields again -; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P0]] -; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P1:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P0]] -; CHECK: store float %[[LOAD]], float* %[[RD1_P1:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P0]] -; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P1:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P0]] -; CHECK: store float %[[LOAD]], float* %[[RD3_P1:[^ ,]+]] - -; Capture RayQuery ptr and RTAS handle -; CHECK: %[[RQ0:[^ ]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) -; CHECK: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] -; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) - -; Copy RayDesc fields again -; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P1]] -; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P2:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P1]] -; CHECK: store float %[[LOAD]], float* %[[RD1_P2:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P1]] -; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P2:[^ ,]+]] -; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P1]] -; 
CHECK: store float %[[LOAD]], float* %[[RD3_P2:[^ ,]+]] - -; Load RayDesc fields for TraceRayInline -; CHECK: %[[RD0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P2]] -; CHECK: %[[RD1:[^ ,]+]] = load float, float* %[[RD1_P2]] -; CHECK: %[[RD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P2]] -; CHECK: %[[RD3:[^ ,]+]] = load float, float* %[[RD3_P2]] - -; Load RayQuery -; CHECK: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] - -; TraceRayInline call -; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[RD0]], float %[[RD1]], <3 x float> %[[RD2]], float %[[RD3]]) - target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" target triple = "dxil-ms-dx" @@ -62,13 +16,39 @@ target triple = "dxil-ms-dx" @"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 @"$Globals" = external constant %ConstantBuffer +; CHECK: define void @main(float* noalias, <3 x float>, float, <3 x float>, float) + ; Function Attrs: nounwind define float @main(%struct.RayDesc* %rayDesc) #0 { entry: %0 = alloca %struct.RayDesc + + ; Copy flattened RayDesc input to main function + ; RayDesc fields: %1: Origin, %2: TMin, %3: Direction, %4: TMax + ; CHECK: store float %4, float* %[[RD3_P0:[^ ,]+]] + ; CHECK: store <3 x float> %3, <3 x float>* %[[RD2_P0:[^ ,]+]] + ; CHECK: store float %2, float* %[[RD1_P0:[^ ,]+]] + ; CHECK: store <3 x float> %1, <3 x float>* %[[RD0_P0:[^ ,]+]] + + ; Copy RayDesc fields again + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P0]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P1:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P0]] + ; CHECK: store float %[[LOAD]], float* %[[RD1_P1:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P0]] + ; CHECK: 
store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P1:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P0]] + ; CHECK: store float %[[LOAD]], float* %[[RD3_P1:[^ ,]+]] + %1 = bitcast %struct.RayDesc* %0 to i8* %2 = bitcast %struct.RayDesc* %rayDesc to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 32, i32 1, i1 false) + + ; Capture RayQuery ptr and RTAS handle + ; CHECK: %[[RQ0:[^ ]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) + ; CHECK: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] + ; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + %rayQuery = alloca %"class.RayQuery<513, 0>", align 4 %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !35 ; line:15 col:71 %3 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !35 ; line:15 col:71 @@ -76,6 +56,29 @@ entry: %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !39 ; line:17 col:3 %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !39 ; line:17 col:3 %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !39 ; line:17 col:3 + + ; Copy RayDesc fields again + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P1]] + ; CHECK: store <3 x float> %[[LOAD]], 
<3 x float>* %[[RD0_P2:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P1]] + ; CHECK: store float %[[LOAD]], float* %[[RD1_P2:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P1]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P2:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P1]] + ; CHECK: store float %[[LOAD]], float* %[[RD3_P2:[^ ,]+]] + + ; Load RayDesc fields for TraceRayInline + ; CHECK: %[[RD0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P2]] + ; CHECK: %[[RD1:[^ ,]+]] = load float, float* %[[RD1_P2]] + ; CHECK: %[[RD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P2]] + ; CHECK: %[[RD3:[^ ,]+]] = load float, float* %[[RD3_P2]] + + ; Load RayQuery + ; CHECK: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] + + ; TraceRayInline call + ; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[RD0]], float %[[RD1]], <3 x float> %[[RD2]], float %[[RD3]]) + call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %6, i32 1, i32 2, %struct.RayDesc* %0), !dbg !39 ; line:17 col:3 ret float 0.000000e+00, !dbg !40 ; line:18 col:3 }