Skip to content

Commit 27f0e73

Browse files
author
Greg Roth
committed
Vector to scalar raw buf load lowering pass
Native vector loads and stores are generated for 6.9 targets and above, including the 6.x target used when compiling to libraries. This adds a pass, run when linking, that lowers the vector operations to scalar operations for shader models that lack native vector support. This allows libraries compiled for shader models with native vector support to be linked into targets without it.
1 parent 0c95d20 commit 27f0e73

6 files changed

Lines changed: 731 additions & 0 deletions

File tree

include/dxc/HLSL/DxilGenerationPass.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ ModulePass *createResumePassesPass();
8181
FunctionPass *createMatrixBitcastLowerPass();
8282
ModulePass *createDxilCleanupAddrSpaceCastPass();
8383
ModulePass *createDxilRenameResourcesPass();
84+
ModulePass *createDxilScalarizeVectorLoadStoresPass();
8485

8586
void initializeDxilLowerCreateHandleForLibPass(llvm::PassRegistry &);
8687
void initializeDxilAllocateResourcesForLibPass(llvm::PassRegistry &);
@@ -115,6 +116,7 @@ void initializeResumePassesPass(llvm::PassRegistry &);
115116
void initializeMatrixBitcastLowerPassPass(llvm::PassRegistry &);
116117
void initializeDxilCleanupAddrSpaceCastPass(llvm::PassRegistry &);
117118
void initializeDxilRenameResourcesPass(llvm::PassRegistry &);
119+
void initializeDxilScalarizeVectorLoadStoresPass(llvm::PassRegistry &);
118120

119121
ModulePass *createDxilValidateWaveSensitivityPass();
120122
void initializeDxilValidateWaveSensitivityPass(llvm::PassRegistry &);

lib/HLSL/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ add_llvm_library(LLVMHLSL
2525
DxilNoops.cpp
2626
DxilPreserveAllOutputs.cpp
2727
DxilRenameResourcesPass.cpp
28+
DxilScalarizeVectorLoadStores.cpp
2829
DxilSimpleGVNHoist.cpp
2930
DxilSignatureValidation.cpp
3031
DxilTargetLowering.cpp

lib/HLSL/DxilLinker.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,10 @@ void DxilLinkJob::RunPreparePass(Module &M) {
12471247
PM.add(createDxilReinsertNopsPass());
12481248
PM.add(createAlwaysInlinerPass(/*InsertLifeTime*/ false));
12491249

1250+
// Lower native vector load/store operations to scalar calls for
1251+
// shader models without native vector support.
1252+
PM.add(createDxilScalarizeVectorLoadStoresPass());
1253+
12501254
// Remove unused functions.
12511255
PM.add(createDxilDeadFunctionEliminationPass());
12521256

@@ -1272,6 +1276,7 @@ void DxilLinkJob::RunPreparePass(Module &M) {
12721276

12731277
// Clean up vectors, and run mem2reg again
12741278
PM.add(createScalarizerPass());
1279+
12751280
PM.add(createPromoteMemoryToRegisterPass());
12761281

12771282
PM.add(createSimplifyInstPass());
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
///////////////////////////////////////////////////////////////////////////////
2+
// //
3+
// DxilScalarizeVectorLoadStores.cpp //
4+
// Copyright (C) Microsoft Corporation. All rights reserved. //
5+
// This file is distributed under the University of Illinois Open Source //
6+
// License. See LICENSE.TXT for details. //
7+
// //
8+
// Lowers native vector load stores to potentially multiple scalar calls. //
9+
// //
10+
///////////////////////////////////////////////////////////////////////////////
11+
12+
#include "dxc/DXIL/DxilInstructions.h"
13+
#include "dxc/DXIL/DxilModule.h"
14+
#include "dxc/HLSL/DxilGenerationPass.h"
15+
16+
#include "llvm/ADT/StringRef.h"
17+
#include "llvm/IR/Function.h"
18+
#include "llvm/IR/IRBuilder.h"
19+
#include "llvm/IR/Instructions.h"
20+
#include "llvm/IR/Module.h"
21+
#include "llvm/IR/PassManager.h"
22+
#include "llvm/Pass.h"
23+
24+
using namespace llvm;
25+
using namespace hlsl;
26+
27+
class DxilScalarizeVectorLoadStores : public ModulePass {
28+
private:
29+
DxilModule *m_DM;
30+
31+
void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL,
32+
CallInst *CI);
33+
void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL,
34+
CallInst *CI);
35+
36+
public:
37+
static char ID; // Pass identification, replacement for typeid
38+
explicit DxilScalarizeVectorLoadStores() : ModulePass(ID) {}
39+
40+
StringRef getPassName() const override {
41+
return "DXIL scalarize vector load/stores";
42+
}
43+
44+
bool runOnModule(Module &M) override {
45+
DxilModule &DM = M.GetOrCreateDxilModule();
46+
m_DM = &DM;
47+
48+
// Shader Model 6.9 allows native vectors and doesn't need this pass.
49+
if (DM.GetShaderModel()->IsSM69Plus())
50+
return false;
51+
52+
bool Changed = false;
53+
54+
hlsl::OP *HlslOP = DM.GetOP();
55+
for (auto F = M.functions().begin(), E = M.functions().end(); F != E;) {
56+
Function *Func = &*(F++);
57+
DXIL::OpCodeClass OpClass;
58+
if (HlslOP->GetOpCodeClass(Func, OpClass)) {
59+
if (OpClass == DXIL::OpCodeClass::RawBufferVectorLoad) {
60+
for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) {
61+
CallInst *CI = cast<CallInst>(*(U++));
62+
scalarizeVectorLoad(HlslOP, M.getDataLayout(), CI);
63+
Changed = true;
64+
}
65+
Func->eraseFromParent();
66+
} else if (OpClass == DXIL::OpCodeClass::RawBufferVectorStore) {
67+
for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) {
68+
CallInst *CI = cast<CallInst>(*(U++));
69+
scalarizeVectorStore(HlslOP, M.getDataLayout(), CI);
70+
Changed = true;
71+
}
72+
Func->eraseFromParent();
73+
}
74+
}
75+
}
76+
return Changed;
77+
}
78+
};
79+
80+
static unsigned GetRawBufferMask(unsigned NumComponents) {
81+
switch (NumComponents) {
82+
case 0:
83+
return 0;
84+
case 1:
85+
return DXIL::kCompMask_X;
86+
case 2:
87+
return DXIL::kCompMask_X | DXIL::kCompMask_Y;
88+
case 3:
89+
return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
90+
case 4:
91+
default:
92+
return DXIL::kCompMask_All;
93+
}
94+
return DXIL::kCompMask_All;
95+
}
96+
97+
// Lowers a single RawBufferVectorLoad call to one or more scalar
// RawBufferLoad calls of up to four elements each, rebuilds the vector
// result, and rewires the extractvalue users of the original call.
void DxilScalarizeVectorLoadStores::scalarizeVectorLoad(hlsl::OP *HlslOP,
                                                        const DataLayout &DL,
                                                        CallInst *CI) {
  IRBuilder<> Builder(CI);
  // Gather the operands needed to break this into scalar loads.
  DxilInst_RawBufferVectorLoad VecLd(CI);
  const OP::OpCode LdOp = OP::OpCode::RawBufferLoad;

  SmallVector<Value *, 10> LdArgs;
  LdArgs.emplace_back(Builder.getInt32((unsigned)LdOp)); // Opcode @0.
  LdArgs.emplace_back(VecLd.get_buf());                  // Resource handle @1.
  LdArgs.emplace_back(VecLd.get_index());                // Index @2.
  LdArgs.emplace_back(VecLd.get_elementOffset());        // Offset @3.
  LdArgs.emplace_back(nullptr);                          // Mask, set per chunk @4.
  LdArgs.emplace_back(VecLd.get_alignment());            // Alignment @5.

  // Byte address buffers can't use the offset (it is undef), so chunks are
  // advanced through the index argument; otherwise through the offset.
  const unsigned AdvanceIdx =
      isa<UndefValue>(VecLd.get_elementOffset())
          ? (unsigned)DXIL::OperandIndex::kRawBufferLoadIndexOpIdx
          : (unsigned)DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx;

  StructType *ResRetTy = cast<StructType>(CI->getType());
  Type *VecTy = ResRetTy->getElementType(0);
  const unsigned NumComponents = VecTy->getVectorNumElements();
  Type *EltTy = VecTy->getScalarType();
  const unsigned EltSize = DL.getTypeAllocSize(EltTy);

  const unsigned MaxChunk = 4;
  SmallVector<Value *, 4> Elts(NumComponents);
  Function *LdFunc = HlslOP->GetOpFunc(LdOp, EltTy);
  Value *Ld = nullptr;
  unsigned EIx = 0;
  while (EIx < NumComponents) {
    // Load four elements, or however many remain in the final chunk.
    const unsigned ChunkSize = std::min(NumComponents - EIx, MaxChunk);
    LdArgs[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
        HlslOP->GetI8Const(GetRawBufferMask(ChunkSize));
    // After the first chunk, step the index/offset to the next chunk; every
    // chunk before the last holds exactly four elements.
    if (EIx > 0)
      LdArgs[AdvanceIdx] = Builder.CreateAdd(
          LdArgs[AdvanceIdx], HlslOP->GetU32Const(4 * EltSize));
    Ld = Builder.CreateCall(LdFunc, LdArgs, OP::GetOpCodeName(LdOp));
    for (unsigned ChIx = 0; ChIx < ChunkSize; ChIx++, EIx++)
      Elts[EIx] = Builder.CreateExtractValue(Ld, ChIx);
  }

  // Reassemble the loaded scalars into the vector the users expect.
  Value *VecResult = UndefValue::get(VectorType::get(EltTy, NumComponents));
  for (unsigned ElIx = 0; ElIx < NumComponents; ElIx++)
    VecResult = Builder.CreateInsertElement(VecResult, Elts[ElIx], ElIx);

  // Rewire extractvalue users of the vector-load resret: index 0 is the
  // value, index 1 is the status.
  // NOTE(review): the status is taken from the last chunk's load only —
  // confirm this matches the intended semantics for multi-chunk loads.
  Value *Status = nullptr;
  for (auto CU = CI->user_begin(), CE = CI->user_end(); CU != CE;) {
    auto EV = cast<ExtractValueInst>(*(CU++));
    const unsigned Ix = EV->getIndices()[0];
    if (Ix == 0) {
      // Value uses.
      EV->replaceAllUsesWith(VecResult);
    } else if (Ix == 1) {
      // Status uses; materialize the extract lazily on first demand.
      if (!Status)
        Status = Builder.CreateExtractValue(Ld, DXIL::kResRetStatusIndex);
      EV->replaceAllUsesWith(Status);
    }
    EV->eraseFromParent();
  }
  CI->eraseFromParent();
}
167+
168+
// Lowers a single RawBufferVectorStore call to one or more scalar
// RawBufferStore calls writing up to four elements each.
void DxilScalarizeVectorLoadStores::scalarizeVectorStore(hlsl::OP *HlslOP,
                                                         const DataLayout &DL,
                                                         CallInst *CI) {
  IRBuilder<> Builder(CI);
  // Gather the operands needed to break this into scalar stores.
  DxilInst_RawBufferVectorStore VecSt(CI);
  const OP::OpCode StOp = OP::OpCode::RawBufferStore;

  SmallVector<Value *, 10> StArgs;
  StArgs.emplace_back(Builder.getInt32((unsigned)StOp)); // Opcode @0.
  StArgs.emplace_back(VecSt.get_uav());                  // Resource handle @1.
  StArgs.emplace_back(VecSt.get_index());                // Index @2.
  StArgs.emplace_back(VecSt.get_elementOffset());        // Offset @3.
  StArgs.emplace_back(nullptr);                          // Val0, set per chunk @4.
  StArgs.emplace_back(nullptr);                          // Val1, set per chunk @5.
  StArgs.emplace_back(nullptr);                          // Val2, set per chunk @6.
  StArgs.emplace_back(nullptr);                          // Val3, set per chunk @7.
  StArgs.emplace_back(nullptr);                          // Mask, set per chunk @8.
  StArgs.emplace_back(VecSt.get_alignment());            // Alignment @9.

  // Byte address buffers can't use the offset (it is undef), so chunks are
  // advanced through the index argument; otherwise through the offset.
  // NOTE(review): the RawBufferLoad operand-index constants are reused here;
  // index and offset occupy the same positions (@2/@3) in both operations —
  // confirm against the DXIL operand tables.
  const unsigned AdvanceIdx =
      isa<UndefValue>(VecSt.get_elementOffset())
          ? (unsigned)DXIL::OperandIndex::kRawBufferLoadIndexOpIdx
          : (unsigned)DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx;

  Value *VecVal = VecSt.get_value0();

  const unsigned MaxChunk = 4;
  Type *VecTy = VecVal->getType();
  const unsigned NumComponents = VecTy->getVectorNumElements();
  Type *EltTy = VecTy->getScalarType();
  Value *UndefElt = UndefValue::get(EltTy);
  const unsigned EltSize = DL.getTypeAllocSize(EltTy);
  Function *StFunc = HlslOP->GetOpFunc(StOp, EltTy);
  unsigned EIx = 0;
  while (EIx < NumComponents) {
    // Store four elements, or however many remain in the final chunk.
    const unsigned ChunkSize = std::min(NumComponents - EIx, MaxChunk);
    // After the first chunk, step the index/offset to the next chunk; every
    // chunk before the last holds exactly four elements.
    if (EIx > 0)
      StArgs[AdvanceIdx] = Builder.CreateAdd(
          StArgs[AdvanceIdx], HlslOP->GetU32Const(4 * EltSize));
    // Fill the value slots with vector elements, padding the tail of the
    // final chunk with undef.
    for (unsigned Slot = 0; Slot < MaxChunk; ++Slot) {
      StArgs[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + Slot] =
          Slot < ChunkSize ? Builder.CreateExtractElement(VecVal, EIx + Slot)
                           : UndefElt;
    }
    // The low ChunkSize bits of the mask mark the populated slots.
    StArgs[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] =
        HlslOP->GetU8Const((uint8_t)((1u << ChunkSize) - 1));
    Builder.CreateCall(StFunc, StArgs);
    EIx += ChunkSize;
  }
  CI->eraseFromParent();
}
230+
231+
char DxilScalarizeVectorLoadStores::ID = 0;
232+
233+
ModulePass *llvm::createDxilScalarizeVectorLoadStoresPass() {
234+
return new DxilScalarizeVectorLoadStores();
235+
}
236+
237+
INITIALIZE_PASS(DxilScalarizeVectorLoadStores,
238+
"hlsl-dxil-scalarize-vector-load-stores",
239+
"DXIL scalarize vector load/stores", false, false)

0 commit comments

Comments
 (0)