|
| 1 | +/////////////////////////////////////////////////////////////////////////////// |
| 2 | +// // |
| 3 | +// DxilScalarizeVectorLoadStores.cpp // |
| 4 | +// Copyright (C) Microsoft Corporation. All rights reserved. // |
| 5 | +// This file is distributed under the University of Illinois Open Source // |
| 6 | +// License. See LICENSE.TXT for details. // |
| 7 | +// // |
| 8 | +// Lowers native vector load stores to potentially multiple scalar calls. // |
| 9 | +// // |
| 10 | +/////////////////////////////////////////////////////////////////////////////// |
| 11 | + |
| 12 | +#include "dxc/DXIL/DxilInstructions.h" |
| 13 | +#include "dxc/DXIL/DxilModule.h" |
| 14 | +#include "dxc/HLSL/DxilGenerationPass.h" |
| 15 | + |
| 16 | +#include "llvm/ADT/StringRef.h" |
| 17 | +#include "llvm/IR/Function.h" |
| 18 | +#include "llvm/IR/IRBuilder.h" |
| 19 | +#include "llvm/IR/Instructions.h" |
| 20 | +#include "llvm/IR/Module.h" |
| 21 | +#include "llvm/IR/PassManager.h" |
| 22 | +#include "llvm/Pass.h" |
| 23 | + |
| 24 | +using namespace llvm; |
| 25 | +using namespace hlsl; |
| 26 | + |
// Module pass that lowers native vector raw-buffer intrinsics
// (RawBufferVectorLoad / RawBufferVectorStore) into one or more scalar
// RawBufferLoad / RawBufferStore calls for shader models below 6.9, which
// lack native vector support.
class DxilScalarizeVectorLoadStores : public ModulePass {
private:
  DxilModule *m_DM; // DxilModule of the module being processed.

  // Lower one RawBufferVectorLoad call site to scalar RawBufferLoad calls.
  void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL,
                           CallInst *CI);
  // Lower one RawBufferVectorStore call site to scalar RawBufferStore calls.
  void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL,
                            CallInst *CI);

public:
  static char ID; // Pass identification, replacement for typeid
  explicit DxilScalarizeVectorLoadStores() : ModulePass(ID) {}

  StringRef getPassName() const override {
    return "DXIL scalarize vector load/stores";
  }

  // Returns true if any vector load/store intrinsic was rewritten.
  bool runOnModule(Module &M) override {
    DxilModule &DM = M.GetOrCreateDxilModule();
    m_DM = &DM;

    // Shader Model 6.9 allows native vectors and doesn't need this pass.
    if (DM.GetShaderModel()->IsSM69Plus())
      return false;

    bool Changed = false;

    hlsl::OP *HlslOP = DM.GetOP();
    // Advance the function iterator before processing each function so that
    // erasing the current declaration does not invalidate the loop.
    for (auto F = M.functions().begin(), E = M.functions().end(); F != E;) {
      Function *Func = &*(F++);
      DXIL::OpCodeClass OpClass;
      // GetOpCodeClass succeeds only for DXIL intrinsic declarations.
      if (HlslOP->GetOpCodeClass(Func, OpClass)) {
        if (OpClass == DXIL::OpCodeClass::RawBufferVectorLoad) {
          // Advance the user iterator before the scalarize call erases the
          // call instruction it is handed.
          for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) {
            CallInst *CI = cast<CallInst>(*(U++));
            scalarizeVectorLoad(HlslOP, M.getDataLayout(), CI);
            Changed = true;
          }
          // All users have been rewritten; drop the unused declaration.
          Func->eraseFromParent();
        } else if (OpClass == DXIL::OpCodeClass::RawBufferVectorStore) {
          for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) {
            CallInst *CI = cast<CallInst>(*(U++));
            scalarizeVectorStore(HlslOP, M.getDataLayout(), CI);
            Changed = true;
          }
          Func->eraseFromParent();
        }
      }
    }
    return Changed;
  }
};
| 79 | + |
| 80 | +static unsigned GetRawBufferMask(unsigned NumComponents) { |
| 81 | + switch (NumComponents) { |
| 82 | + case 0: |
| 83 | + return 0; |
| 84 | + case 1: |
| 85 | + return DXIL::kCompMask_X; |
| 86 | + case 2: |
| 87 | + return DXIL::kCompMask_X | DXIL::kCompMask_Y; |
| 88 | + case 3: |
| 89 | + return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z; |
| 90 | + case 4: |
| 91 | + default: |
| 92 | + return DXIL::kCompMask_All; |
| 93 | + } |
| 94 | + return DXIL::kCompMask_All; |
| 95 | +} |
| 96 | + |
// Replaces a single RawBufferVectorLoad call with a sequence of scalar
// RawBufferLoad calls (at most 4 elements each), reassembles the loaded
// scalars into a vector, rewires the extractvalue users of the original
// call, and erases it.
void DxilScalarizeVectorLoadStores::scalarizeVectorLoad(hlsl::OP *HlslOP,
                                                        const DataLayout &DL,
                                                        CallInst *CI) {
  IRBuilder<> Builder(CI);
  // Collect the information required to break this into scalar ops from args.
  DxilInst_RawBufferVectorLoad VecLd(CI);
  OP::OpCode OpCode = OP::OpCode::RawBufferLoad;
  llvm::Constant *opArg = Builder.getInt32((unsigned)OpCode);
  SmallVector<Value *, 10> Args;
  Args.emplace_back(opArg);                     // opcode @0.
  Args.emplace_back(VecLd.get_buf());           // Resource handle @1.
  Args.emplace_back(VecLd.get_index());         // Index @2.
  Args.emplace_back(VecLd.get_elementOffset()); // Offset @3.
  Args.emplace_back(nullptr);                   // Mask to be set later @4.
  Args.emplace_back(VecLd.get_alignment());     // Alignment @5.

  // Set offset to increment depending on whether the real offset is defined.
  unsigned OffsetIdx = 0;
  if (isa<UndefValue>(VecLd.get_elementOffset()))
    // Byte Address Buffers can't use offset, so use index.
    OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx;
  else
    OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx;

  // The vector load's result struct carries the loaded vector at element 0.
  StructType *ResRetTy = cast<StructType>(CI->getType());
  Type *Ty = ResRetTy->getElementType(0);
  unsigned NumComponents = Ty->getVectorNumElements();
  Type *EltTy = Ty->getScalarType();
  unsigned EltSize = DL.getTypeAllocSize(EltTy);

  const unsigned MaxElemCount = 4;
  SmallVector<Value *, 4> Elts(NumComponents);
  Value *Ld = nullptr;
  for (unsigned EIx = 0; EIx < NumComponents;) {
    // Load 4 elements or however many less than 4 are left to load.
    unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount);
    Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
        HlslOP->GetI8Const(GetRawBufferMask(ChunkSize));
    // If we've loaded a chunk already, update offset to next chunk.
    // Any earlier chunk was a full 4 elements, so step by 4 * EltSize bytes.
    if (EIx > 0)
      Args[OffsetIdx] =
          Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize));
    Function *F = HlslOP->GetOpFunc(OpCode, EltTy);
    Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(OpCode));
    // Pull this chunk's scalars out of the scalar load's result struct.
    for (unsigned ChIx = 0; ChIx < ChunkSize; ChIx++, EIx++)
      Elts[EIx] = Builder.CreateExtractValue(Ld, ChIx);
  }

  // Rebuild the full vector from the individually loaded scalars.
  Value *RetValNew = UndefValue::get(VectorType::get(EltTy, NumComponents));
  for (unsigned ElIx = 0; ElIx < NumComponents; ElIx++)
    RetValNew = Builder.CreateInsertElement(RetValNew, Elts[ElIx], ElIx);

  // Replace users of the vector extracted from the vector load resret.
  // Advance the user iterator before each erase to keep iteration valid.
  Value *Status = nullptr;
  for (auto CU = CI->user_begin(), CE = CI->user_end(); CU != CE;) {
    auto EV = cast<ExtractValueInst>(*(CU++));
    unsigned Ix = EV->getIndices()[0];
    if (Ix == 0) {
      // Handle value uses.
      EV->replaceAllUsesWith(RetValNew);
    } else if (Ix == 1) {
      // Handle status uses. The status is taken from the final chunk's load
      // (`Ld` still points at the last scalar load created above).
      if (!Status)
        Status = Builder.CreateExtractValue(Ld, DXIL::kResRetStatusIndex);
      EV->replaceAllUsesWith(Status);
    }
    EV->eraseFromParent();
  }
  CI->eraseFromParent();
}
| 167 | + |
| 168 | +void DxilScalarizeVectorLoadStores::scalarizeVectorStore(hlsl::OP *HlslOP, |
| 169 | + const DataLayout &DL, |
| 170 | + CallInst *CI) { |
| 171 | + IRBuilder<> Builder(CI); |
| 172 | + // Collect the information required to break this into scalar ops from args. |
| 173 | + DxilInst_RawBufferVectorStore VecSt(CI); |
| 174 | + OP::OpCode OpCode = OP::OpCode::RawBufferStore; |
| 175 | + llvm::Constant *opArg = Builder.getInt32((unsigned)OpCode); |
| 176 | + SmallVector<Value *, 10> Args; |
| 177 | + Args.emplace_back(opArg); // opcode @0. |
| 178 | + Args.emplace_back(VecSt.get_uav()); // Resource handle @1. |
| 179 | + Args.emplace_back(VecSt.get_index()); // Index @2. |
| 180 | + Args.emplace_back(VecSt.get_elementOffset()); // Offset @3. |
| 181 | + Args.emplace_back(nullptr); // Val0 to be set later @4. |
| 182 | + Args.emplace_back(nullptr); // Val1 to be set later @5. |
| 183 | + Args.emplace_back(nullptr); // Val2 to be set later @6. |
| 184 | + Args.emplace_back(nullptr); // Val3 to be set later @7. |
| 185 | + Args.emplace_back(nullptr); // Mask to be set later @8. |
| 186 | + Args.emplace_back(VecSt.get_alignment()); // Alignment @9. |
| 187 | + |
| 188 | + // Set offset to increment depending on whether the real offset is defined. |
| 189 | + unsigned OffsetIdx = 0; |
| 190 | + if (isa<UndefValue>(VecSt.get_elementOffset())) |
| 191 | + // Byte Address Buffers can't use offset, so use index. |
| 192 | + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; |
| 193 | + else |
| 194 | + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; |
| 195 | + |
| 196 | + Value *VecVal = VecSt.get_value0(); |
| 197 | + |
| 198 | + const unsigned MaxElemCount = 4; |
| 199 | + Type *Ty = VecVal->getType(); |
| 200 | + const unsigned NumComponents = Ty->getVectorNumElements(); |
| 201 | + Type *EltTy = Ty->getScalarType(); |
| 202 | + Value *UndefVal = UndefValue::get(EltTy); |
| 203 | + unsigned EltSize = DL.getTypeAllocSize(EltTy); |
| 204 | + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); |
| 205 | + for (unsigned EIx = 0; EIx < NumComponents;) { |
| 206 | + // Store 4 elements or however many less than 4 are left to store. |
| 207 | + unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); |
| 208 | + // For second and subsequent store calls, increment the resource-appropriate |
| 209 | + // index or offset parameter. |
| 210 | + if (EIx > 0) |
| 211 | + Args[OffsetIdx] = |
| 212 | + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); |
| 213 | + // Populate all value arguments either with the vector or undefs. |
| 214 | + uint8_t Mask = 0; |
| 215 | + unsigned ChIx = 0; |
| 216 | + for (; ChIx < ChunkSize; ChIx++, EIx++) { |
| 217 | + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = |
| 218 | + Builder.CreateExtractElement(VecVal, EIx); |
| 219 | + Mask |= (1 << ChIx); |
| 220 | + } |
| 221 | + for (; ChIx < MaxElemCount; ChIx++) |
| 222 | + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = UndefVal; |
| 223 | + |
| 224 | + Args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] = |
| 225 | + HlslOP->GetU8Const(Mask); |
| 226 | + Builder.CreateCall(F, Args); |
| 227 | + } |
| 228 | + CI->eraseFromParent(); |
| 229 | +} |
| 230 | + |
char DxilScalarizeVectorLoadStores::ID = 0;

// Factory used by the pass pipeline to create an instance of this pass.
ModulePass *llvm::createDxilScalarizeVectorLoadStoresPass() {
  return new DxilScalarizeVectorLoadStores();
}

// Register the pass with LLVM's pass registry under its command-line name.
INITIALIZE_PASS(DxilScalarizeVectorLoadStores,
                "hlsl-dxil-scalarize-vector-load-stores",
                "DXIL scalarize vector load/stores", false, false)
0 commit comments