@@ -3953,6 +3953,11 @@ struct ResLoadHelper {
39533953 : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst),
39543954 addr (idx), offset(Offset), status(nullptr ), mipLevel(mip) {
39553955 opcode = LoadOpFromResKind (RK);
3956+ Type *Ty = Inst->getType ();
3957+ if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy () &&
3958+ Ty->getVectorNumElements () > 1 &&
3959+ Inst->getModule ()->GetHLModule ().GetShaderModel ()->IsSM69Plus ())
3960+ opcode = OP::OpCode::RawBufferVectorLoad;
39563961 }
39573962 OP::OpCode opcode;
39583963 IntrinsicOp intrinsicOpCode;
@@ -4022,6 +4027,14 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
40224027 if (RC == DxilResourceBase::Class::SRV)
40234028 OffsetIdx = IsMS ? HLOperandIndex::kTex2DMSLoadOffsetOpIdx
40244029 : HLOperandIndex::kTexLoadOffsetOpIdx ;
4030+ } else if (opcode == OP::OpCode::RawBufferLoad) {
4031+ // If native vectors are available and this load had a vector
4032+ // with more than one element, convert the RawBufferLoad to the
4033+ // native vector variant RawBufferVectorLoad.
4034+ Type *Ty = CI->getType ();
4035+ if (Ty->isVectorTy () && Ty->getVectorNumElements () > 1 &&
4036+ CI->getModule ()->GetHLModule ().GetShaderModel ()->IsSM69Plus ())
4037+ opcode = OP::OpCode::RawBufferVectorLoad;
40254038 }
40264039
40274040 // Set offset.
@@ -4079,7 +4092,7 @@ Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
40794092// Sets up arguments for buffer load call.
40804093static SmallVector<Value *, 10 > GetBufLoadArgs (ResLoadHelper helper,
40814094 HLResource::Kind RK,
4082- IRBuilder<> Builder, Type *EltTy,
4095+ IRBuilder<> Builder,
40834096 unsigned LdSize) {
40844097 OP::OpCode opcode = helper.opcode ;
40854098 llvm::Constant *opArg = Builder.getInt32 ((uint32_t )opcode);
@@ -4127,6 +4140,7 @@ static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper,
41274140 // If not TextureLoad, it could be a typed or raw buffer load.
41284141 // They have mostly similar arguments.
41294142 DXASSERT (opcode == OP::OpCode::RawBufferLoad ||
4143+ opcode == OP::OpCode::RawBufferVectorLoad ||
41304144 opcode == OP::OpCode::BufferLoad,
41314145 " Wrong opcode in get load args" );
41324146 Args.emplace_back (
@@ -4137,6 +4151,9 @@ static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper,
41374151 // Unlike typed buffer load, raw buffer load has mask and alignment.
41384152 Args.emplace_back (nullptr ); // Mask will be added later %4.
41394153 Args.emplace_back (alignmentVal); // alignment @5.
4154+ } else if (opcode == OP::OpCode::RawBufferVectorLoad) {
4155+ // RawBufferVectorLoad takes just alignment, no mask.
4156+ Args.emplace_back (alignmentVal); // alignment @4
41404157 }
41414158 }
41424159 return Args;
@@ -4162,18 +4179,19 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
41624179 if (isBool || (is64 && isTyped))
41634180 EltTy = Builder.getInt32Ty ();
41644181
4165- // 64-bit types are stored as int32 pairs in typed buffers .
4182+ // Adjust number of components as needed .
41664183 if (is64 && isTyped) {
4184+ // 64-bit types are stored as int32 pairs in typed buffers.
41674185 DXASSERT (NumComponents <= 2 , " Typed buffers only allow 4 dwords." );
41684186 NumComponents *= 2 ;
4187+ } else if (opcode == OP::OpCode::RawBufferVectorLoad) {
4188+ // Native vector loads only have a single vector element in ResRet.
4189+ EltTy = VectorType::get (EltTy, NumComponents);
4190+ NumComponents = 1 ;
41694191 }
41704192
41714193 unsigned LdSize = DL.getTypeAllocSize (EltTy);
4172-
4173- SmallVector<Value *, 4 > Elts (NumComponents);
4174-
4175- SmallVector<Value *, 10 > Args =
4176- GetBufLoadArgs (helper, RK, Builder, EltTy, LdSize);
4194+ SmallVector<Value *, 10 > Args = GetBufLoadArgs (helper, RK, Builder, LdSize);
41774195
41784196 // Keep track of the first load for debug info migration.
41794197 Value *FirstLd = nullptr ;
@@ -4185,9 +4203,10 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
41854203 else if (RK == DxilResource::Kind::StructuredBuffer)
41864204 OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx ;
41874205
4188- // Create calls to function object.
4206+ // Create call(s) to function object and collect results in Elts .
41894207 // Typed buffer loads are limited to one load of up to 4 32-bit values.
41904208 // Raw buffer loads might need multiple loads in chunks of 4.
4209+ SmallVector<Value *, 4 > Elts (NumComponents);
41914210 for (unsigned i = 0 ; i < NumComponents;) {
41924211 // Load 4 elements or however many less than 4 are left to load.
41934212 unsigned chunkSize = std::min (NumComponents - i, 4U );
@@ -4197,7 +4216,7 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
41974216 Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx ] =
41984217 GetRawBufferMaskForETy (EltTy, chunkSize, OP);
41994218 // If we've loaded a chunk already, update offset to next chunk.
4200- if (FirstLd != nullptr && opcode == OP::OpCode::RawBufferLoad )
4219+ if (FirstLd != nullptr )
42014220 Args[OffsetIdx] =
42024221 Builder.CreateAdd (Args[OffsetIdx], OP->GetU32Const (4 * LdSize));
42034222 }
@@ -4206,8 +4225,13 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
42064225 Value *Ld = Builder.CreateCall (F, Args, OP::GetOpCodeName (opcode));
42074226
42084227 // Extract elements from returned ResRet.
4209- for (unsigned j = 0 ; j < chunkSize; j++, i++)
4210- Elts[i] = Builder.CreateExtractValue (Ld, j);
4228+ // Native vector loads just have one vector element in the ResRet.
4229+ // Others have up to four scalars that need to be individually extracted.
4230+ if (opcode == OP::OpCode::RawBufferVectorLoad)
4231+ Elts[i++] = Builder.CreateExtractValue (Ld, 0 );
4232+ else
4233+ for (unsigned j = 0 ; j < chunkSize; j++, i++)
4234+ Elts[i] = Builder.CreateExtractValue (Ld, j);
42114235
42124236 // Update status.
42134237 UpdateStatus (Ld, helper.status , Builder, OP);
@@ -4245,9 +4269,10 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
42454269 }
42464270 }
42474271
4248- // Package elements into a vector.
4272+ // Package elements into a vector as needed .
42494273 Value *retValNew = nullptr ;
4250- if (!Ty->isVectorTy ()) {
4274+ // Scalar or native vector loads need not construct vectors from elements.
4275+ if (!Ty->isVectorTy () || opcode == OP::OpCode::RawBufferVectorLoad) {
42514276 retValNew = Elts[0 ];
42524277 } else {
42534278 retValNew = UndefValue::get (VectorType::get (EltTy, NumComponents));
@@ -4345,6 +4370,10 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
43454370 case DxilResource::Kind::StructuredBuffer:
43464371 IsTyped = false ;
43474372 opcode = OP::OpCode::RawBufferStore;
4373+ // Where shader model and type allows, use vector store intrinsic.
4374+ if (OP->GetModule ()->GetHLModule ().GetShaderModel ()->IsSM69Plus () &&
4375+ Ty->isVectorTy () && Ty->getVectorNumElements () > 1 )
4376+ opcode = OP::OpCode::RawBufferVectorStore;
43484377 break ;
43494378 case DxilResource::Kind::TypedBuffer:
43504379 opcode = OP::OpCode::BufferStore;
@@ -4387,7 +4416,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
43874416 EltTy = i32Ty;
43884417 }
43894418
4390- Function *F = OP->GetOpFunc (opcode, EltTy);
43914419 llvm::Constant *opArg = OP->GetU32Const ((unsigned )opcode);
43924420
43934421 llvm::Value *undefI =
@@ -4401,6 +4429,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
44014429
44024430 unsigned OffsetIdx = 0 ;
44034431 if (opcode == OP::OpCode::RawBufferStore ||
4432+ opcode == OP::OpCode::RawBufferVectorStore ||
44044433 opcode == OP::OpCode::BufferStore) {
44054434 // Append Coord0 (Index) value.
44064435 if (Idx->getType ()->isVectorTy ()) {
@@ -4420,7 +4449,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
44204449 OffsetIdx = storeArgs.size () - 1 ;
44214450
44224451 // Coord1 (Offset).
4423- // Only relevant when storing more than 4 elements to structured buffers.
44244452 storeArgs.emplace_back (offset);
44254453 } else {
44264454 // texture store
@@ -4441,6 +4469,16 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
44414469 // TODO: support mip for texture ST
44424470 }
44434471
4472+ // RawBufferVectorStore only takes a single value and alignment arguments.
4473+ if (opcode == DXIL::OpCode::RawBufferVectorStore) {
4474+ storeArgs.emplace_back (val);
4475+ storeArgs.emplace_back (Alignment);
4476+ Function *F = OP->GetOpFunc (DXIL::OpCode::RawBufferVectorStore, Ty);
4477+ Builder.CreateCall (F, storeArgs);
4478+ return ;
4479+ }
4480+ Function *F = OP->GetOpFunc (opcode, EltTy);
4481+
44444482 constexpr unsigned MaxStoreElemCount = 4 ;
44454483 const unsigned CompCount = Ty->isVectorTy () ? Ty->getVectorNumElements () : 1 ;
44464484 const unsigned StoreInstCount =
0 commit comments