Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions lib/HLSL/HLOperationLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4338,9 +4338,20 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
Type *EltTy = Ty->getScalarType();
const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy());
const bool isBool = EltTy->isIntegerTy(1);
// Check for min precision types: their alloc size (from data layout padding
// like i16:32, f16:32) exceeds their primitive size. RawBufferVectorLoad
// should use the widened type (i32/f32) to match how pre-SM6.9
// RawBufferLoad handles min precision (load i32, then trunc to i16).
Comment thread
alsepkow marked this conversation as resolved.
Outdated
const bool isMinPrec = !isBool && DL.getTypeAllocSizeInBits(EltTy) >
EltTy->getPrimitiveSizeInBits();
Comment thread
alsepkow marked this conversation as resolved.
Outdated
Type *OrigEltTy = EltTy;
// Values will be loaded in memory representations.
if (isBool || (is64 && isTyped))
EltTy = Builder.getInt32Ty();
if (isBool || (is64 && isTyped) || isMinPrec) {
if (isMinPrec && EltTy->isFloatingPointTy())
EltTy = Builder.getFloatTy();
else
EltTy = Builder.getInt32Ty();
}
Comment thread
alsepkow marked this conversation as resolved.
Outdated

// Calculate load size with the scalar memory element type.
unsigned LdSize = DL.getTypeAllocSize(EltTy);
Expand Down Expand Up @@ -4454,6 +4465,16 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
retValNew = Builder.CreateICmpNE(
retValNew, Constant::getNullValue(retValNew->getType()));

// Truncate widened min precision loads back to original type.
// e.g., <3 x i32> from rawBufferVectorLoad.v3i32 -> <3 x i16>
if (isMinPrec) {
Type *TargetTy = Ty;
Comment thread
alsepkow marked this conversation as resolved.
Outdated
if (OrigEltTy->isIntegerTy())
retValNew = Builder.CreateTrunc(retValNew, TargetTy);
else
retValNew = Builder.CreateFPTrunc(retValNew, TargetTy);
}

helper.retVal->replaceAllUsesWith(retValNew);
helper.retVal = retValNew;

Expand Down Expand Up @@ -4574,6 +4595,27 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
val = Builder.CreateZExt(val, Ty);
}

// Widen min precision types to i32/f32 for RawBufferVectorStore, matching
// how pre-SM6.9 RawBufferStore handles min precision (store as i32).
Comment thread
alsepkow marked this conversation as resolved.
Outdated
if (opcode == OP::OpCode::RawBufferVectorStore) {
const DataLayout &DL =
Comment thread
alsepkow marked this conversation as resolved.
OP->GetModule()->GetHLModule().GetModule()->getDataLayout();
if (DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits()) {
Type *WideTy = EltTy->isFloatingPointTy() ? (Type *)Builder.getFloatTy()
: (Type *)i32Ty;
Type *WideVecTy =
Ty->isVectorTy()
? (Type *)VectorType::get(WideTy, Ty->getVectorNumElements())
: WideTy;
if (EltTy->isFloatingPointTy())
val = Builder.CreateFPExt(val, WideVecTy);
else
val = Builder.CreateSExt(val, WideVecTy);
EltTy = WideTy;
Ty = WideVecTy;
}
}

// If RawBuffer store of 64-bit value, don't set alignment to 8,
// since buffer alignment isn't known to be anything over 4.
unsigned alignValue = OP->GetAllocSizeForType(EltTy);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// RUN: %dxc -E main -T cs_6_9 %s | FileCheck %s

// Regression test for min precision rawBufferVectorLoad/Store.
// Min precision types should use i32/f32 vector operations (not i16/f16)
// to match how pre-SM6.9 RawBufferLoad handles min precision.
Comment thread
alsepkow marked this conversation as resolved.
Outdated

RWByteAddressBuffer g_buf : register(u0);

[numthreads(1,1,1)]
void main() {
// min16int: should load as v3i32, not v3i16
// CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
min16int3 vi = g_buf.Load< min16int3 >(0);
// CHECK: call void @dx.op.rawBufferVectorStore.v3i32
g_buf.Store< min16int3 >(12, vi);

// min16uint: should load as v3i32, not v3i16
// CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
min16uint3 vu = g_buf.Load< min16uint3 >(24);
// CHECK: call void @dx.op.rawBufferVectorStore.v3i32
g_buf.Store< min16uint3 >(36, vu);

// min16float: should load as v3f32, not v3f16
// CHECK: call %dx.types.ResRet.v3f32 @dx.op.rawBufferVectorLoad.v3f32
// CHECK: fptrunc <3 x float> {{.*}} to <3 x half>
min16float3 vf = g_buf.Load< min16float3 >(48);
// CHECK: fpext <3 x half> {{.*}} to <3 x float>
// CHECK: call void @dx.op.rawBufferVectorStore.v3f32
g_buf.Store< min16float3 >(60, vf);

// Verify i16/f16 vector ops are NOT used.
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16
}
Loading