Skip to content

Commit 41f37f9

Browse files
alsepkowCopilot
andcommitted
Address review: refactor widenMinPrecisionType, extend to RawBufferStore
- widenMinPrecisionType now takes a single Type* (vector or scalar) and LLVMContext instead of separate EltTy/VecOrScalarTy and IRBuilder. - Load path uses widenMinPrecisionType upfront, eliminating the else-if-isMinPrec branch per tex3d's suggestion. - Store widening now covers both RawBufferStore and RawBufferVectorStore. Without this, scalar min-precision stores crash with a cast type mismatch. - Added scalar RawBufferLoad/Store test cases for all 3 min-precision types. - Added TODO(#8314) for SExt/ZExt signedness issue. Co-authored-by: Copilot <[email protected]>
1 parent 8e7c6fb commit 41f37f9

2 files changed

Lines changed: 53 additions & 28 deletions

File tree

lib/HLSL/HLOperationLower.cpp

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4327,16 +4327,18 @@ static bool isMinPrecisionType(Type *EltTy, const DataLayout &DL) {
43274327
DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits();
43284328
}
43294329

4330-
// Widens a min precision element type to its 32-bit equivalent (i32 or f32).
4331-
// Returns the original type if not min precision.
4332-
static Type *widenMinPrecisionType(Type *EltTy, Type *VecOrScalarTy,
4333-
IRBuilder<> &Builder, const DataLayout &DL) {
4330+
// Widens a min precision type to its 32-bit equivalent (i32 or f32).
4331+
// Accepts vector or scalar types. Returns the original type if not min
4332+
// precision.
4333+
static Type *widenMinPrecisionType(Type *Ty, LLVMContext &Ctx,
4334+
const DataLayout &DL) {
4335+
Type *EltTy = Ty->getScalarType();
43344336
if (!isMinPrecisionType(EltTy, DL))
4335-
return VecOrScalarTy;
4336-
Type *WideTy = EltTy->isFloatingPointTy() ? (Type *)Builder.getFloatTy()
4337-
: (Type *)Builder.getInt32Ty();
4338-
if (VecOrScalarTy->isVectorTy())
4339-
return VectorType::get(WideTy, VecOrScalarTy->getVectorNumElements());
4337+
return Ty;
4338+
Type *WideTy = EltTy->isFloatingPointTy() ? Type::getFloatTy(Ctx)
4339+
: Type::getInt32Ty(Ctx);
4340+
if (Ty->isVectorTy())
4341+
return VectorType::get(WideTy, Ty->getVectorNumElements());
43404342
return WideTy;
43414343
}
43424344

@@ -4353,24 +4355,16 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
43534355
NumComponents = Ty->getVectorNumElements();
43544356

43554357
const bool isTyped = DXIL::IsTyped(RK);
4356-
Type *EltTy = Ty->getScalarType();
4358+
Type *OrigEltTy = Ty->getScalarType();
4359+
Type *WidenedTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
4360+
Type *EltTy = WidenedTy->getScalarType();
4361+
const bool isMinPrec = (WidenedTy != Ty);
43574362
const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy());
43584363
const bool isBool = EltTy->isIntegerTy(1);
4359-
// Min precision alloc size exceeds prim size. Use the widened type.
4360-
const bool isMinPrec = isMinPrecisionType(EltTy, DL);
4361-
Type *OrigEltTy = EltTy;
4362-
// Values will be loaded in memory representations.
43634364
// If bool (i1), load from memory-representation (i32),
43644365
// or if 64-bits and typed, load i32 chunks, then reconstruct values.
4365-
if (isBool || (is64 && isTyped)) {
4366+
if (isBool || (is64 && isTyped))
43664367
EltTy = Builder.getInt32Ty();
4367-
} else if (isMinPrec) {
4368-
// If min-precision, load raw value as 32-bit type.
4369-
if (EltTy->isFloatingPointTy())
4370-
EltTy = Builder.getFloatTy();
4371-
else
4372-
EltTy = Builder.getInt32Ty();
4373-
}
43744368

43754369
// Calculate load size with the scalar memory element type.
43764370
unsigned LdSize = DL.getTypeAllocSize(EltTy);
@@ -4612,15 +4606,20 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
46124606
val = Builder.CreateZExt(val, Ty);
46134607
}
46144608

4615-
// Widen min precision types to i32/f32 for RawBufferVectorStore.
4616-
if (opcode == OP::OpCode::RawBufferVectorStore) {
4609+
// Widen min precision types to i32/f32 for raw buffer stores.
4610+
// Min precision types have 32-bit alloc size, so the address math and
4611+
// store intrinsic must use 32-bit values to match.
4612+
if (opcode == OP::OpCode::RawBufferStore ||
4613+
opcode == OP::OpCode::RawBufferVectorStore) {
46174614
const DataLayout &DL =
46184615
OP->GetModule()->GetHLModule().GetModule()->getDataLayout();
4619-
Type *WideTy = widenMinPrecisionType(EltTy, Ty, Builder, DL);
4616+
Type *WideTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
46204617
if (WideTy != Ty) {
46214618
if (EltTy->isFloatingPointTy())
46224619
val = Builder.CreateFPExt(val, WideTy);
46234620
else
4621+
// TODO(#8314): Signedness info is lost by this point; SExt is wrong
4622+
// for min16uint. Front-end should widen during Clang CodeGen instead.
46244623
val = Builder.CreateSExt(val, WideTy);
46254624
EltTy = WideTy->getScalarType();
46264625
Ty = WideTy;

tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_vector_load_store.hlsl

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
// RUN: %dxc -E main -T cs_6_9 %s | FileCheck %s
22

3-
// Regression test for min precision rawBufferVectorLoad/Store.
4-
// Min precision types should use i32/f32 vector operations (not i16/f16)
3+
// Regression test for min precision rawBufferLoad/Store.
4+
// Min precision types should use i32/f32 operations (not i16/f16)
55
// to match how pre-SM6.9 RawBufferLoad handles min precision.
66

77
RWByteAddressBuffer g_buf : register(u0);
88

99
[numthreads(1,1,1)]
1010
void main() {
11+
// === Vector loads/stores (RawBufferVectorLoad/Store) ===
12+
1113
// min16int: should load as v3i32, not v3i16
1214
// CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
1315
min16int3 vi = g_buf.Load< min16int3 >(0);
@@ -28,9 +30,33 @@ void main() {
2830
// CHECK: call void @dx.op.rawBufferVectorStore.v3f32
2931
g_buf.Store< min16float3 >(60, vf);
3032

31-
// Verify i16/f16 vector ops are NOT used.
33+
// === Scalar loads/stores (RawBufferLoad/Store) ===
34+
35+
// min16int scalar: should use i32 rawBufferStore
36+
// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
37+
min16int si = g_buf.Load< min16int >(72);
38+
// CHECK: call void @dx.op.rawBufferStore.i32
39+
g_buf.Store< min16int >(76, si);
40+
41+
// min16uint scalar: should use i32 rawBufferStore
42+
// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
43+
min16uint su = g_buf.Load< min16uint >(80);
44+
// CHECK: call void @dx.op.rawBufferStore.i32
45+
g_buf.Store< min16uint >(84, su);
46+
47+
// min16float scalar: should use f32 rawBufferStore
48+
// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32
49+
min16float sf = g_buf.Load< min16float >(88);
50+
// CHECK: call void @dx.op.rawBufferStore.f32
51+
g_buf.Store< min16float >(92, sf);
52+
53+
// Verify i16/f16 ops are NOT used.
3254
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16
3355
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16
3456
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16
3557
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16
58+
// CHECK-NOT: rawBufferLoad.i16
59+
// CHECK-NOT: rawBufferStore.i16
60+
// CHECK-NOT: rawBufferLoad.f16
61+
// CHECK-NOT: rawBufferStore.f16
3662
}

0 commit comments

Comments
 (0)