Skip to content

Commit 9b2f583

Browse files
alsepkow, Copilot, tex3d
authored
[Release SM 6.9] Cherry-Pick Fix rawBufferVectorLoad/Store to widen min precision types to 32-bit (#8369)
Cherry-pick of PR (#8274) and of the revert of out-of-scope changes, PR (#8321). Assisted by gh copilot. SHAs: [dc4354b](dc4354b), [71aa195](71aa195). --------- Co-authored-by: Copilot <[email protected]> Co-authored-by: Tex Riddell <[email protected]>
1 parent b83b317 commit 9b2f583

5 files changed

Lines changed: 111 additions & 26 deletions

File tree

lib/HLSL/HLOperationLower.cpp

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4308,6 +4308,23 @@ static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper,
43084308
return Args;
43094309
}
43104310

4311+
// Returns true when EltTy should be treated as a min precision element type:
// it is not i1, and the data layout reports an alloc size (in bits) larger
// than the type's primitive bit width.
// The visible caller passes Ty->getScalarType(), so EltTy is expected to be a
// scalar type here.
static bool isMinPrecisionType(Type *EltTy, const DataLayout &DL) {
4312+
// i1 is excluded up front so bools are never classified as min precision.
return !EltTy->isIntegerTy(1) &&
4313+
DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits();
4314+
}
4315+
4316+
// Maps a min precision scalar or vector type to its 32-bit counterpart.
//   Ty  - scalar or vector type to inspect.
//   Ctx - context used to obtain the float / i32 types.
//   DL  - data layout consulted by isMinPrecisionType.
// Returns Ty itself (same pointer) when its element type is not min
// precision, so callers can detect widening by comparing the result to Ty.
// Otherwise returns float for floating-point elements or i32 for all other
// elements, wrapped in a vector of the same element count when Ty is a vector.
static Type *widenMinPrecisionType(Type *Ty, LLVMContext &Ctx,
4317+
const DataLayout &DL) {
4318+
// Classify on the element type so scalars and vectors share one path.
Type *EltTy = Ty->getScalarType();
4319+
if (!isMinPrecisionType(EltTy, DL))
4320+
return Ty;
4321+
// Floating-point elements widen to float; everything else widens to i32.
Type *WideTy = EltTy->isFloatingPointTy() ? Type::getFloatTy(Ctx)
4322+
: Type::getInt32Ty(Ctx);
4323+
// Preserve the vector shape of the input type.
if (Ty->isVectorTy())
4324+
return VectorType::get(WideTy, Ty->getVectorNumElements());
4325+
return WideTy;
4326+
}
4327+
43114328
// Emits as many calls as needed to load the full vector
43124329
// Performs any needed extractions and conversions of the results.
43134330
Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
@@ -4321,10 +4338,13 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
43214338
NumComponents = Ty->getVectorNumElements();
43224339

43234340
const bool isTyped = DXIL::IsTyped(RK);
4324-
Type *EltTy = Ty->getScalarType();
4341+
Type *OrigEltTy = Ty->getScalarType();
4342+
Type *WidenedTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
4343+
Type *EltTy = WidenedTy->getScalarType();
4344+
const bool isMinPrec = (WidenedTy != Ty);
43254345
const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy());
43264346
const bool isBool = EltTy->isIntegerTy(1);
4327-
// Values will be loaded in memory representations.
4347+
// DXIL buffer loads require i32; narrow types are reconverted after load.
43284348
if (isBool || (is64 && isTyped))
43294349
EltTy = Builder.getInt32Ty();
43304350

@@ -4440,6 +4460,14 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
44404460
retValNew = Builder.CreateICmpNE(
44414461
retValNew, Constant::getNullValue(retValNew->getType()));
44424462

4463+
// DXIL loads min precision as 32-bit; narrow back to original IR type.
4464+
if (isMinPrec) {
4465+
if (OrigEltTy->isIntegerTy())
4466+
retValNew = Builder.CreateTrunc(retValNew, Ty);
4467+
else
4468+
retValNew = Builder.CreateFPTrunc(retValNew, Ty);
4469+
}
4470+
44434471
helper.retVal->replaceAllUsesWith(retValNew);
44444472
helper.retVal = retValNew;
44454473

@@ -4560,6 +4588,25 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
45604588
val = Builder.CreateZExt(val, Ty);
45614589
}
45624590

4591+
// Min precision alloc size is 32-bit; widen to match store intrinsic.
4592+
// Scalar RawBufferStore widening is handled by TranslateMinPrecisionRawBuffer
4593+
// in DxilGenerationPass, which has signedness info from struct annotations.
4594+
if (opcode == OP::OpCode::RawBufferVectorStore) {
4595+
const DataLayout &DL =
4596+
OP->GetModule()->GetHLModule().GetModule()->getDataLayout();
4597+
Type *WideTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
4598+
if (WideTy != Ty) {
4599+
if (EltTy->isFloatingPointTy())
4600+
val = Builder.CreateFPExt(val, WideTy);
4601+
else
4602+
// TODO(#8314): Signedness info is lost by this point; SExt is wrong
4603+
// for min16uint. Front-end should widen during Clang CodeGen instead.
4604+
val = Builder.CreateSExt(val, WideTy);
4605+
EltTy = WideTy->getScalarType();
4606+
Ty = WideTy;
4607+
}
4608+
}
4609+
45634610
// If RawBuffer store of 64-bit value, don't set alignment to 8,
45644611
// since buffer alignment isn't known to be anything over 4.
45654612
unsigned alignValue = OP->GetAllocSizeForType(EltTy);

tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,20 @@ void main()
1616
{
1717
Foo foo = buf[0];
1818
// foo.m_B.x
19-
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20-
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
19+
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20+
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
2121

2222
// foo.m_B.y
23-
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24-
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
23+
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24+
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
2525

2626
// foo.m_B.z
27-
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28-
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
27+
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28+
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
2929

3030
// foo.m_A.x
31-
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32-
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
31+
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32+
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
3333

3434
min16float value1 = foo.m_B.x;
3535
min16float value2 = foo.m_B.y;

tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,20 @@ void main()
1616
{
1717
Foo foo = buf[0];
1818
// foo.m_B.x
19-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
19+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
2121

2222
// foo.m_B.y
23-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
23+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
2525

2626
// foo.m_B.z
27-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
27+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
2929

3030
// foo.m_A.x
31-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
31+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
3333

3434
min16int value1 = foo.m_B.x;
3535
min16int value2 = foo.m_B.y;

tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,20 @@ void main()
1616
{
1717
Foo foo = buf[0];
1818
// foo.m_B.x
19-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
19+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
2121

2222
// foo.m_B.y
23-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
23+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
2525

2626
// foo.m_B.z
27-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
27+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
2929

3030
// foo.m_A.x
31-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
31+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
3333

3434
min16int value1 = foo.m_B.x;
3535
min16int value2 = foo.m_B.y;
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// RUN: %dxc -E main -T cs_6_9 %s | FileCheck %s
2+
3+
// Regression test for min precision rawBufferLoad/Store.
4+
// Min precision types should use i32/f32 operations (not i16/f16)
5+
// to match how pre-SM6.9 RawBufferLoad handles min precision.
6+
7+
RWByteAddressBuffer g_buf : register(u0);
8+
9+
[numthreads(1,1,1)]
10+
void main() {
11+
// === Vector loads/stores (RawBufferVectorLoad/Store) ===
12+
13+
// min16int: should load as v3i32, not v3i16
14+
// CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
15+
min16int3 vi = g_buf.Load< min16int3 >(0);
16+
// CHECK: call void @dx.op.rawBufferVectorStore.v3i32
17+
g_buf.Store< min16int3 >(12, vi);
18+
19+
// min16uint: should load as v3i32, not v3i16
20+
// CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
21+
min16uint3 vu = g_buf.Load< min16uint3 >(24);
22+
// CHECK: call void @dx.op.rawBufferVectorStore.v3i32
23+
g_buf.Store< min16uint3 >(36, vu);
24+
25+
// min16float: should load as v3f32, not v3f16
26+
// CHECK: call %dx.types.ResRet.v3f32 @dx.op.rawBufferVectorLoad.v3f32
27+
// CHECK: fptrunc <3 x float> {{.*}} to <3 x half>
28+
min16float3 vf = g_buf.Load< min16float3 >(48);
29+
// CHECK: fpext <3 x half> {{.*}} to <3 x float>
30+
// CHECK: call void @dx.op.rawBufferVectorStore.v3f32
31+
g_buf.Store< min16float3 >(60, vf);
32+
33+
// Verify i16/f16 ops are NOT used for vector loads/stores.
34+
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16
35+
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16
36+
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16
37+
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16
38+
}

0 commit comments

Comments
 (0)