Skip to content

Commit dc4354b

Browse files
alsepkow, Copilot, and tex3d
authored
Fix rawBufferVectorLoad/Store to widen min precision types to 32-bit (microsoft#8274)
## Summary

Fixes `RawBufferVectorLoad`/`Store` to use 32-bit element types (`i32`/`f32`) for min precision types (`min16int`, `min16uint`, `min16float`) instead of 16-bit (`i16`/`f16`). This matches how pre-SM6.9 `RawBufferLoad` handles min precision.

Resolves microsoft#8273

## Root Cause

`TranslateBufLoad` in `HLOperationLower.cpp` creates the vector type directly from the min precision element type (`i16`/`f16`) without widening to `i32`/`f32`. This causes WARP (and potentially other drivers) to load/store 2 bytes per element instead of 4, mismatching the buffer layout.

## Fix

Apply the same widening pattern used for bool types:

- **Load**: Load as `v_i32`/`v_f32`, then trunc/fptrunc back to `i16`/`half`
- **Store**: `sext`/`fpext` to `i32`/`f32`, then store as `v_i32`/`v_f32`

## Testing

Added FileCheck test verifying all 3 min precision types produce `i32`/`f32` vector load/store ops.

Co-authored-by: Copilot <[email protected]>

---------

Co-authored-by: Copilot <[email protected]>
Co-authored-by: Tex Riddell <[email protected]>
1 parent 097ab6b commit dc4354b

6 files changed

Lines changed: 180 additions & 56 deletions

File tree

lib/HLSL/DxilGenerationPass.cpp

Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -993,11 +993,10 @@ void ReplaceMinPrecisionRawBufferStoreByType(
993993
Args.emplace_back(NewV);
994994
}
995995
} else if (FromTy->isIntegerTy()) {
996-
// This case only applies to typed buffer since Store operation of byte
997-
// address buffer for min precision is handled by implicit conversion on
998-
// intrinsic call. Since we are extending integer, we have to know if we
999-
// should sign ext or zero ext. We can do this by iterating checking the
1000-
// size of the element at struct type and comp type at type annotation
996+
// Since we are extending integer, we have to know if we should sign ext
997+
// or zero ext. For StructuredBuffers we get signedness from the struct
998+
// type annotation. For ByteAddressBuffer (raw buffers) there is no struct
999+
// annotation, so we fall back to sext as a conservative default.
10011000
CallInst *handleCI = dyn_cast<CallInst>(
10021001
CI->getArgOperand(DxilInst_RawBufferStore::arg_uav));
10031002
DXASSERT(handleCI,
@@ -1007,34 +1006,50 @@ void ReplaceMinPrecisionRawBufferStoreByType(
10071006
"otherwise fail to handle for buffer store lost its retTy");
10081007
StructType *STy = dyn_cast<StructType>(resTyIt->second);
10091008

1010-
STy = cast<StructType>(STy->getElementType(0));
1011-
DxilStructAnnotation *SAnnot = typeSys.GetStructAnnotation(STy);
1012-
ConstantInt *offsetInt = dyn_cast<ConstantInt>(
1013-
CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset));
1014-
unsigned offset = offsetInt->getSExtValue();
1015-
unsigned currentOffset = 0;
1016-
for (DxilStructTypeIterator iter = begin(STy, SAnnot),
1017-
ItEnd = end(STy, SAnnot);
1018-
iter != ItEnd; ++iter) {
1019-
std::pair<Type *, DxilFieldAnnotation *> pair = *iter;
1020-
currentOffset += DL.getTypeAllocSize(pair.first);
1021-
if (currentOffset > offset) {
1022-
if (pair.second->GetCompType().IsUIntTy()) {
1023-
for (unsigned i = 4; i < 8; ++i) {
1024-
Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy);
1025-
Args.emplace_back(NewV);
1009+
StructType *InnerSTy =
1010+
STy ? dyn_cast<StructType>(STy->getElementType(0)) : nullptr;
1011+
DxilStructAnnotation *SAnnot =
1012+
InnerSTy ? typeSys.GetStructAnnotation(InnerSTy) : nullptr;
1013+
1014+
if (SAnnot) {
1015+
// StructuredBuffer path: use struct annotation to determine signedness.
1016+
ConstantInt *offsetInt = dyn_cast<ConstantInt>(
1017+
CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset));
1018+
unsigned offset = offsetInt->getSExtValue();
1019+
unsigned currentOffset = 0;
1020+
for (DxilStructTypeIterator iter = begin(InnerSTy, SAnnot),
1021+
ItEnd = end(InnerSTy, SAnnot);
1022+
iter != ItEnd; ++iter) {
1023+
std::pair<Type *, DxilFieldAnnotation *> pair = *iter;
1024+
currentOffset += DL.getTypeAllocSize(pair.first);
1025+
if (currentOffset > offset) {
1026+
if (pair.second->GetCompType().IsUIntTy()) {
1027+
for (unsigned i = 4; i < 8; ++i) {
1028+
Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy);
1029+
Args.emplace_back(NewV);
1030+
}
1031+
break;
1032+
} else if (pair.second->GetCompType().IsIntTy()) {
1033+
for (unsigned i = 4; i < 8; ++i) {
1034+
Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy);
1035+
Args.emplace_back(NewV);
1036+
}
1037+
break;
1038+
} else {
1039+
DXASSERT(false, "Invalid comp type");
10261040
}
1027-
break;
1028-
} else if (pair.second->GetCompType().IsIntTy()) {
1029-
for (unsigned i = 4; i < 8; ++i) {
1030-
Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy);
1031-
Args.emplace_back(NewV);
1032-
}
1033-
break;
1034-
} else {
1035-
DXASSERT(false, "Invalid comp type");
10361041
}
10371042
}
1043+
} else {
1044+
// ByteAddressBuffer path: no struct annotation available, so
1045+
// signedness is unknown. Default to sext.
1046+
for (unsigned i = 4; i < 8; ++i) {
1047+
Value *Arg = CI->getArgOperand(i);
1048+
if (isa<UndefValue>(Arg))
1049+
Args.emplace_back(UndefValue::get(ToTy));
1050+
else
1051+
Args.emplace_back(CIBuilder.CreateSExt(Arg, ToTy));
1052+
}
10381053
}
10391054
}
10401055

lib/HLSL/HLOperationLower.cpp

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4322,6 +4322,23 @@ static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper,
43224322
return Args;
43234323
}
43244324

4325+
static bool isMinPrecisionType(Type *EltTy, const DataLayout &DL) {
4326+
return !EltTy->isIntegerTy(1) &&
4327+
DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits();
4328+
}
4329+
4330+
static Type *widenMinPrecisionType(Type *Ty, LLVMContext &Ctx,
4331+
const DataLayout &DL) {
4332+
Type *EltTy = Ty->getScalarType();
4333+
if (!isMinPrecisionType(EltTy, DL))
4334+
return Ty;
4335+
Type *WideTy = EltTy->isFloatingPointTy() ? Type::getFloatTy(Ctx)
4336+
: Type::getInt32Ty(Ctx);
4337+
if (Ty->isVectorTy())
4338+
return VectorType::get(WideTy, Ty->getVectorNumElements());
4339+
return WideTy;
4340+
}
4341+
43254342
// Emits as many calls as needed to load the full vector
43264343
// Performs any needed extractions and conversions of the results.
43274344
Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
@@ -4335,10 +4352,13 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
43354352
NumComponents = Ty->getVectorNumElements();
43364353

43374354
const bool isTyped = DXIL::IsTyped(RK);
4338-
Type *EltTy = Ty->getScalarType();
4355+
Type *OrigEltTy = Ty->getScalarType();
4356+
Type *WidenedTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
4357+
Type *EltTy = WidenedTy->getScalarType();
4358+
const bool isMinPrec = (WidenedTy != Ty);
43394359
const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy());
43404360
const bool isBool = EltTy->isIntegerTy(1);
4341-
// Values will be loaded in memory representations.
4361+
// Bool and 64-bit typed-buffer elements are loaded using i32; min precision types were already widened to 32-bit above.
43424362
if (isBool || (is64 && isTyped))
43434363
EltTy = Builder.getInt32Ty();
43444364

@@ -4454,6 +4474,14 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
44544474
retValNew = Builder.CreateICmpNE(
44554475
retValNew, Constant::getNullValue(retValNew->getType()));
44564476

4477+
// DXIL loads min precision as 32-bit; narrow back to original IR type.
4478+
if (isMinPrec) {
4479+
if (OrigEltTy->isIntegerTy())
4480+
retValNew = Builder.CreateTrunc(retValNew, Ty);
4481+
else
4482+
retValNew = Builder.CreateFPTrunc(retValNew, Ty);
4483+
}
4484+
44574485
helper.retVal->replaceAllUsesWith(retValNew);
44584486
helper.retVal = retValNew;
44594487

@@ -4574,6 +4602,25 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
45744602
val = Builder.CreateZExt(val, Ty);
45754603
}
45764604

4605+
// Min precision alloc size is 32-bit; widen to match store intrinsic.
4606+
// Scalar RawBufferStore widening is handled by TranslateMinPrecisionRawBuffer
4607+
// in DxilGenerationPass, which takes signedness from struct annotations (StructuredBuffer) or defaults to sext (ByteAddressBuffer).
4608+
if (opcode == OP::OpCode::RawBufferVectorStore) {
4609+
const DataLayout &DL =
4610+
OP->GetModule()->GetHLModule().GetModule()->getDataLayout();
4611+
Type *WideTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
4612+
if (WideTy != Ty) {
4613+
if (EltTy->isFloatingPointTy())
4614+
val = Builder.CreateFPExt(val, WideTy);
4615+
else
4616+
// TODO(#8314): Signedness info is lost by this point; SExt is wrong
4617+
// for min16uint. Front-end should widen during Clang CodeGen instead.
4618+
val = Builder.CreateSExt(val, WideTy);
4619+
EltTy = WideTy->getScalarType();
4620+
Ty = WideTy;
4621+
}
4622+
}
4623+
45774624
// If RawBuffer store of 64-bit value, don't set alignment to 8,
45784625
// since buffer alignment isn't known to be anything over 4.
45794626
unsigned alignValue = OP->GetAllocSizeForType(EltTy);

tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,20 @@ void main()
1616
{
1717
Foo foo = buf[0];
1818
// foo.m_B.x
19-
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20-
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
19+
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20+
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
2121

2222
// foo.m_B.y
23-
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24-
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
23+
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24+
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
2525

2626
// foo.m_B.z
27-
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28-
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
27+
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28+
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
2929

3030
// foo.m_A.x
31-
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32-
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
31+
// CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32+
// CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
3333

3434
min16float value1 = foo.m_B.x;
3535
min16float value2 = foo.m_B.y;

tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,20 @@ void main()
1616
{
1717
Foo foo = buf[0];
1818
// foo.m_B.x
19-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
19+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
2121

2222
// foo.m_B.y
23-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
23+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
2525

2626
// foo.m_B.z
27-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
27+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
2929

3030
// foo.m_A.x
31-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
31+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
3333

3434
min16int value1 = foo.m_B.x;
3535
min16int value2 = foo.m_B.y;

tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,20 @@ void main()
1616
{
1717
Foo foo = buf[0];
1818
// foo.m_B.x
19-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
19+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
20+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
2121

2222
// foo.m_B.y
23-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
23+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
24+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
2525

2626
// foo.m_B.z
27-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
27+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
28+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
2929

3030
// foo.m_A.x
31-
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32-
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
31+
// CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
32+
// CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
3333

3434
min16int value1 = foo.m_B.x;
3535
min16int value2 = foo.m_B.y;
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// RUN: %dxc -E main -T cs_6_9 %s | FileCheck %s
2+
3+
// Regression test for min precision rawBufferLoad/Store.
4+
// Min precision types should use i32/f32 operations (not i16/f16)
5+
// to match how pre-SM6.9 RawBufferLoad handles min precision.
6+
7+
RWByteAddressBuffer g_buf : register(u0);
8+
9+
[numthreads(1,1,1)]
10+
void main() {
11+
// === Vector loads/stores (RawBufferVectorLoad/Store) ===
12+
13+
// min16int: should load as v3i32, not v3i16
14+
// CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
15+
min16int3 vi = g_buf.Load< min16int3 >(0);
16+
// CHECK: call void @dx.op.rawBufferVectorStore.v3i32
17+
g_buf.Store< min16int3 >(12, vi);
18+
19+
// min16uint: should load as v3i32, not v3i16
20+
// CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
21+
min16uint3 vu = g_buf.Load< min16uint3 >(24);
22+
// CHECK: call void @dx.op.rawBufferVectorStore.v3i32
23+
g_buf.Store< min16uint3 >(36, vu);
24+
25+
// min16float: should load as v3f32, not v3f16
26+
// CHECK: call %dx.types.ResRet.v3f32 @dx.op.rawBufferVectorLoad.v3f32
27+
// CHECK: fptrunc <3 x float> {{.*}} to <3 x half>
28+
min16float3 vf = g_buf.Load< min16float3 >(48);
29+
// CHECK: fpext <3 x half> {{.*}} to <3 x float>
30+
// CHECK: call void @dx.op.rawBufferVectorStore.v3f32
31+
g_buf.Store< min16float3 >(60, vf);
32+
33+
// === Scalar loads/stores (RawBufferLoad/Store) ===
34+
35+
// min16int scalar: should use i32 rawBufferLoad/rawBufferStore
36+
// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
37+
min16int si = g_buf.Load< min16int >(72);
38+
// CHECK: call void @dx.op.rawBufferStore.i32
39+
g_buf.Store< min16int >(76, si);
40+
41+
// min16uint scalar: should use i32 rawBufferStore
42+
// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
43+
min16uint su = g_buf.Load< min16uint >(80);
44+
// CHECK: call void @dx.op.rawBufferStore.i32
45+
g_buf.Store< min16uint >(84, su);
46+
47+
// min16float scalar: should use f32 rawBufferStore
48+
// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32
49+
min16float sf = g_buf.Load< min16float >(88);
50+
// CHECK: call void @dx.op.rawBufferStore.f32
51+
g_buf.Store< min16float >(92, sf);
52+
53+
// Verify i16/f16 ops are NOT used.
54+
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16
55+
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16
56+
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16
57+
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16
58+
// CHECK-NOT: rawBufferLoad.i16
59+
// CHECK-NOT: rawBufferStore.i16
60+
// CHECK-NOT: rawBufferLoad.f16
61+
// CHECK-NOT: rawBufferStore.f16
62+
}

0 commit comments

Comments
 (0)