Address review: refactor widenMinPrecisionType, extend to RawBufferStore

alsepkow · Copilot · alsepkow · commit 41f37f9cde53 · 2026-03-30T16:22:33.000-07:00
- widenMinPrecisionType now takes a single Type* (vector or scalar) and LLVMContext instead of separate EltTy/VecOrScalarTy and IRBuilder.
- Load path uses widenMinPrecisionType upfront, eliminating the else-if-isMinPrec branch per tex3d's suggestion.
- Store widening now covers both RawBufferStore and RawBufferVectorStore. Without this, scalar min-precision stores crash with a cast type mismatch.
- Added scalar RawBufferLoad/Store test cases for all 3 min-precision types.
- Added TODO(#8314) for SExt/ZExt signedness issue.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp
@@ -4327,16 +4327,18 @@ static bool isMinPrecisionType(Type *EltTy, const DataLayout &DL) {
          DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits();
 }
 
-// Widens a min precision element type to its 32-bit equivalent (i32 or f32).
-// Returns the original type if not min precision.
-static Type *widenMinPrecisionType(Type *EltTy, Type *VecOrScalarTy,
-                                   IRBuilder<> &Builder, const DataLayout &DL) {
+// Widens a min precision type to its 32-bit equivalent (i32 or f32).
+// Accepts vector or scalar types. Returns the original type if not min
+// precision.
+static Type *widenMinPrecisionType(Type *Ty, LLVMContext &Ctx,
+                                   const DataLayout &DL) {
+  Type *EltTy = Ty->getScalarType();
   if (!isMinPrecisionType(EltTy, DL))
-    return VecOrScalarTy;
-  Type *WideTy = EltTy->isFloatingPointTy() ? (Type *)Builder.getFloatTy()
-                                            : (Type *)Builder.getInt32Ty();
-  if (VecOrScalarTy->isVectorTy())
-    return VectorType::get(WideTy, VecOrScalarTy->getVectorNumElements());
+    return Ty;
+  Type *WideTy = EltTy->isFloatingPointTy() ? Type::getFloatTy(Ctx)
+                                            : Type::getInt32Ty(Ctx);
+  if (Ty->isVectorTy())
+    return VectorType::get(WideTy, Ty->getVectorNumElements());
   return WideTy;
 }
 
@@ -4353,24 +4355,16 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
     NumComponents = Ty->getVectorNumElements();
 
   const bool isTyped = DXIL::IsTyped(RK);
-  Type *EltTy = Ty->getScalarType();
+  Type *OrigEltTy = Ty->getScalarType();
+  Type *WidenedTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
+  Type *EltTy = WidenedTy->getScalarType();
+  const bool isMinPrec = (WidenedTy != Ty);
   const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy());
   const bool isBool = EltTy->isIntegerTy(1);
-  // Min precision alloc size exceeds prim size. Use the widened type.
-  const bool isMinPrec = isMinPrecisionType(EltTy, DL);
-  Type *OrigEltTy = EltTy;
-  // Values will be loaded in memory representations.
   // If bool (i1), load from memory-representation (i32),
   // or if 64-bits and typed, load i32 chunks, then reconstruct values.
-  if (isBool || (is64 && isTyped)) {
+  if (isBool || (is64 && isTyped))
     EltTy = Builder.getInt32Ty();
-  } else if (isMinPrec) {
-    // If min-precision, load raw value as 32-bit type.
-    if (EltTy->isFloatingPointTy())
-      EltTy = Builder.getFloatTy();
-    else
-      EltTy = Builder.getInt32Ty();
-  }
 
   // Calculate load size with the scalar memory element type.
   unsigned LdSize = DL.getTypeAllocSize(EltTy);
@@ -4612,15 +4606,20 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
     val = Builder.CreateZExt(val, Ty);
   }
 
-  // Widen min precision types to i32/f32 for RawBufferVectorStore.
-  if (opcode == OP::OpCode::RawBufferVectorStore) {
+  // Widen min precision types to i32/f32 for raw buffer stores.
+  // Min precision types have 32-bit alloc size, so the address math and
+  // store intrinsic must use 32-bit values to match.
+  if (opcode == OP::OpCode::RawBufferStore ||
+      opcode == OP::OpCode::RawBufferVectorStore) {
     const DataLayout &DL =
         OP->GetModule()->GetHLModule().GetModule()->getDataLayout();
-    Type *WideTy = widenMinPrecisionType(EltTy, Ty, Builder, DL);
+    Type *WideTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
     if (WideTy != Ty) {
       if (EltTy->isFloatingPointTy())
         val = Builder.CreateFPExt(val, WideTy);
       else
+        // TODO(#8314): Signedness info is lost by this point; SExt is wrong
+        // for min16uint. Front-end should widen during Clang CodeGen instead.
         val = Builder.CreateSExt(val, WideTy);
       EltTy = WideTy->getScalarType();
       Ty = WideTy;
diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_vector_load_store.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_vector_load_store.hlsl
@@ -1,13 +1,15 @@
 // RUN: %dxc -E main -T cs_6_9 %s | FileCheck %s
 
-// Regression test for min precision rawBufferVectorLoad/Store.
-// Min precision types should use i32/f32 vector operations (not i16/f16)
+// Regression test for min precision rawBufferLoad/Store.
+// Min precision types should use i32/f32 operations (not i16/f16)
 // to match how pre-SM6.9 RawBufferLoad handles min precision.
 
 RWByteAddressBuffer g_buf : register(u0);
 
 [numthreads(1,1,1)]
 void main() {
+  // === Vector loads/stores (RawBufferVectorLoad/Store) ===
+
   // min16int: should load as v3i32, not v3i16
   // CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
   min16int3 vi = g_buf.Load< min16int3 >(0);
@@ -28,9 +30,33 @@ void main() {
   // CHECK: call void @dx.op.rawBufferVectorStore.v3f32
   g_buf.Store< min16float3 >(60, vf);
 
-  // Verify i16/f16 vector ops are NOT used.
+  // === Scalar loads/stores (RawBufferLoad/Store) ===
+
+  // min16int scalar: should use i32 rawBufferLoad/rawBufferStore
+  // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+  min16int si = g_buf.Load< min16int >(72);
+  // CHECK: call void @dx.op.rawBufferStore.i32
+  g_buf.Store< min16int >(76, si);
+
+  // min16uint scalar: should use i32 rawBufferLoad/rawBufferStore
+  // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+  min16uint su = g_buf.Load< min16uint >(80);
+  // CHECK: call void @dx.op.rawBufferStore.i32
+  g_buf.Store< min16uint >(84, su);
+
+  // min16float scalar: should use f32 rawBufferLoad/rawBufferStore
+  // CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32
+  min16float sf = g_buf.Load< min16float >(88);
+  // CHECK: call void @dx.op.rawBufferStore.f32
+  g_buf.Store< min16float >(92, sf);
+
+  // Verify i16/f16 ops are NOT used.
   // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16
   // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16
   // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16
   // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16
+  // CHECK-NOT: rawBufferLoad.i16
+  // CHECK-NOT: rawBufferStore.i16
+  // CHECK-NOT: rawBufferLoad.f16
+  // CHECK-NOT: rawBufferStore.f16
 }