Skip to content

Commit 1448249

Browse files
alsepkow and Copilot committed
Fix SROA incorrect vector element index for padded min precision types
SROA's getNaturalGEPRecursively used getTypeSizeInBits (the primitive, unpadded size) to compute vector element offsets, but GEP offset calculation uses getTypeAllocSize (the padded size). With DXC's data layout (i16:32, f16:32), this mismatch caused byte offset 4, which is element 1, to map to vector index 2 instead of index 1: 4 bytes / 2-byte primitive size = 2, whereas 4 bytes / 4-byte alloc size = 1. This led SROA to misplace or eliminate stores to vector elements 1 and 2, producing incorrect code.

Fix: Use getTypeAllocSizeInBits consistently for vector element sizes in getNaturalGEPRecursively, isVectorPromotionViable, and the AllocaSliceRewriter constructor.

Co-authored-by: Copilot <[email protected]>
1 parent b9ffe55 commit 1448249

1 file changed

Lines changed: 10 additions & 4 deletions

File tree

lib/Transforms/Scalar/SROA.cpp

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1671,7 +1671,11 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
16711671
// extremely poorly defined currently. The long-term goal is to remove GEPing
16721672
// over a vector from the IR completely.
16731673
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
1674-
unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
1674+
// HLSL Change: Use alloc size instead of primitive type size for vector
1675+
// elements. DXC's data layout pads min precision types (i16:32, f16:32),
1676+
// so getTypeAllocSize matches the GEP offset stride while
1677+
// getTypeSizeInBits returns the unpadded primitive width.
1678+
unsigned ElementSizeInBits = DL.getTypeAllocSizeInBits(VecTy->getScalarType());
16751679
if (ElementSizeInBits % 8 != 0) {
16761680
// GEPs over non-multiple of 8 size vector elements are invalid.
16771681
return nullptr;
@@ -2134,7 +2138,8 @@ static VectorType *isVectorPromotionViable(AllocaSlices::Partition &P,
21342138

21352139
// Try each vector type, and return the one which works.
21362140
auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
2137-
uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType());
2141+
// HLSL Change: Use alloc size to match GEP offset stride for padded types.
2142+
uint64_t ElementSize = DL.getTypeAllocSizeInBits(VTy->getElementType());
21382143

21392144
// While the definition of LLVM vectors is bitpacked, we don't support sizes
21402145
// that aren't byte sized.
@@ -2492,12 +2497,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
24922497
: nullptr),
24932498
VecTy(PromotableVecTy),
24942499
ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2495-
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
2500+
// HLSL Change: Use alloc size to match GEP offset stride for padded types.
2501+
ElementSize(VecTy ? DL.getTypeAllocSizeInBits(ElementTy) / 8 : 0),
24962502
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
24972503
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
24982504
IRB(NewAI.getContext(), ConstantFolder()) {
24992505
if (VecTy) {
2500-
assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
2506+
assert((DL.getTypeAllocSizeInBits(ElementTy) % 8) == 0 &&
25012507
"Only multiple-of-8 sized vector elements are viable");
25022508
++NumVectorized;
25032509
}

0 commit comments

Comments
 (0)