Skip to content

Commit 4897501

Browse files
authored
[SM6.10] LinAlg: Fix thread-scope InterlockedAccumulate params (#8367)
The thread-scope overload of InterlockedAccumulate was not supposed to take Stride and Layout parameters, since the layout must be OuterProductOptimal for thread scope. Additionally, Align is device-dependent for OuterProductOptimal, so there is no point in supplying it from HLSL or DXIL. This change removes these three parameters from the function in the HLSL header; the header now supplies 0 for Stride, MatrixLayout::OuterProductOptimal for Layout, and 0 for Align to the builtin operation, which passes these values along to the DXIL operation. Fixes #8360.
1 parent 464f750 commit 4897501

2 files changed

Lines changed: 5 additions & 7 deletions

File tree

tools/clang/lib/Headers/hlsl/dx/linalg.h

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -420,11 +420,9 @@ class Matrix<ComponentTy, M, N, Use, MatrixScope::Thread> {
420420
template <MatrixUseEnum UseLocal = Use>
421421
typename hlsl::enable_if<Use == MatrixUse::Accumulator && UseLocal == Use,
422422
void>::type
423-
InterlockedAccumulate(RWByteAddressBuffer Res, uint StartOffset, uint Stride,
424-
MatrixLayoutEnum Layout,
425-
uint Align = sizeof(ElementType)) {
426-
__builtin_LinAlg_MatrixAccumulateToDescriptor(__handle, Res, StartOffset,
427-
Stride, Layout, Align);
423+
InterlockedAccumulate(RWByteAddressBuffer Res, uint StartOffset) {
424+
__builtin_LinAlg_MatrixAccumulateToDescriptor(
425+
__handle, Res, StartOffset, 0, MatrixLayout::OuterProductOptimal, 0);
428426
}
429427
};
430428

tools/clang/test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,12 @@ void main(uint ID : SV_GroupID)
182182
//
183183
// CHECK: %[[TSACCUM:.*]] = call %dx.types.LinAlgMatrixC9M4N4U2S0 @dx.op.linAlgMatrixOuterProduct.mC9M4N4U2S0.v4f32.v4f32
184184
// CHECK: call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC9M4N4U2S0(i32 -2147483621,
185-
// CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U2S0 %[[TSACCUM]], %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 4)
185+
// CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U2S0 %[[TSACCUM]], %dx.types.Handle %{{[0-9]+}}, i32 0, i32 0, i32 4, i32 0)
186186
// CHECK-SAME: ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align)
187187
vector<float, 4> vec1 = 1.0f;
188188
vector<float, 4> vec2 = 2.0f;
189189
TSMatrixAccumTy TSMatAccum = OuterProduct<ComponentType::F32>(vec1, vec2);
190-
TSMatAccum.InterlockedAccumulate(RWBAB, 0, 16, MatrixLayoutEnum::ColMajor);
190+
TSMatAccum.InterlockedAccumulate(RWBAB, 0);
191191

192192
// CHECK: call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout()
193193
MatrixUseEnum layout = AccumulatorLayout();

0 commit comments

Comments
 (0)