Skip to content

Commit 19ea49f

Browse files
Fix missing ThreadIdx increment in OuterProduct reference
1 parent 757d59d commit 19ea49f

1 file changed

Lines changed: 2 additions & 1 deletion

File tree

tools/clang/unittests/HLSLExec/ExecutionTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13095,7 +13095,8 @@ void main(uint threadIdx : SV_GroupThreadID)
13095 13095
for (int ThreadIdx = 0; ThreadIdx < Config.NumThreads; ++ThreadIdx) {
13096 13096
for (int M = 0; M < Config.DimM; ++M) {
13097 13097
for (int N = 0; N < Config.DimN; ++N) {
13098-
float Acc = InputVector1FP32[M] * InputVector2FP32[N];
13098+
float Acc = InputVector1FP32[ThreadIdx * Config.DimM + M] *
13099+
InputVector2FP32[ThreadIdx * Config.DimN + N];
13099 13100
ExpectedOutputBuffer[M * Config.DimN + N] += Acc;
13100 13101
}
13101 13102
}

0 commit comments

Comments (0)