Skip to content

Commit 90b5414

Browse files
committed
[SM6.10][Exec][Bugfix] Thread mats should be OuterProductOptimal layout
1 parent 4ad9834 commit 90b5414

1 file changed

Lines changed: 9 additions & 7 deletions

File tree

tools/clang/unittests/HLSLExec/LinAlgTests.cpp

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -73,11 +73,13 @@ struct MatrixParams {
7373
bool Enable16Bit;
7474
bool EmulateTest;
7575

76-
size_t strideBytes() const {
76+
size_t rowStride() const {
7777
uint32_t ES = elementSize(CompType);
7878
if (Layout == LinalgMatrixLayout::RowMajor)
7979
return N * ES;
80-
return M * ES;
80+
if (Layout == LinalgMatrixLayout::ColumnMajor)
81+
return M * ES;
82+
return 0;
8183
}
8284

8385
size_t totalElements() const { return M * N; }
@@ -94,7 +96,7 @@ static std::string buildCompilerArgs(const MatrixParams &Params,
9496
SS << " -DN_DIM=" << Params.N;
9597
SS << " -DUSE=" << static_cast<int>(Params.Use);
9698
SS << " -DSCOPE=" << static_cast<int>(Params.Scope);
97-
SS << " -DSTRIDE=" << Params.strideBytes();
99+
SS << " -DSTRIDE=" << Params.rowStride();
98100
SS << " -DLAYOUT=" << static_cast<int>(Params.Layout);
99101
SS << " -DELEM_SIZE=" << static_cast<int>(elementSize(Params.CompType));
100102
SS << " -DNUMTHREADS=" << Params.NumThreads;
@@ -620,7 +622,7 @@ void DxilConf_SM610_LinAlg::AccumulateDescriptor_Thread_16x16_F16() {
620622
Params.N = 16;
621623
Params.Use = MatrixUse::Accumulator;
622624
Params.Scope = MatrixScope::Thread;
623-
Params.Layout = LinalgMatrixLayout::RowMajor;
625+
Params.Layout = LinalgMatrixLayout::OuterProductOptimal;
624626
Params.NumThreads = 1;
625627
Params.Enable16Bit = true;
626628
runAccumulateDescriptor(D3DDevice, DxcSupport, Params, 19, VerboseLogging);
@@ -1220,7 +1222,7 @@ void DxilConf_SM610_LinAlg::MatVecMul_Thread_16x16_F16() {
12201222
Params.M = 16;
12211223
Params.N = 16;
12221224
Params.Scope = MatrixScope::Thread;
1223-
Params.Layout = LinalgMatrixLayout::RowMajor;
1225+
Params.Layout = LinalgMatrixLayout::OuterProductOptimal;
12241226
Params.NumThreads = 1;
12251227
Params.Enable16Bit = true;
12261228
runMatVecMul(D3DDevice, DxcSupport, Params, VerboseLogging,
@@ -1315,7 +1317,7 @@ void DxilConf_SM610_LinAlg::MatVecMulAdd_Thread_16x16_F16() {
13151317
Params.M = 16;
13161318
Params.N = 16;
13171319
Params.Scope = MatrixScope::Thread;
1318-
Params.Layout = LinalgMatrixLayout::RowMajor;
1320+
Params.Layout = LinalgMatrixLayout::OuterProductOptimal;
13191321
Params.NumThreads = 1;
13201322
Params.Enable16Bit = true;
13211323
runMatVecMulAdd(D3DDevice, DxcSupport, Params, VerboseLogging,
@@ -1399,7 +1401,7 @@ void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() {
13991401
Params.M = 16;
14001402
Params.N = 16;
14011403
Params.Scope = MatrixScope::Thread;
1402-
Params.Layout = LinalgMatrixLayout::RowMajor;
1404+
Params.Layout = LinalgMatrixLayout::OuterProductOptimal;
14031405
Params.NumThreads = 1;
14041406
Params.Enable16Bit = true;
14051407
runOuterProduct(D3DDevice, DxcSupport, Params, VerboseLogging);

0 commit comments

Comments
 (0)