@@ -73,11 +73,13 @@ struct MatrixParams {
7373 bool Enable16Bit;
7474 bool EmulateTest;
7575
76- size_t strideBytes () const {
76+ size_t rowStride () const {
7777 uint32_t ES = elementSize (CompType);
7878 if (Layout == LinalgMatrixLayout::RowMajor)
7979 return N * ES;
80- return M * ES;
80+ if (Layout == LinalgMatrixLayout::ColumnMajor)
81+ return M * ES;
82+ return 0 ;
8183 }
8284
8385 size_t totalElements () const { return M * N; }
@@ -94,7 +96,7 @@ static std::string buildCompilerArgs(const MatrixParams &Params,
9496 SS << " -DN_DIM=" << Params.N ;
9597 SS << " -DUSE=" << static_cast <int >(Params.Use );
9698 SS << " -DSCOPE=" << static_cast <int >(Params.Scope );
97- SS << " -DSTRIDE=" << Params.strideBytes ();
99+ SS << " -DSTRIDE=" << Params.rowStride ();
98100 SS << " -DLAYOUT=" << static_cast <int >(Params.Layout );
99101 SS << " -DELEM_SIZE=" << static_cast <int >(elementSize (Params.CompType ));
100102 SS << " -DNUMTHREADS=" << Params.NumThreads ;
@@ -620,7 +622,7 @@ void DxilConf_SM610_LinAlg::AccumulateDescriptor_Thread_16x16_F16() {
620622 Params.N = 16 ;
621623 Params.Use = MatrixUse::Accumulator;
622624 Params.Scope = MatrixScope::Thread;
623- Params.Layout = LinalgMatrixLayout::RowMajor ;
625+ Params.Layout = LinalgMatrixLayout::OuterProductOptimal ;
624626 Params.NumThreads = 1 ;
625627 Params.Enable16Bit = true ;
626628 runAccumulateDescriptor (D3DDevice, DxcSupport, Params, 19 , VerboseLogging);
@@ -1220,7 +1222,7 @@ void DxilConf_SM610_LinAlg::MatVecMul_Thread_16x16_F16() {
12201222 Params.M = 16 ;
12211223 Params.N = 16 ;
12221224 Params.Scope = MatrixScope::Thread;
1223- Params.Layout = LinalgMatrixLayout::RowMajor ;
1225+ Params.Layout = LinalgMatrixLayout::OuterProductOptimal ;
12241226 Params.NumThreads = 1 ;
12251227 Params.Enable16Bit = true ;
12261228 runMatVecMul (D3DDevice, DxcSupport, Params, VerboseLogging,
@@ -1315,7 +1317,7 @@ void DxilConf_SM610_LinAlg::MatVecMulAdd_Thread_16x16_F16() {
13151317 Params.M = 16 ;
13161318 Params.N = 16 ;
13171319 Params.Scope = MatrixScope::Thread;
1318- Params.Layout = LinalgMatrixLayout::RowMajor ;
1320+ Params.Layout = LinalgMatrixLayout::OuterProductOptimal ;
13191321 Params.NumThreads = 1 ;
13201322 Params.Enable16Bit = true ;
13211323 runMatVecMulAdd (D3DDevice, DxcSupport, Params, VerboseLogging,
@@ -1399,7 +1401,7 @@ void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() {
13991401 Params.M = 16 ;
14001402 Params.N = 16 ;
14011403 Params.Scope = MatrixScope::Thread;
1402- Params.Layout = LinalgMatrixLayout::RowMajor ;
1404+ Params.Layout = LinalgMatrixLayout::OuterProductOptimal ;
14031405 Params.NumThreads = 1 ;
14041406 Params.Enable16Bit = true ;
14051407 runOuterProduct (D3DDevice, DxcSupport, Params, VerboseLogging);
0 commit comments