@@ -333,6 +333,9 @@ class DxilConf_SM610_LinAlg {
333333 TEST_METHOD (MatVecMulAdd_Thread_16x16_F16);
334334 TEST_METHOD (OuterProduct_Thread_16x16_F16);
335335
336+ // Query Accumulator Layout
337+ TEST_METHOD (QueryAccumLayout);
338+
336339private:
337340 CComPtr<ID3D12Device> D3DDevice;
338341 dxc::SpecificDllLoader DxcSupport;
@@ -1291,4 +1294,43 @@ void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() {
12911294 runOuterProduct (D3DDevice, DxcSupport, Params, VerboseLogging);
12921295}
12931296
// HLSL source for the QueryAccumLayout test.
// A single-thread compute shader ([numthreads(1, 1, 1)]) that calls
// __builtin_LinAlg_MatrixQueryAccumulatorLayout() and stores the returned
// uint at byte offset 0 of the RWByteAddressBuffer bound to register u0.
// The host-side checks on that value live in runQueryAccumLayout().
1297+ static const char QueryAccumLayoutShader[] = R"(
1298+ RWByteAddressBuffer Output : register(u0);
1299+
1300+ [numthreads(1, 1, 1)]
1301+ void main() {
1302+ uint Layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout();
1303+ Output.Store<uint>(0, Layout);
1304+ }
1305+ )" ;
1306+
1307+ static void runQueryAccumLayout (ID3D12Device *Device,
1308+ dxc::SpecificDllLoader &DxcSupport,
1309+ bool Verbose) {
1310+ std::string Args = " -HV 202x" ;
1311+ size_t BufferSize = elementSize (ComponentType::I32);
1312+
1313+ compileShader (DxcSupport, QueryAccumLayoutShader, " cs_6_10" , Args, Verbose);
1314+
1315+ auto Op =
1316+ createComputeOp (QueryAccumLayoutShader, " cs_6_10" , " UAV(u0)" , Args.c_str ());
1317+ addUAVBuffer (Op.get (), " Output" , BufferSize, true );
1318+ addRootUAV (Op.get (), 0 , " Output" );
1319+
1320+ auto Result = runShaderOp (Device, DxcSupport, std::move (Op));
1321+
1322+ MappedData OutData;
1323+ Result->Test ->GetReadBackData (" Output" , &OutData);
1324+ const uint32_t *Out = static_cast <const uint32_t *>(OutData.data ());
1325+
1326+ // Accum Layout must be A or B
1327+ VERIFY_IS_TRUE (Out[0 ] == static_cast <uint32_t >(MatrixUse::A) || Out[0 ] == static_cast <uint32_t >(MatrixUse::B));
1328+ if (Verbose)
1329+ hlsl_test::LogCommentFmt (L" AccumulatorLayout = %u" , Out[0 ]);
1330+ }
1331+
1332+ void DxilConf_SM610_LinAlg::QueryAccumLayout () {
1333+ runQueryAccumLayout (D3DDevice, DxcSupport, VerboseLogging);
1334+ }
1335+
12941336} // namespace LinAlg
0 commit comments