Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 12 additions & 19 deletions tools/clang/unittests/HLSLExec/LinAlgTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ struct MatrixParams {
uint32_t ES = elementSize(CompType);
if (Layout == LinalgMatrixLayout::RowMajor)
return N * ES;
return M * ES;
if (Layout == LinalgMatrixLayout::ColumnMajor)
return M * ES;
// If not Row/Col major, spec says to use 0
return 0;
}

size_t totalElements() const { return M * N; }
Expand Down Expand Up @@ -320,7 +323,6 @@ class DxilConf_SM610_LinAlg {
TEST_METHOD(LoadStoreDescriptor_Wave_16x16_F16);
TEST_METHOD(SplatStore_Wave_16x16_F16);
TEST_METHOD(AccumulateDescriptor_Wave_16x16_F16);
TEST_METHOD(AccumulateDescriptor_Thread_16x16_F16);

// Load/Store/Accumulate Memory
TEST_METHOD(LoadMemory_Wave_16x16_F16);
Expand All @@ -343,7 +345,9 @@ class DxilConf_SM610_LinAlg {
// Matrix Vector Arithmetic
TEST_METHOD(MatVecMul_Thread_16x16_F16);
TEST_METHOD(MatVecMulAdd_Thread_16x16_F16);
#if 0
TEST_METHOD(OuterProduct_Thread_16x16_F16);
#endif

// Query Accumulator Layout
TEST_METHOD(QueryAccumLayout);
Expand Down Expand Up @@ -613,27 +617,14 @@ void DxilConf_SM610_LinAlg::AccumulateDescriptor_Wave_16x16_F16() {
runAccumulateDescriptor(D3DDevice, DxcSupport, Params, 12, VerboseLogging);
}

void DxilConf_SM610_LinAlg::AccumulateDescriptor_Thread_16x16_F16() {
MatrixParams Params = {};
Params.CompType = ComponentType::F16;
Params.M = 16;
Params.N = 16;
Params.Use = MatrixUse::Accumulator;
Params.Scope = MatrixScope::Thread;
Params.Layout = LinalgMatrixLayout::RowMajor;
Params.NumThreads = 1;
Params.Enable16Bit = true;
runAccumulateDescriptor(D3DDevice, DxcSupport, Params, 19, VerboseLogging);
}

static const char ElementAccessShader[] = R"(
RWByteAddressBuffer Input : register(u0);
RWByteAddressBuffer Output : register(u1);

// flatten the 2D index into a 1D index then scale by element size
// Always store row-major and work it out in the test runner
uint coordToByteOffset(uint2 coord) {
return (coord.y * N_DIM + coord.x) * ELEM_SIZE;
return (coord.y * M_DIM + coord.x) * ELEM_SIZE;
}

[WaveSize(4, 64)]
Expand Down Expand Up @@ -1323,6 +1314,7 @@ void DxilConf_SM610_LinAlg::MatVecMulAdd_Thread_16x16_F16() {
ComponentType::F16);
}

#if 0
static const char OuterProductShader[] = R"(
#define USE_A 0
#define SCOPE_THREAD 0
Expand Down Expand Up @@ -1404,6 +1396,7 @@ void DxilConf_SM610_LinAlg::OuterProduct_Thread_16x16_F16() {
Params.Enable16Bit = true;
runOuterProduct(D3DDevice, DxcSupport, Params, VerboseLogging);
Comment thread
tex3d marked this conversation as resolved.
}
#endif

static const char QueryAccumLayoutShader[] = R"(
RWByteAddressBuffer Output : register(u0);
Expand Down Expand Up @@ -1469,7 +1462,7 @@ static const char LoadMemoryShader[] = R"(
[[__LinAlgMatrix_Attributes(COMP_TYPE, M_DIM, N_DIM, USE, SCOPE)]]
Mat;
__builtin_LinAlg_MatrixLoadFromMemory(
Mat, GsData, OFFSET, STRIDE, LAYOUT);
Mat, GsData, OFFSET / ELEM_SIZE, STRIDE / ELEM_SIZE, LAYOUT);
__builtin_LinAlg_MatrixStoreToDescriptor(
Mat, Output, OFFSET, STRIDE, LAYOUT, 128);
}
Expand Down Expand Up @@ -1542,7 +1535,7 @@ static const char StoreMemoryShader[] = R"(
__builtin_LinAlg_FillMatrix(Mat, FILL_VALUE);

__builtin_LinAlg_MatrixStoreToMemory(
Mat, GsData, OFFSET, STRIDE, LAYOUT);
Mat, GsData, OFFSET / ELEM_SIZE, STRIDE / ELEM_SIZE, LAYOUT);

for (uint I = 0; I < M_DIM*N_DIM; ++I) {
Output.Store<ELEM_TYPE>(I*ELEM_SIZE, GsData[I]);
Expand Down Expand Up @@ -1622,7 +1615,7 @@ static const char AccumulateMemoryShader[] = R"(
__builtin_LinAlg_FillMatrix(Mat, FILL_VALUE);

__builtin_LinAlg_MatrixAccumulateToMemory(
Mat, GsData, OFFSET, STRIDE, LAYOUT);
Mat, GsData, OFFSET / ELEM_SIZE, STRIDE / ELEM_SIZE, LAYOUT);

for (uint I = 0; I < M_DIM*N_DIM; ++I) {
Output.Store<ELEM_TYPE>(I*ELEM_SIZE, GsData[I]);
Expand Down
Loading