From e520d0a5d052788c42ed9a5b8567dd9e52fb779a Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 19 Feb 2026 15:37:50 -0700 Subject: [PATCH 1/8] [SM6.10] Implement groupshared Builtins Implements the Load/Store/Accumulate to memory groupshared builtins following the pattern of the previous builtins --- include/dxc/DXIL/DxilInstructions.h | 18 +++--- include/dxc/DXIL/DxilOperations.h | 1 + lib/DXIL/DxilOperations.cpp | 46 ++++++++++----- lib/HLSL/HLOperationLower.cpp | 56 ++++++++++++++++++- .../matrixaccumulatetomemory/nominal.hlsl | 19 +++++++ .../matrixloadfrommemory/nominal.hlsl | 19 +++++++ .../builtins/matrixstoretomemory/nominal.hlsl | 19 +++++++ .../matrixaccumulatetomemory/ast.hlsl | 24 ++++++++ .../unavailable_pre_sm610.hlsl | 15 +++++ .../builtins/matrixloadfrommemory/ast.hlsl | 24 ++++++++ .../unavailable_pre_sm610.hlsl | 15 +++++ .../builtins/matrixstoretomemory/ast.hlsl | 24 ++++++++ .../unavailable_pre_sm610.hlsl | 15 +++++ .../hlsl/linalg/builtins/stage-errors.hlsl | 16 ++++++ utils/hct/gen_intrin_main.txt | 6 +- utils/hct/hctdb.py | 20 +++---- 16 files changed, 297 insertions(+), 40 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl create mode 100644 
tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 8c48202ce0..941eab6474 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10651,14 +10651,14 @@ struct DxilInst_LinAlgMatrixLoadFromMemory { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_groupsharedArr = 1, + arg_memory = 1, arg_offset = 2, arg_stride = 3, arg_layout = 4, }; // Accessors - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(1); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_memory() const { return Instr->getOperand(1); } + void set_memory(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_offset() const { return Instr->getOperand(2); } void set_offset(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_stride() const { return Instr->getOperand(3); } @@ -10854,7 +10854,7 @@ struct DxilInst_LinAlgMatrixStoreToMemory { // Operand indexes enum OperandIdx { arg_matrix = 1, - arg_groupsharedArr = 2, + arg_memory = 2, arg_offset = 3, arg_stride = 4, arg_layout = 5, @@ -10862,8 +10862,8 @@ struct DxilInst_LinAlgMatrixStoreToMemory { // Accessors llvm::Value *get_matrix() const { return Instr->getOperand(1); } void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(2); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_memory() const { return Instr->getOperand(2); } + void set_memory(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } llvm::Value *get_stride() const { return Instr->getOperand(4); } @@ -11091,7 +11091,7 @@ struct 
DxilInst_LinAlgMatrixAccumulateToMemory { // Operand indexes enum OperandIdx { arg_matrix = 1, - arg_groupsharedArr = 2, + arg_memory = 2, arg_offset = 3, arg_stride = 4, arg_layout = 5, @@ -11099,8 +11099,8 @@ struct DxilInst_LinAlgMatrixAccumulateToMemory { // Accessors llvm::Value *get_matrix() const { return Instr->getOperand(1); } void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(2); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_memory() const { return Instr->getOperand(2); } + void set_memory(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } llvm::Value *get_stride() const { return Instr->getOperand(4); } diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index bab4bffc6e..85df375b3a 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -212,6 +212,7 @@ class OP { TS_UDT = 8, // Ex: %"struct.MyStruct" * TS_Object = 9, // Ex: %"class.StructuredBuffer" TS_Vector = 10, // Ex: <8 x i16> + TS_Array = 11, // Ex: [8 x float] TS_MaskBitCount, // Types used in Mask end here // TS_Extended is only used to identify the unnamed struct type used to wrap // multiple overloads when using GetTypeSlot. 
diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 4138b3d930..02dcfe65a0 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2863,8 +2863,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixLoadFromMemory", Attribute::None, 2, - {{0x200}, {0x63}}, - {{0x0}, {0x0}}}, // Overloads: o,hfwi + {{0x200}, {0x800}}, + {{0x0}, {0x0}}}, // Overloads: o,a {OC::LinAlgMatrixLength, "LinAlgMatrixLength", OCC::LinAlgMatrixLength, @@ -2911,8 +2911,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixStoreToMemory", Attribute::None, 2, - {{0x200}, {0x63}}, - {{0x0}, {0x0}}}, // Overloads: o,hfwi + {{0x200}, {0x800}}, + {{0x0}, {0x0}}}, // Overloads: o,a {OC::LinAlgMatrixQueryAccumulatorLayout, "LinAlgMatrixQueryAccumulatorLayout", OCC::LinAlgMatrixQueryAccumulatorLayout, @@ -2967,8 +2967,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixAccumulateToMemory", Attribute::None, 2, - {{0x200}, {0x63}}, - {{0x0}, {0x0}}}, // Overloads: o,hfwi + {{0x200}, {0x800}}, + {{0x0}, {0x0}}}, // Overloads: o,a {OC::LinAlgMatrixOuterProduct, "LinAlgMatrixOuterProduct", OCC::LinAlgMatrixOuterProduct, @@ -3152,6 +3152,8 @@ unsigned OP::GetTypeSlot(Type *pType) { return TS_Extended; case Type::VectorTyID: return TS_Vector; + case Type::ArrayTyID: + return TS_Array; default: break; } @@ -3166,26 +3168,39 @@ const char *OP::GetOverloadTypeName(unsigned TypeSlot) { StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { DXASSERT(!Ty->isVoidTy(), "must not pass void type here"); unsigned TypeSlot = OP::GetTypeSlot(Ty); + if (TypeSlot < TS_BasicCount) { return GetOverloadTypeName(TypeSlot); - } else if (TypeSlot == TS_UDT) { + } + + switch (TypeSlot) { + case TS_UDT: { if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); StructType *ST = cast(Ty); return ST->getStructName(); - } else if (TypeSlot == TS_Object) { + } + case TS_Object: { StructType 
*ST = cast(Ty); if (dxilutil::IsHLSLLinAlgMatrixType(Ty)) return (Twine("m") + Twine(dxilutil::GetHLSLLinAlgMatrixTypeMangling(ST))) .toStringRef(Storage); return ST->getStructName(); - } else if (TypeSlot == TS_Vector) { + } + case TS_Vector: { VectorType *VecTy = cast(Ty); return (Twine("v") + Twine(VecTy->getNumElements()) + Twine( GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) .toStringRef(Storage); - } else if (TypeSlot == TS_Extended) { + } + case TS_Array: { + if (Ty->isPointerTy()) + Ty = Ty->getPointerElementType(); + ArrayType *ArrTy = cast(Ty); + return GetOverloadTypeName(OP::GetTypeSlot(ArrTy->getArrayElementType())); + } + case TS_Extended: { DXASSERT(isa(Ty), "otherwise, extended overload type not wrapped in struct type."); StructType *ST = cast(Ty); @@ -3200,11 +3215,14 @@ StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { OS << GetTypeName(ST->getElementType(I), TempStr); } return OS.str(); - } else { - raw_svector_ostream OS(Storage); - Ty->print(OS); - return OS.str(); } + default: + break; + } + + raw_svector_ostream OS(Storage); + Ty->print(OS); + return OS.str(); } StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 4f22a4598d..9ea6166f36 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7226,6 +7226,53 @@ Value *TranslateLinAlgCopyConvertMatrix(CallInst *CI, IntrinsicOp IOP, return nullptr; } +Value *TranslateLinAlgMatrixLoadFromMemory( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *MatrixPtr = CI->getArgOperand(1); + DXASSERT_NOMSG(isa(MatrixPtr->getType())); + Type *MatrixType = MatrixPtr->getType()->getPointerElementType(); + + Value *Arr = CI->getArgOperand(2); + Value *Offset = CI->getArgOperand(3); + Value 
*Stride = CI->getArgOperand(4); + Value *Layout = CI->getArgOperand(5); + + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, Arr->getType()}); + + Value *Matrix = + Builder.CreateCall(DxilFunc, {OpArg, Arr, Offset, Stride, Layout}); + Builder.CreateStore(Matrix, MatrixPtr); + + return nullptr; +} + +Value *TranslateLinAlgMatrixAccumStoreToMemory( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *Matrix = CI->getArgOperand(1); + Value *Arr = CI->getArgOperand(2); + Value *Offset = CI->getArgOperand(3); + Value *Stride = CI->getArgOperand(4); + Value *Layout = CI->getArgOperand(5); + + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); + Function *DxilFunc = + HlslOp->GetOpFunc(OpCode, {Matrix->getType(), Arr->getType()}); + + return Builder.CreateCall(DxilFunc, + {OpArg, Matrix, Arr, Offset, Stride, Layout}); +} + } // namespace // Lower table. 
@@ -7989,14 +8036,16 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromDescriptor, TranslateLinAlgMatrixLoadFromDescriptor, DXIL::OpCode::LinAlgMatrixLoadFromDescriptor}, - {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromMemory, EmptyLower, + {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromMemory, + TranslateLinAlgMatrixLoadFromMemory, DXIL::OpCode::LinAlgMatrixLoadFromMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixSetElement, TranslateLinAlgMatrixSetElement, DXIL::OpCode::LinAlgMatrixSetElement}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToDescriptor, TranslateLinAlgMatrixAccumStoreToDescriptor, DXIL::OpCode::LinAlgMatrixStoreToDescriptor}, - {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToMemory, EmptyLower, + {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToMemory, + TranslateLinAlgMatrixAccumStoreToMemory, DXIL::OpCode::LinAlgMatrixStoreToMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulate, TranslateLinAlgMatrixAccumulate, DXIL::OpCode::LinAlgMatrixAccumulate}, @@ -8010,7 +8059,8 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToDescriptor, TranslateLinAlgMatrixAccumStoreToDescriptor, DXIL::OpCode::LinAlgMatrixAccumulateToDescriptor}, - {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToMemory, EmptyLower, + {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToMemory, + TranslateLinAlgMatrixAccumStoreToMemory, DXIL::OpCode::LinAlgMatrixAccumulateToMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixOuterProduct, TranslateLinAlgMatrixOuterProduct, DXIL::OpCode::LinAlgMatrixOuterProduct}, diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl new file mode 100644 index 0000000000..5461600016 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -0,0 +1,19 @@ +// 
REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); +} + +// CHECK: @{{.*}} = external addrspace(3) global [64 x float] + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + fn(SharedArr); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl new file mode 100644 index 0000000000..a5dd722f1b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -0,0 +1,19 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); +} + +// CHECK: @{{.*}} = external addrspace(3) global [64 x float] + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + fn(SharedArr); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl new file mode 100644 index 
0000000000..f6c38536a3 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -0,0 +1,19 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); +} + +// CHECK: @{{.*}} = external addrspace(3) global [64 x float] + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl new file mode 100644 index 0000000000..e3694e1eb4 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl @@ -0,0 +1,24 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 420 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" 
+ +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); +} + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl new file mode 100644 index 0000000000..8048e22922 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -T cs_6_9 -HV 202x -E main %s -verify + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64], float F) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + + // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixAccumulateToMemory potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); +} + +[numthreads(4,1,1)] +void main() { + fn(SharedArr, 6.0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl new file mode 100644 index 0000000000..2874ba3c37 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl @@ -0,0 +1,24 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const 
__attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 411 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); +} + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl new file mode 100644 index 0000000000..af3dd3b846 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -T cs_6_9 -HV 202x -E main %s -verify + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64], float F) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + + // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixLoadFromMemory potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); +} + +[numthreads(4,1,1)] +void main() { + fn(SharedArr, 6.0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl new file mode 100644 index 0000000000..1c2520fe6c --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl @@ -0,0 +1,24 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: FunctionDecl {{.*}} implicit used 
__builtin_LinAlg_MatrixStoreToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 414 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); +} + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl new file mode 100644 index 0000000000..934963f5dc --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -T cs_6_9 -HV 202x -E main %s -verify + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64], float F) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + + // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixStoreToMemory potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); +} + +[numthreads(4,1,1)] +void main() { + fn(SharedArr, 6.0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl index fbec113e81..c9ebd7adf8 100644 --- 
a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl @@ -8,8 +8,12 @@ // RUN: %dxc -T lib_6_10 -DMATRIX_STORE_TO_DESCRIPTOR %s -verify // RUN: %dxc -T lib_6_10 -DMATRIX_LENGTH %s -verify // RUN: %dxc -T lib_6_10 -DMATRIX_ACCUMULATE %s -verify +// RUN: %dxc -T lib_6_10 -DMATRIX_LOAD_FROM_MEMORY %s -verify +// RUN: %dxc -T lib_6_10 -DMATRIX_STORE_TO_MEMORY %s -verify +// RUN: %dxc -T lib_6_10 -DMATRIX_ACCUMULATE_TO_MEMORY %s -verify RWByteAddressBuffer buf; +groupshared float gs_arr[64]; void CallFunction() { @@ -62,6 +66,18 @@ void CallFunction() #define DO_FUNC __builtin_LinAlg_MatrixAccumulate(mat1, mat2, mat3); #endif +#ifdef MATRIX_LOAD_FROM_MEMORY + #define DO_FUNC __builtin_LinAlg_MatrixLoadFromMemory(mat1, gs_arr, 0, 0, 0); +#endif + +#ifdef MATRIX_STORE_TO_MEMORY + #define DO_FUNC __builtin_LinAlg_MatrixStoreToMemory(mat1, gs_arr, 0, 0, 0); +#endif + +#ifdef MATRIX_ACCUMULATE_TO_MEMORY + #define DO_FUNC __builtin_LinAlg_MatrixAccumulateToMemory(mat1, gs_arr, 0, 0, 0); +#endif + // The builtins below are allowed in all stages, if they raise an error // then the test will fail with "saw unexpected diagnostic" uint layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout(); diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 49aa2f151b..4810442a7e 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -402,13 +402,13 @@ void [[min_sm=6.10]] __builtin_LinAlg_FillMatrix(out LinAlgMatrix ret, in numeri void [[min_sm=6.10]] __builtin_LinAlg_CopyConvertMatrix(out LinAlgMatrix ret, in LinAlgMatrix source, in bool transpose); void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in ByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in RWByteAddressBuffer buf, in uint offset, in uint stride, in 
uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromMemory(out LinAlgMatrix ret, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromMemory(out LinAlgMatrix ret, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); uint [[min_sm=6.10]] __builtin_LinAlg_MatrixLength(in LinAlgMatrix matrix); uint<2> [[min_sm=6.10]] __builtin_LinAlg_MatrixGetCoordinate(in LinAlgMatrix matrix, in uint threadLocalIndex); void [[min_sm=6.10]] __builtin_LinAlg_MatrixGetElement(out numeric ret, in LinAlgMatrix matrix, in uint threadLocalIndex); void [[min_sm=6.10]] __builtin_LinAlg_MatrixSetElement(out LinAlgMatrix ret, in LinAlgMatrix matrix, in uint threadLocalIndex, in numeric value); void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(in LinAlgMatrix matrix, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); uint [[min_sm=6.10]] __builtin_LinAlg_MatrixQueryAccumulatorLayout(); void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiply(out LinAlgMatrix matrixC, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB); void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(out LinAlgMatrix matrixR, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB, in LinAlgMatrix matrixC); @@ -416,7 +416,7 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulate(out LinAlgMatrix matrixC, void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiply(out numeric<> ret, in LinAlgMatrix mat, in numeric<> input, in uint input_interp); void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiplyAdd(out numeric<> ret, in LinAlgMatrix mat, in 
numeric<> input, in uint input_interp, in numeric<> bias, in uint bias_interp); void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric<> vecA, in numeric<> vecB); } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 71f035e059..5dbb59102f 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -52,9 +52,10 @@ # - "," is used to separate multiple overload dimensions. # - When used, only $x0, $x1, etc. are supported for overloaded parameter # types. +# - "a" is for any array ([n x Ty]) # dxil_all_user_oload_chars must be kept in sync with the indices in # hlsl::OP::TypeSlot in DxilOperations.h. 
-dxil_all_user_oload_chars = "hfd18wiluo<" +dxil_all_user_oload_chars = "hfd18wiluo Date: Tue, 10 Mar 2026 15:32:04 -0600 Subject: [PATCH 2/8] Address comments --- .../builtins/matrixaccumulatetomemory/nominal.hlsl | 13 ++++--------- .../builtins/matrixloadfrommemory/nominal.hlsl | 13 ++++--------- .../builtins/matrixstoretomemory/nominal.hlsl | 13 ++++--------- .../builtins/matrixaccumulatetomemory/ast.hlsl | 8 ++------ .../unavailable_pre_sm610.hlsl | 10 +++------- .../linalg/builtins/matrixloadfrommemory/ast.hlsl | 8 ++------ .../matrixloadfrommemory/unavailable_pre_sm610.hlsl | 10 +++------- .../linalg/builtins/matrixstoretomemory/ast.hlsl | 8 ++------ .../matrixstoretomemory/unavailable_pre_sm610.hlsl | 10 +++------- 9 files changed, 27 insertions(+), 66 deletions(-) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl index 5461600016..cfdac39028 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -1,19 +1,14 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -groupshared float SharedArr[64]; - -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); -} - // CHECK: @{{.*}} = external addrspace(3) global [64 x float] +groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - fn(SharedArr); + // CHECK: call void 
@dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl index a5dd722f1b..a3e383ca58 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -1,19 +1,14 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -groupshared float SharedArr[64]; - -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); -} - // CHECK: @{{.*}} = external addrspace(3) global [64 x float] +groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - fn(SharedArr); + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 1, 2, 3); } diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl index f6c38536a3..4b5b50c357 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -1,19 +1,14 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -groupshared float SharedArr[64]; - -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); -} - // CHECK: @{{.*}} = external addrspace(3) global [64 x float] +groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - fn(SharedArr); + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl index e3694e1eb4..d300796b67 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl @@ -12,13 +12,9 @@ groupshared float SharedArr[64]; -void fn(groupshared 
float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); -} - [shader("compute")] [numthreads(1,1,1)] void main() { - fn(SharedArr); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl index 8048e22922..e5a9ea4895 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl @@ -2,14 +2,10 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64], float F) { +[numthreads(4,1,1)] +void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixAccumulateToMemory potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); -} - -[numthreads(4,1,1)] -void main() { - fn(SharedArr, 6.0); + __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl index 2874ba3c37..3ac0de3880 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl @@ -12,13 +12,9 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); -} - 
[shader("compute")] [numthreads(1,1,1)] void main() { - fn(SharedArr); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl index af3dd3b846..d8472ad92b 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl @@ -2,14 +2,10 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64], float F) { +[numthreads(4,1,1)] +void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixLoadFromMemory potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); -} - -[numthreads(4,1,1)] -void main() { - fn(SharedArr, 6.0); + __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl index 1c2520fe6c..c726d119eb 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl @@ -12,13 +12,9 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); -} - [shader("compute")] [numthreads(1,1,1)] void main() { - fn(SharedArr); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 0, 0, 
0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl index 934963f5dc..d3468a2a02 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl @@ -2,14 +2,10 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64], float F) { +[numthreads(4,1,1)] +void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixStoreToMemory potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); -} - -[numthreads(4,1,1)] -void main() { - fn(SharedArr, 6.0); + __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 0, 0, 0); } From 6ff022c0aa01b25464d11cc8139e96900135dcb2 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 11 Mar 2026 19:34:20 -0600 Subject: [PATCH 3/8] Rework based on feedback --- include/dxc/DXIL/DxilOperations.h | 1 - lib/DXIL/DxilOperations.cpp | 123 +++++++++--------- lib/HLSL/HLOperationLower.cpp | 17 ++- .../matrixaccumulatetomemory/nominal.hlsl | 2 +- .../matrixloadfrommemory/nominal.hlsl | 2 +- .../builtins/matrixstoretomemory/nominal.hlsl | 2 +- utils/hct/hctdb.py | 22 ++-- utils/hct/hctdb_instrhelp.py | 39 ++++-- 8 files changed, 117 insertions(+), 91 deletions(-) diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 85df375b3a..bab4bffc6e 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -212,7 +212,6 @@ class OP { TS_UDT = 8, // Ex: %"struct.MyStruct" * TS_Object = 9, // Ex: %"class.StructuredBuffer" TS_Vector = 10, // Ex: <8 x i16> - TS_Array = 11, // Ex: [8 x float] TS_MaskBitCount, // Types used in Mask end here // 
TS_Extended is only used to identify the unnamed struct type used to wrap // multiple overloads when using GetTypeSlot. diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 02dcfe65a0..ffff4eccd9 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2863,8 +2863,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixLoadFromMemory", Attribute::None, 2, - {{0x200}, {0x800}}, - {{0x0}, {0x0}}}, // Overloads: o,a + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::LinAlgMatrixLength, "LinAlgMatrixLength", OCC::LinAlgMatrixLength, @@ -2911,8 +2911,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixStoreToMemory", Attribute::None, 2, - {{0x200}, {0x800}}, - {{0x0}, {0x0}}}, // Overloads: o,a + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::LinAlgMatrixQueryAccumulatorLayout, "LinAlgMatrixQueryAccumulatorLayout", OCC::LinAlgMatrixQueryAccumulatorLayout, @@ -2967,8 +2967,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixAccumulateToMemory", Attribute::None, 2, - {{0x200}, {0x800}}, - {{0x0}, {0x0}}}, // Overloads: o,a + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::LinAlgMatrixOuterProduct, "LinAlgMatrixOuterProduct", OCC::LinAlgMatrixOuterProduct, @@ -3152,8 +3152,6 @@ unsigned OP::GetTypeSlot(Type *pType) { return TS_Extended; case Type::VectorTyID: return TS_Vector; - case Type::ArrayTyID: - return TS_Array; default: break; } @@ -3194,12 +3192,6 @@ StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl<char> &Storage) { GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) .toStringRef(Storage); } - case TS_Array: { - if (Ty->isPointerTy()) - Ty = Ty->getPointerElementType(); - ArrayType *ArrTy = cast<ArrayType>(Ty); - return GetOverloadTypeName(OP::GetTypeSlot(ArrTy->getArrayElementType())); - } case TS_Extended: { DXASSERT(isa<StructType>(Ty), "otherwise, extended overload type not wrapped in
struct type."); @@ -4332,9 +4324,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { #define VEC2(_y) A(VectorType::get(_y, 2)) #define VEC4(_y) A(GetStructVectorType(4, _y)) #define VEC9(_y) A(VectorType::get(_y, 9)) +#define TGSM(_y) A(PointerType::get(_y, DXIL::kTGSMAddrSpace)) // Extended Overload types are wrapped in an anonymous struct -#define EXT(_y) A(cast<StructType>(pOverloadType)->getElementType(_y)) +#define EXT(_y) cast<StructType>(pOverloadType)->getElementType(_y) /* hctdb_instrhelp.get_oloads_funcs()*/ switch (opCode) { // return opCode @@ -6445,9 +6438,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Linear Algebra Operations case OpCode::MatVecMul: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI1); A(pI32); A(pRes); @@ -6461,9 +6454,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI1); break; case OpCode::MatVecMulAdd: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI1); A(pI32); A(pRes); @@ -6482,8 +6475,8 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::OuterProductAccumulate: A(pV); A(pI32); - EXT(0); - EXT(1); + A(EXT(0)); + A(EXT(1)); A(pRes); A(pI32); A(pI32); @@ -6586,21 +6579,21 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Linear Algebra Operations case OpCode::LinAlgMatrixMultiplyAccumulate: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); - EXT(3); + A(EXT(1)); + A(EXT(2)); + A(EXT(3)); break; case OpCode::LinAlgFillMatrix: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); break; case OpCode::LinAlgCopyConvertMatrix: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI1); break; case OpCode::LinAlgMatrixLoadFromDescriptor: @@ -6612,9 +6605,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); break; case OpCode::LinAlgMatrixLoadFromMemory: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + TGSM(EXT(1)); A(pI32); A(pI32); A(pI32); @@ -6631,17 +6624,17 @@ Function *OP::GetOpFunc(OpCode opCode, Type
*pOverloadType) { A(pI32); break; case OpCode::LinAlgMatrixGetElement: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI32); break; case OpCode::LinAlgMatrixSetElement: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI32); - EXT(2); + A(EXT(2)); break; case OpCode::LinAlgMatrixStoreToDescriptor: A(pV); @@ -6655,8 +6648,8 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::LinAlgMatrixStoreToMemory: A(pV); A(pI32); - EXT(0); - EXT(1); + A(EXT(0)); + TGSM(EXT(1)); A(pI32); A(pI32); A(pI32); @@ -6666,31 +6659,31 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); break; case OpCode::LinAlgMatrixMultiply: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); break; case OpCode::LinAlgMatrixAccumulate: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); break; case OpCode::LinAlgMatVecMul: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); A(pI32); break; case OpCode::LinAlgMatVecMulAdd: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); A(pI32); - EXT(3); + A(EXT(3)); A(pI32); break; case OpCode::LinAlgMatrixAccumulateToDescriptor: @@ -6705,17 +6698,17 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::LinAlgMatrixAccumulateToMemory: A(pV); A(pI32); - EXT(0); - EXT(1); + A(EXT(0)); + TGSM(EXT(1)); A(pI32); A(pI32); A(pI32); break; case OpCode::LinAlgMatrixOuterProduct: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); break; // @@ -7082,7 +7075,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::MatVecMulAdd: case OpCode::LinAlgFillMatrix: case OpCode::LinAlgCopyConvertMatrix: - case OpCode::LinAlgMatrixLoadFromMemory: case OpCode::LinAlgMatrixGetElement: if (FT->getNumParams() < 2) return nullptr; @@ -7090,8 +7082,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { {FT->getReturnType(), 
FT->getParamType(1)}); case OpCode::OuterProductAccumulate: - case OpCode::LinAlgMatrixStoreToMemory: - case OpCode::LinAlgMatrixAccumulateToMemory: if (FT->getNumParams() < 3) return nullptr; return llvm::StructType::get(Ctx, @@ -7104,12 +7094,27 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { {FT->getReturnType(), FT->getParamType(1), FT->getParamType(2), FT->getParamType(3)}); + case OpCode::LinAlgMatrixLoadFromMemory: + if (FT->getNumParams() < 2) + return nullptr; + return llvm::StructType::get( + Ctx, + {FT->getReturnType(), FT->getParamType(1)->getPointerElementType()}); + case OpCode::LinAlgMatrixSetElement: if (FT->getNumParams() < 4) return nullptr; return llvm::StructType::get( Ctx, {FT->getReturnType(), FT->getParamType(1), FT->getParamType(3)}); + case OpCode::LinAlgMatrixStoreToMemory: + case OpCode::LinAlgMatrixAccumulateToMemory: + if (FT->getNumParams() < 3) + return nullptr; + return llvm::StructType::get( + Ctx, + {FT->getParamType(1), FT->getParamType(2)->getPointerElementType()}); + case OpCode::LinAlgMatrixMultiply: case OpCode::LinAlgMatrixAccumulate: case OpCode::LinAlgMatVecMul: diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 9ea6166f36..6d718257d4 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7242,11 +7242,15 @@ Value *TranslateLinAlgMatrixLoadFromMemory( Value *Stride = CI->getArgOperand(4); Value *Layout = CI->getArgOperand(5); + Value *Zero = Builder.getInt32(0); + Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero}); + Type *ArrEltTy = ArrPtr->getType()->getPointerElementType(); + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); - Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, Arr->getType()}); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, ArrEltTy}); Value *Matrix = - Builder.CreateCall(DxilFunc, {OpArg, Arr, Offset, Stride, Layout}); + Builder.CreateCall(DxilFunc, {OpArg, ArrPtr, Offset, Stride, Layout}); 
Builder.CreateStore(Matrix, MatrixPtr); return nullptr; @@ -7265,12 +7269,15 @@ Value *TranslateLinAlgMatrixAccumStoreToMemory( Value *Stride = CI->getArgOperand(4); Value *Layout = CI->getArgOperand(5); + Value *Zero = Builder.getInt32(0); + Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero}); + Type *ArrEltTy = ArrPtr->getType()->getPointerElementType(); + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); - Function *DxilFunc = - HlslOp->GetOpFunc(OpCode, {Matrix->getType(), Arr->getType()}); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {Matrix->getType(), ArrEltTy}); return Builder.CreateCall(DxilFunc, - {OpArg, Matrix, Arr, Offset, Stride, Layout}); + {OpArg, Matrix, ArrPtr, Offset, Stride, Layout}); } } // namespace diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl index cfdac39028..f05366d62f 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -8,7 +8,7 @@ groupshared float SharedArr[64]; void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 1, 2, 3); } diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl index a3e383ca58..9c1e8303b2 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -8,7 +8,7 @@ groupshared float SharedArr[64]; void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, float addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl index 4b5b50c357..07a4fa38e5 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -8,7 +8,7 @@ groupshared float SharedArr[64]; void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float 
addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 5dbb59102f..b72006efa9 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -55,7 +55,7 @@ # - "a" is for any array ([n x Ty]) # dxil_all_user_oload_chars must be kept in sync with the indices in # hlsl::OP::TypeSlot in DxilOperations.h. -dxil_all_user_oload_chars = "hfd18wiluo $x7 + ty = i.llvm_type.replace("_gs", "") + if ty.startswith("$x"): + if ty != "$x" + str(next_oload_idx): raise ValueError( "Extended overloads are not sequentially referenced in " f"DXIL op {self.name}: {i.llvm_type} != $x{next_oload_idx}" @@ -6407,12 +6411,12 @@ def populate_ExperimentalOps(self): "LinAlgMatrixLoadFromMemory", "LinAlgMatrixLoadFromMemory", "fills a matrix with data from a groupshared array", - "o,a", + "o,hfwi", "", [ db_dxil_param(0, "$x0", "", "resulting matrix"), db_dxil_param( - 2, "$x1", "memory", "groupshared array to fill matrix with" + 2, "$x_gs1", "memory", "groupshared array to fill matrix with" ), db_dxil_param(3, "i32", "offset", "starting offset in the array"), db_dxil_param( @@ -6508,13 +6512,13 @@ def populate_ExperimentalOps(self): "LinAlgMatrixStoreToMemory", "LinAlgMatrixStoreToMemory", "stores a matrix to groupshared memory", - "o,a", + "o,hfwi", "", [ db_dxil_param(0, "v", "", ""), db_dxil_param(2, "$x0", "matrix", "matrix to be stored"), db_dxil_param( - 3, "$x1", "memory", "groupshared array to store into" + 3, "$x_gs1", "memory", "groupshared array to store into" ), db_dxil_param(4, "i32", "offset", "starting offset in the array"), db_dxil_param( @@ -6625,13 +6629,13 @@ def populate_ExperimentalOps(self): "LinAlgMatrixAccumulateToMemory", "LinAlgMatrixAccumulateToMemory", "accumulates a matrix to groupshared memory", - "o,a", + 
"o,hfwi", "", [ db_dxil_param(0, "v", "", ""), db_dxil_param(2, "$x0", "matrix", "Accumulator matrix"), db_dxil_param( - 3, "$x1", "memory", "groupshared array to accumulate into" + 3, "$x_gs1", "memory", "groupshared array to accumulate into" ), db_dxil_param(4, "i32", "offset", "starting offset in the array"), db_dxil_param( diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 5e09578af7..91636c309e 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -644,10 +644,15 @@ def print_opfunc_table(self): "noderecordproperty": "A(nodeRecordProperty);", "hit_object": "A(pHit);", # Extended overload slots, extend as needed: - "$x0": "EXT(0);", - "$x1": "EXT(1);", - "$x2": "EXT(2);", - "$x3": "EXT(3);", + "$x0": "A(EXT(0));", + "$x1": "A(EXT(1));", + "$x2": "A(EXT(2));", + "$x3": "A(EXT(3));", + # Groupshared pointers to extended overloads: + "$x_gs0": "TGSM(EXT(0));", + "$x_gs1": "TGSM(EXT(1));", + "$x_gs2": "TGSM(EXT(2));", + "$x_gs3": "TGSM(EXT(3));", } last_category = None for i in self.db.get_dxil_ops(): @@ -679,6 +684,7 @@ def print_opfunc_oload_type(self): vec_ty = "$vec" gsptr_ty = "$gsptr" extended_ty = "$x" + extended_gs_ty = "$x_gs" last_category = None index_dict = collections.OrderedDict() @@ -846,7 +852,7 @@ def print_opfunc_oload_type(self): # indices the key, and add the opcode to a list of opcodes for that # key. Indices start with 0 for return type, and 1 for the first # function parameter, which is the DXIL OpCode. - indices = [] + indices = [] # (op.pos, unwrap_pointer) pairs for index, op in enumerate(instr.ops): # Skip dxil opcode. 
if op.pos == 1: @@ -854,8 +860,10 @@ def print_opfunc_oload_type(self): op_type = op.llvm_type if op_type.startswith(extended_ty): + gs_ptr = op_type.startswith(extended_gs_ty) + prefix_len = len(extended_gs_ty) if gs_ptr else len(extended_ty) try: - extended_index = int(op_type[2:]) + extended_index = int(op_type[prefix_len:]) except: raise ValueError( "Error parsing extended operand type " @@ -866,7 +874,7 @@ def print_opfunc_oload_type(self): f"'$x{extended_index}' is not in sequential " + f"order for DXIL op '{instr.name}'" ) - indices.append(op.pos) + indices.append((op.pos, gs_ptr)) if len(indices) != instr.num_oloads: raise ValueError( @@ -875,23 +883,26 @@ def print_opfunc_oload_type(self): ) extended_dict.setdefault(tuple(indices), []).append(instr.name) - def get_type_at_index(index): - if index == 0: - return "FT->getReturnType()" - return f"FT->getParamType({index - 1})" + def get_type_at_index(index, unwrap_pointer): + result = "FT->getReturnType()" + if index > 0: + result = f"FT->getParamType({index - 1})" + if unwrap_pointer: + result = result + "->getPointerElementType()" + return result for index_tuple, opcodes in extended_dict.items(): line = "" for opcode in opcodes: line = line + f"case OpCode::{opcode}:\n" - if index_tuple[-1] > 0: + if index_tuple[-1][0] > 0: line += ( - f" if (FT->getNumParams() < {index_tuple[-1]})\n" + f" if (FT->getNumParams() < {index_tuple[-1][0]})\n" + " return nullptr;\n" ) line += ( " return llvm::StructType::get(Ctx, {" - + ", ".join([get_type_at_index(index) for index in index_tuple]) + + ", ".join([get_type_at_index(index, unwrap_pointer) for index, unwrap_pointer in index_tuple]) + "});\n" ) print(line) From 36b93d5f2d3cf7f148e0d96f876833390bf89609 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 11 Mar 2026 19:46:59 -0600 Subject: [PATCH 4/8] Cleanup comments --- utils/hct/hctdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 
b72006efa9..6e9d2920e1 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -51,8 +51,8 @@ # processing. # - "," is used to separate multiple overload dimensions. # - When used, only $x0, $x1, etc. are supported for overloaded parameter -# types. -# - "a" is for any array ([n x Ty]) +# types. $x_gs0, $x_gs1, etc. work like $xN, except that the parameter is a +# groupshared pointer to the overload type. # dxil_all_user_oload_chars must be kept in sync with the indices in # hlsl::OP::TypeSlot in DxilOperations.h. dxil_all_user_oload_chars = "hfd18wiluo<" From 2ab314620db5041b462695d920c21e2a481083d8 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Wed, 11 Mar 2026 16:13:25 -0700 Subject: [PATCH 5/8] [SM6.10] Add `-fcgl` variant to LinAlg Matrix codegen tests --- .../builtins/copyconvertmatrix/nominal.hlsl | 8 +++++++- .../hlsl/linalg/builtins/fillmatrix/nominal.hlsl | 4 ++++ .../builtins/matrixaccumulate/nominal.hlsl | 8 +++++++- .../matrixaccumulatetodescriptor/nominal.hlsl | 8 +++++++- .../builtins/matrixgetcoordinate/nominal.hlsl | 8 +++++++- .../builtins/matrixgetelement/nominal.hlsl | 16 ++++++++++++++-- .../linalg/builtins/matrixlength/nominal.hlsl | 7 ++++++- .../matrixloadfromdescriptor/nominal.hlsl | 8 +++++++- .../builtins/matrixmatrixmultiply/nominal.hlsl | 8 +++++++- .../matrixmatrixmultiplyaccumulate/nominal.hlsl | 16 +++++++++++++--- .../builtins/matrixouterproduct/nominal.hlsl | 8 +++++++- .../matrixqueryaccumulatorlayout/nominal.hlsl | 3 +++ .../builtins/matrixsetelement/nominal.hlsl | 9 ++++++++- .../matrixstoretodescriptor/nominal.hlsl | 8 +++++++- .../builtins/matrixvectormultiply/nominal.hlsl | 8 +++++++- .../matrixvectormultiplyadd/nominal.hlsl | 10 +++++++++- 16 files changed, 120 insertions(+), 17 deletions(-) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/copyconvertmatrix/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/copyconvertmatrix/nominal.hlsl index 6aa841f53b..7b2edd4106 100644 ---
a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/copyconvertmatrix/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/copyconvertmatrix/nominal.hlsl @@ -1,11 +1,17 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M5N4U1S2.mC2M5N4U1S2(i32 -2147483635, %dx.types.LinAlgMatrixC2M5N4U1S2 {{.*}}, i1 false) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M5N4U1S2.mC2M5N4U1S2 + // CHECK-SAME: (i32 -2147483635, %dx.types.LinAlgMatrixC2M5N4U1S2 {{.*}}, i1 false) + // CHECK-SAME: ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, %dx.types.LinAlgMatrixC2M5N4U1S2, i1)" + // CHECK2-SAME: (i32 405, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, %dx.types.LinAlgMatrixC2M5N4U1S2 {{.*}}, i1 false) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(2, 5, 4, 1, 2)]] mat1; __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2; __builtin_LinAlg_CopyConvertMatrix(mat2, mat1, false); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl index 09fc6629c3..896572d79d 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl @@ -1,14 +1,18 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { // CHECK-LABEL: define void @main() // CHECK: %{{.*}} = call %dx.types.LinAlgMatrixC4M5N4U1S2 
@dx.op.linAlgFillMatrix.mC4M5N4U1S2.i32(i32 -2147483636, i32 {{.*}}) ; LinAlgFillMatrix(value) + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, i32)"(i32 406, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, i32 5) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1; __builtin_LinAlg_FillMatrix(mat1, 5); + // CHECK: %{{.*}} = call %dx.types.LinAlgMatrixC5M3N4U0S0 @dx.op.linAlgFillMatrix.mC5M3N4U0S0.f32(i32 -2147483636, float {{.*}}) ; LinAlgFillMatrix(value) + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC5M3N4U0S0*, float)"(i32 406, %dx.types.LinAlgMatrixC5M3N4U0S0* {{.*}}, float 0x40091EB860000000) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(5, 3, 4, 0, 0)]] mat2; __builtin_LinAlg_FillMatrix(mat2, 3.14); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl index 13832992de..c910e34c87 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { @@ -8,7 +9,12 @@ void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(5, 3, 4, 0, 0)]] mat1; __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 1, 1, 0, 0)]] mat2; __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(2, 2, 2, 2, 2)]] mat3; + + // CHECK: call %dx.types.LinAlgMatrixC2M2N2U2S2 @dx.op.linAlgMatrixAccumulate.mC2M2N2U2S2.mC1M1N1U0S0.mC5M3N4U0S0 + // CHECK-SAME: (i32 -2147483624, %dx.types.LinAlgMatrixC1M1N1U0S0 {{.*}}, %dx.types.LinAlgMatrixC5M3N4U0S0 {{.*}}) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - // CHECK: call %dx.types.LinAlgMatrixC2M2N2U2S2
@dx.op.linAlgMatrixAccumulate.mC2M2N2U2S2.mC1M1N1U0S0.mC5M3N4U0S0(i32 -2147483624, %dx.types.LinAlgMatrixC1M1N1U0S0 {{.*}}, %dx.types.LinAlgMatrixC5M3N4U0S0 {{.*}}) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC2M2N2U2S2*, %dx.types.LinAlgMatrixC1M1N1U0S0, + // CHECK2" %dx.types.LinAlgMatrixC5M3N4U0S0)"(i32 415, %dx.types.LinAlgMatrixC2M2N2U2S2* {{.*}}, + // CHECK2" %dx.types.LinAlgMatrixC1M1N1U0S0 {{.*}}, %dx.types.LinAlgMatrixC5M3N4U0S0 {{.*}}) __builtin_LinAlg_MatrixAccumulate(mat3, mat2, mat1); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl index 1b40c200de..0e5a9af7f9 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 RWByteAddressBuffer outbuf; @@ -7,7 +8,12 @@ RWByteAddressBuffer outbuf; void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U1S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle %{{.*}}, i32 5, i32 5, i32 5) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U1S2(i32 -2147483621, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle %{{.*}}, i32 5, i32 5, i32 5) + // CHECK-SAME: ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, %dx.types.Handle, i32, i32, i32)" + // CHECK2-SAME: (i32 419, 
%dx.types.LinAlgMatrixC4M5N4U1S2 %2, %dx.types.Handle {{.*}}, i32 5, i32 5, i32 5) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixAccumulateToDescriptor(mat, outbuf, 5, 5, 5); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetcoordinate/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetcoordinate/nominal.hlsl index 5dd14cabd0..43de77d686 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetcoordinate/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetcoordinate/nominal.hlsl @@ -1,11 +1,17 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U1S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + // CHECK: call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U1S2(i32 -2147483631, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1) + // CHECK-SAME: ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + // CHECK2: call <2 x i32> @"dx.hl.op..<2 x i32> (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, i32)" + // CHECK2-SAME: (i32 407, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; uint2 coord = __builtin_LinAlg_MatrixGetCoordinate(mat, 1); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl index 118037f825..d556ad2978 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: 
dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { @@ -7,10 +8,21 @@ void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - // CHECK: call i32 @dx.op.linAlgMatrixGetElement.i32.mC4M5N4U1S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + // CHECK: call i32 @dx.op.linAlgMatrixGetElement.i32.mC4M5N4U1S2(i32 -2147483630, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 0) + // CHECK-SAME: ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + // CHECK2: call void @"dx.hl.op..void (i32, i32*, %dx.types.LinAlgMatrixC4M5N4U1S2, i32)" + // CHECK2-SAME: (i32 408, i32* %elem1, %dx.types.LinAlgMatrixC4M5N4U1S2 %3, i32 0) uint elem1; __builtin_LinAlg_MatrixGetElement(elem1, mat, 0); - // CHECK: call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U1S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + // CHECK: call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U1S2(i32 -2147483630, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1) + // CHECK-SAME: ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + // CHECK2: call void @"dx.hl.op..void (i32, float*, %dx.types.LinAlgMatrixC4M5N4U1S2, i32)" + // CHECK2-SAME: (i32 408, float* %elem2, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1) float elem2; __builtin_LinAlg_MatrixGetElement(elem2, mat, 1); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixlength/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixlength/nominal.hlsl index 1939b5681e..5b79dfd237 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixlength/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixlength/nominal.hlsl @@ -1,11 +1,16 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T 
cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call i32 @dx.op.linAlgMatrixLength.mC4M5N4U1S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}) ; LinAlgMatrixLength(matrix) + // CHECK: call i32 @dx.op.linAlgMatrixLength.mC4M5N4U1S2(i32 -2147483632, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}) ; LinAlgMatrixLength(matrix) + + // CHECK2: call i32 @"dx.hl.op..i32 (i32, %dx.types.LinAlgMatrixC4M5N4U1S2)" + // CHECK2-SAME: (i32 409, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; uint len = __builtin_LinAlg_MatrixLength(mat); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl index 4685ef0476..4a7e5f30ea 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 ByteAddressBuffer inbuf; @@ -7,7 +8,12 @@ ByteAddressBuffer inbuf; void main() { // CHECK-LABEL: define void @main() - // CHECK: %{{.*}} = call %dx.types.LinAlgMatrixC1M1N1U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC1M1N1U0S0(i32 -2147483634, %dx.types.Handle %{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + // CHECK: %{{.*}} = call %dx.types.LinAlgMatrixC1M1N1U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC1M1N1U0S0 + // CHECK-SAME: (i32 -2147483634, %dx.types.Handle %{{.*}}, i32 0, i32 0, i32 0) + // CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + // CHECK2: call void 
@"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC1M1N1U0S0*, %dx.types.Handle, i32, i32, i32) + // CHECK2-SAME: "(i32 410, %dx.types.LinAlgMatrixC1M1N1U0S0* %mat, %dx.types.Handle {{.*}}, i32 0, i32 0, i32 0) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 1, 1, 0, 0)]] mat; __builtin_LinAlg_MatrixLoadFromDescriptor(mat, inbuf, 0, 0, 0); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiply/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiply/nominal.hlsl index 560882eccd..60fabd8337 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiply/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiply/nominal.hlsl @@ -1,11 +1,17 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U1S2.mC4M5N4U1S2.mC4M5N4U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}) ; LinAlgMatrixMultiply(matrixA,matrixB) + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U1S2.mC4M5N4U1S2.mC4M5N4U1S2(i32 -2147483625, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}) ; LinAlgMatrixMultiply(matrixA,matrixB) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, %dx.types.LinAlgMatrixC4M5N4U1S2, + // CHECK2-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2)"(i32 416, %dx.types.LinAlgMatrixC4M5N4U1S2* %mat2, + // CHECK2-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 %{{[0-9]+}}, %dx.types.LinAlgMatrixC4M5N4U1S2 %{{[0-9]+}}) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1; __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2; 
__builtin_LinAlg_MatrixMatrixMultiply(mat2, mat1, mat1); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/nominal.hlsl index 6f9317056f..87d03d0a30 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/nominal.hlsl @@ -1,12 +1,22 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U1S2.mC4M5N4U1S2.mC4M5N4U1S2.mC4M5N4U1S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + // CHECK: call %dx.types.LinAlgMatrixC4M5N3U1S2 + // CHECK-SAME: @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N3U1S2.mC4M5N4U1S2.mC4M4N3U1S2.mC4M5N3U1S2 + // CHECK-SAME: (i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U1S2 undef, %dx.types.LinAlgMatrixC4M4N3U1S2 undef, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N3U1S2 undef) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N3U1S2*, %dx.types.LinAlgMatrixC4M5N4U1S2, + // CHECK2-SAME: %dx.types.LinAlgMatrixC4M4N3U1S2, %dx.types.LinAlgMatrixC4M5N3U1S2)"(i32 417, + // CHECK2-SAME: %dx.types.LinAlgMatrixC4M5N3U1S2* {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 %{{[0-9]+}}, + // CHECK2-SAME: %dx.types.LinAlgMatrixC4M4N3U1S2 %{{[0-9]+}}, %dx.types.LinAlgMatrixC4M5N3U1S2 %{{[0-9]+}}) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1; - __builtin_LinAlgMatrix 
[[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2; - __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(mat2, mat1, mat1, mat1); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 4, 3, 1, 2)]] mat2; + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 3, 1, 2)]] mat3; + __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(mat3, mat1, mat2, mat3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixouterproduct/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixouterproduct/nominal.hlsl index 18cc908980..1dfc5bd28e 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixouterproduct/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixouterproduct/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { @@ -7,7 +8,12 @@ void main() { float4 lhs = {1,2,3,4}; float4 rhs = {4,3,2,1}; - // CHECK: call %dx.types.LinAlgMatrixC2M2N2U2S2 @dx.op.linAlgMatrixOuterProduct.mC2M2N2U2S2.v4f32.v4f32(i32 -2147483619, <4 x float> {{.*}}, <4 x float> {{.*}}) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + // CHECK: call %dx.types.LinAlgMatrixC2M2N2U2S2 @dx.op.linAlgMatrixOuterProduct.mC2M2N2U2S2.v4f32.v4f32 + // CHECK-SAME: (i32 -2147483619, <4 x float> {{.*}}, <4 x float> {{.*}}) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC2M2N2U2S2*, <4 x float>, <4 x float>)" + // CHECK2-SAME: (i32 421, %dx.types.LinAlgMatrixC2M2N2U2S2* {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(2, 2, 2, 2, 2)]] mat; __builtin_LinAlg_MatrixOuterProduct(mat, lhs, rhs); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/nominal.hlsl
b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/nominal.hlsl index bf0b299da6..9d0cdaf097 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/nominal.hlsl @@ -1,10 +1,13 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { // CHECK-LABEL: define void @main() // CHECK: call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + // CHECK2: call i32 @"dx.hl.op..i32 (i32)"(i32 418) uint layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout(); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixsetelement/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixsetelement/nominal.hlsl index caf1938980..0ab34c4a8d 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixsetelement/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixsetelement/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 ByteAddressBuffer inbuf; RWByteAddressBuffer outbuf; @@ -8,7 +9,13 @@ RWByteAddressBuffer outbuf; void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U1S2.mC4M5N4U1S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1, i32 5) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U1S2.mC4M5N4U1S2.i32 + // CHECK-SAME: (i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1, i32 5) + // CHECK-SAME: ; 
LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, %dx.types.LinAlgMatrixC4M5N4U1S2, i32, i32) + // CHECK2-SAME: "(i32 412, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1, i32 5) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1; __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2; __builtin_LinAlg_MatrixSetElement(mat2, mat1, 1, 5); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl index a168a8a813..049ec1fe5e 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 RWByteAddressBuffer outbuf; @@ -7,7 +8,12 @@ RWByteAddressBuffer outbuf; void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U1S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle %{{.*}}, i32 1, i32 1, i32 0) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U1S2(i32 -2147483628, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle %{{.*}}, i32 1, i32 1, i32 0) + // CHECK-SAME: ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, %dx.types.Handle, i32, i32, i32) + // CHECK2-SAME: "(i32 413, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 1, i32 1, i32 0) __builtin_LinAlgMatrix 
[[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1; __builtin_LinAlg_MatrixStoreToDescriptor(mat1, outbuf, 1, 1, 0); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl index 4809cbf2df..784c601585 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { @@ -9,6 +10,11 @@ void main() { float4 vec = {1,2,3,4}; float4 result; - // CHECK: call <4 x float> @dx.op.linAlgMatVecMul.v4f32.mC4M5N4U1S2.v4f32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, <4 x float> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + // CHECK: call <4 x float> @dx.op.linAlgMatVecMul.v4f32.mC4M5N4U1S2.v4f32(i32 -2147483623, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, <4 x float> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>*, %dx.types.LinAlgMatrixC4M5N4U1S2, <4 x float>, i32) + // CHECK2-SAME: "(i32 422, <4 x float>* %result, %dx.types.LinAlgMatrixC4M5N4U1S2 %5, <4 x float> %3, i32 1) __builtin_LinAlg_MatrixVectorMultiply(result, mat, vec, 1); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl index 91fc94c9e8..1efc17cf88 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -E main %s | 
FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 [numthreads(1,1,1)] void main() { @@ -8,6 +9,13 @@ void main() { float4 vec = {1,2,3,4}; float4 result; - // CHECK: call <4 x float> @dx.op.linAlgMatVecMulAdd.v4f32.mC5M3N4U0S0.v4f32.v4f32(i32 -2147483622, %dx.types.LinAlgMatrixC5M3N4U0S0 {{.*}}, <4 x float> , i32 1, <4 x float> {{.*}}, i32 0) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + // CHECK: call <4 x float> @dx.op.linAlgMatVecMulAdd.v4f32.mC5M3N4U0S0.v4f32.v4f32(i32 -2147483622, + // CHECK-SAME: %dx.types.LinAlgMatrixC5M3N4U0S0 {{.*}}, <4 x float> , i32 1, <4 x float> {{.*}}, i32 0) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>*, %dx.types.LinAlgMatrixC5M3N4U0S0, <4 x float>, i32, + // CHECK2-SAME: <4 x float>, i32)"(i32 423, <4 x float>* %result, %dx.types.LinAlgMatrixC5M3N4U0S0 %6, + // CHECK2-SAME: <4 x float> %4, i32 1, <4 x float> %3, i32 0) __builtin_LinAlg_MatrixVectorMultiplyAdd(result, mat, vec, 1, result, 0); } From 28770b62b5277d7c886f36f55af14c3803354eec Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Wed, 11 Mar 2026 16:41:35 -0700 Subject: [PATCH 6/8] few more changes --- .../hlsl/linalg/builtins/fillmatrix/nominal.hlsl | 14 ++++++++++---- .../linalg/builtins/matrixaccumulate/nominal.hlsl | 4 ++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl index 896572d79d..3ff7357f08 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl @@ -6,13 +6,19 @@ void main() { // CHECK-LABEL: define void @main() - // CHECK: %{{.*}} = call %dx.types.LinAlgMatrixC4M5N4U1S2 
@dx.op.linAlgFillMatrix.mC4M5N4U1S2.i32(i32 -2147483636, i32 {{.*}}) ; LinAlgFillMatrix(value) - // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, i32)"(i32 406, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, i32 5), + // CHECK: %{{.*}} = call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgFillMatrix.mC4M5N4U1S2.i32 + // CHECK-SAME: (i32 -2147483636, i32 {{.*}}) ; LinAlgFillMatrix(value) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, i32)" + // CHECK2-SAME: (i32 406, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, i32 5), __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1; __builtin_LinAlg_FillMatrix(mat1, 5); - // CHECK: %{{.*}} = call %dx.types.LinAlgMatrixC5M3N4U0S0 @dx.op.linAlgFillMatrix.mC5M3N4U0S0.f32(i32 -2147483636, float {{.*}}) ; LinAlgFillMatrix(value) - // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC5M3N4U0S0*, float)"(i32 406, %dx.types.LinAlgMatrixC5M3N4U0S0* {{.*}}, float 0x40091EB860000000) + // CHECK: %{{.*}} = call %dx.types.LinAlgMatrixC5M3N4U0S0 @dx.op.linAlgFillMatrix.mC5M3N4U0S0.f32 + // CHECK-SAME: (i32 -2147483636, float {{.*}}) ; LinAlgFillMatrix(value) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC5M3N4U0S0*, float)" + // CHECK2-SAME: (i32 406, %dx.types.LinAlgMatrixC5M3N4U0S0* {{.*}}, float 0x40091EB860000000) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(5, 3, 4, 0, 0)]] mat2; __builtin_LinAlg_FillMatrix(mat2, 3.14); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl index c910e34c87..56bde166c6 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl @@ -14,7 +14,7 @@ void main() { // CHECK-SAME: (i32 -2147483624, %dx.types.LinAlgMatrixC1M1N1U0S0 {{.*}}, 
%dx.types.LinAlgMatrixC5M3N4U0S0 {{.*}}) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC2M2N2U2S2*, %dx.types.LinAlgMatrixC1M1N1U0S0, - // CHECK2" %dx.types.LinAlgMatrixC5M3N4U0S0)"(i32 415, %dx.types.LinAlgMatrixC2M2N2U2S2* {{.*}}, - // CHECK2" %dx.types.LinAlgMatrixC1M1N1U0S0 {{.*}}, %dx.types.LinAlgMatrixC5M3N4U0S0 {{.*}}) + // CHECK2-SAME: %dx.types.LinAlgMatrixC5M3N4U0S0)"(i32 415, %dx.types.LinAlgMatrixC2M2N2U2S2* %mat3, + // CHECK2-SAME: %dx.types.LinAlgMatrixC1M1N1U0S0 %{{[0-9]+}}, %dx.types.LinAlgMatrixC5M3N4U0S0 %{{[0-9]+}}) __builtin_LinAlg_MatrixAccumulate(mat3, mat2, mat1); } From 3aeecb40f2c2a6b16138216a7e29474316a3913d Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Fri, 13 Mar 2026 15:02:48 -0700 Subject: [PATCH 7/8] Add remaining 3 ops and fix test breaks --- .../matrixaccumulatetodescriptor/nominal.hlsl | 2 +- .../builtins/matrixaccumulatetomemory/nominal.hlsl | 13 +++++++++++-- .../linalg/builtins/matrixgetelement/nominal.hlsl | 2 +- .../builtins/matrixloadfrommemory/nominal.hlsl | 12 ++++++++++-- .../builtins/matrixstoretomemory/nominal.hlsl | 7 +++++-- .../builtins/matrixvectormultiply/nominal.hlsl | 2 +- .../builtins/matrixvectormultiplyadd/nominal.hlsl | 7 ++++--- 7 files changed, 33 insertions(+), 12 deletions(-) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl index 0e5a9af7f9..de0ebd0123 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl @@ -13,7 +13,7 @@ void main() { // CHECK-SAME: ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, %dx.types.Handle, i32, i32, i32)" - // 
CHECK2-SAME: (i32 419, %dx.types.LinAlgMatrixC4M5N4U1S2 %2, %dx.types.Handle {{.*}}, i32 5, i32 5, i32 5) + // CHECK2-SAME: (i32 419, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 5, i32 5, i32 5) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixAccumulateToDescriptor(mat, outbuf, 5, 5, 5); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl index f05366d62f..2d8b57159c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -1,14 +1,23 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 -// CHECK: @{{.*}} = external addrspace(3) global [64 x float] +// CHECK: @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float addrspace(3)* getelementptr inbounds ([64 x float], + // CHECK-SAME: [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 1, i32 2, i32 3) + // CHECK-SAME: ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, + // CHECK2-SAME: [64 x float] 
addrspace(3)*, i32, i32, i32)"(i32 420, + // CHECK2-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", + // CHECK2-SAME: i32 1, i32 2, i32 3) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl index d556ad2978..f3a057737c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl @@ -13,7 +13,7 @@ void main() { // CHECK-SAME: ; LinAlgMatrixGetElement(matrix,threadLocalIndex) // CHECK2: call void @"dx.hl.op..void (i32, i32*, %dx.types.LinAlgMatrixC4M5N4U1S2, i32)" - // CHECK2-SAME: (i32 408, i32* %elem1, %dx.types.LinAlgMatrixC4M5N4U1S2 %3, i32 0) + // CHECK2-SAME: (i32 408, i32* %elem1, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 0) uint elem1; __builtin_LinAlg_MatrixGetElement(elem1, mat, 0); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl index 9c1e8303b2..c96c1cfa9a 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -1,14 +1,22 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 -// CHECK: @{{.*}} = external addrspace(3) global [64 x float] +// CHECK: @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call 
%dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, float addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32 + // CHECK-SAME: (i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], + // CHECK-SAME: [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 1, i32 2, i32 3) + // CHECK-SAME: ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, [64 x float] addrspace(3)*, + // CHECK2-SAME: i32, i32, i32)"(i32 411, %dx.types.LinAlgMatrixC4M5N4U1S2* %mat, [64 x float] addrspace(3)* + // CHECK2-SAME: @"\01?SharedArr@@3PAMA", i32 1, i32 2, i32 3) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl index 07a4fa38e5..0871e31f90 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -1,14 +1,17 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -// CHECK: @{{.*}} = external addrspace(3) global [64 x float] +// CHECK: @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float] groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ;
LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float addrspace(3)* getelementptr + // CHECK-SAME: inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", + // CHECK-SAME: i32 0, i32 0), i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl index 784c601585..203026f9ed 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl @@ -15,6 +15,6 @@ void main() { // CHECK-SAME: float 3.000000e+00, float 4.000000e+00>, i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>*, %dx.types.LinAlgMatrixC4M5N4U1S2, <4 x float>, i32) - // CHECK2-SAME: "(i32 422, <4 x float>* %result, %dx.types.LinAlgMatrixC4M5N4U1S2 %5, <4 x float> %3, i32 1) + // CHECK2-SAME: "(i32 422, <4 x float>* %result, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, <4 x float> {{.*}}, i32 1) __builtin_LinAlg_MatrixVectorMultiply(result, mat, vec, 1); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl index 1efc17cf88..d0be14a452 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl @@ -14,8 +14,9 @@ void main() { // CHECK-SAME: 
float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 1, <4 x float> {{.*}}, i32 0) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) - // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>*, %dx.types.LinAlgMatrixC5M3N4U0S0, <4 x float>, i32, - // CHECK2-SAME: <4 x float>, i32)"(i32 423, <4 x float>* %result, %dx.types.LinAlgMatrixC5M3N4U0S0 %6, - // CHECK2-SAME: <4 x float> %4, i32 1, <4 x float> %3, i32 0) + // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>*, %dx.types.LinAlgMatrixC5M3N4U0S0, <4 x float>, + // CHECK2-SAME: i32, <4 x float>, i32)"(i32 423, <4 x float>* %result, %dx.types.LinAlgMatrixC5M3N4U0S0 %{{[0-9]+}}, + // CHECK2-SAME: <4 x float> %{{[0-9]+}}, i32 1, <4 x float> %{{[0-9]+}}, i32 0) + __builtin_LinAlg_MatrixVectorMultiplyAdd(result, mat, vec, 1, result, 0); } From cc17cc48e3261fe729966a05f5e14021312a7ff2 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Tue, 17 Mar 2026 18:20:43 -0700 Subject: [PATCH 8/8] Update tests after merge, add missing -fcgl case --- .../linalg/builtins/matrixaccumulatetomemory/nominal.hlsl | 2 +- .../hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl | 2 +- .../hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl index 2d8b57159c..07222e0fbe 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -1,6 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 +// RUN: %dxc -T cs_6_10 -HV 202x -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 
// CHECK: @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 groupshared float SharedArr[64]; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl index c96c1cfa9a..3a0e114390 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -1,6 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 +// RUN: %dxc -T cs_6_10 -HV 202x -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 // CHECK: @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 groupshared float SharedArr[64]; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl index 0871e31f90..7d6f3b120c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -1,5 +1,6 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -HV 202x -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 // CHECK: @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float] groupshared float SharedArr[64]; @@ -12,6 +13,10 @@ void main() { // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float addrspace(3)* getelementptr // CHECK-SAME: inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", // CHECK-SAME: i32 0, i32 0), i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + + // CHECK2: call void @"dx.hl.op..void (i32, 
%dx.types.LinAlgMatrixC4M5N4U1S2, [64 x float] addrspace(3)*, i32, i32, i32)" + // CHECK2-SAME: (i32 414, %dx.types.LinAlgMatrixC4M5N4U1S2 %{{.*}}, [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", + // CHECK2-SAME: i32 1, i32 2, i32 3) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 1, 2, 3); }