From e520d0a5d052788c42ed9a5b8567dd9e52fb779a Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 19 Feb 2026 15:37:50 -0700 Subject: [PATCH 1/7] [SM6.10] Implement groupshared Builtins Implements the Load/Store/Accumulate to memory groupshared builtins following the pattern of the previous builtins --- include/dxc/DXIL/DxilInstructions.h | 18 +++--- include/dxc/DXIL/DxilOperations.h | 1 + lib/DXIL/DxilOperations.cpp | 46 ++++++++++----- lib/HLSL/HLOperationLower.cpp | 56 ++++++++++++++++++- .../matrixaccumulatetomemory/nominal.hlsl | 19 +++++++ .../matrixloadfrommemory/nominal.hlsl | 19 +++++++ .../builtins/matrixstoretomemory/nominal.hlsl | 19 +++++++ .../matrixaccumulatetomemory/ast.hlsl | 24 ++++++++ .../unavailable_pre_sm610.hlsl | 15 +++++ .../builtins/matrixloadfrommemory/ast.hlsl | 24 ++++++++ .../unavailable_pre_sm610.hlsl | 15 +++++ .../builtins/matrixstoretomemory/ast.hlsl | 24 ++++++++ .../unavailable_pre_sm610.hlsl | 15 +++++ .../hlsl/linalg/builtins/stage-errors.hlsl | 16 ++++++ utils/hct/gen_intrin_main.txt | 6 +- utils/hct/hctdb.py | 20 +++---- 16 files changed, 297 insertions(+), 40 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl create mode 100644 
tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 8c48202ce0..941eab6474 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10651,14 +10651,14 @@ struct DxilInst_LinAlgMatrixLoadFromMemory { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_groupsharedArr = 1, + arg_memory = 1, arg_offset = 2, arg_stride = 3, arg_layout = 4, }; // Accessors - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(1); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_memory() const { return Instr->getOperand(1); } + void set_memory(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_offset() const { return Instr->getOperand(2); } void set_offset(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_stride() const { return Instr->getOperand(3); } @@ -10854,7 +10854,7 @@ struct DxilInst_LinAlgMatrixStoreToMemory { // Operand indexes enum OperandIdx { arg_matrix = 1, - arg_groupsharedArr = 2, + arg_memory = 2, arg_offset = 3, arg_stride = 4, arg_layout = 5, @@ -10862,8 +10862,8 @@ struct DxilInst_LinAlgMatrixStoreToMemory { // Accessors llvm::Value *get_matrix() const { return Instr->getOperand(1); } void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(2); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_memory() const { return Instr->getOperand(2); } + void set_memory(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } llvm::Value *get_stride() const { return Instr->getOperand(4); } @@ -11091,7 +11091,7 @@ struct 
DxilInst_LinAlgMatrixAccumulateToMemory { // Operand indexes enum OperandIdx { arg_matrix = 1, - arg_groupsharedArr = 2, + arg_memory = 2, arg_offset = 3, arg_stride = 4, arg_layout = 5, @@ -11099,8 +11099,8 @@ struct DxilInst_LinAlgMatrixAccumulateToMemory { // Accessors llvm::Value *get_matrix() const { return Instr->getOperand(1); } void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(2); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_memory() const { return Instr->getOperand(2); } + void set_memory(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } llvm::Value *get_stride() const { return Instr->getOperand(4); } diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index bab4bffc6e..85df375b3a 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -212,6 +212,7 @@ class OP { TS_UDT = 8, // Ex: %"struct.MyStruct" * TS_Object = 9, // Ex: %"class.StructuredBuffer" TS_Vector = 10, // Ex: <8 x i16> + TS_Array = 11, // Ex: [8 x float] TS_MaskBitCount, // Types used in Mask end here // TS_Extended is only used to identify the unnamed struct type used to wrap // multiple overloads when using GetTypeSlot. 
diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 4138b3d930..02dcfe65a0 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2863,8 +2863,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixLoadFromMemory", Attribute::None, 2, - {{0x200}, {0x63}}, - {{0x0}, {0x0}}}, // Overloads: o,hfwi + {{0x200}, {0x800}}, + {{0x0}, {0x0}}}, // Overloads: o,a {OC::LinAlgMatrixLength, "LinAlgMatrixLength", OCC::LinAlgMatrixLength, @@ -2911,8 +2911,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixStoreToMemory", Attribute::None, 2, - {{0x200}, {0x63}}, - {{0x0}, {0x0}}}, // Overloads: o,hfwi + {{0x200}, {0x800}}, + {{0x0}, {0x0}}}, // Overloads: o,a {OC::LinAlgMatrixQueryAccumulatorLayout, "LinAlgMatrixQueryAccumulatorLayout", OCC::LinAlgMatrixQueryAccumulatorLayout, @@ -2967,8 +2967,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixAccumulateToMemory", Attribute::None, 2, - {{0x200}, {0x63}}, - {{0x0}, {0x0}}}, // Overloads: o,hfwi + {{0x200}, {0x800}}, + {{0x0}, {0x0}}}, // Overloads: o,a {OC::LinAlgMatrixOuterProduct, "LinAlgMatrixOuterProduct", OCC::LinAlgMatrixOuterProduct, @@ -3152,6 +3152,8 @@ unsigned OP::GetTypeSlot(Type *pType) { return TS_Extended; case Type::VectorTyID: return TS_Vector; + case Type::ArrayTyID: + return TS_Array; default: break; } @@ -3166,26 +3168,39 @@ const char *OP::GetOverloadTypeName(unsigned TypeSlot) { StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl<char> &Storage) { DXASSERT(!Ty->isVoidTy(), "must not pass void type here"); unsigned TypeSlot = OP::GetTypeSlot(Ty); + if (TypeSlot < TS_BasicCount) { return GetOverloadTypeName(TypeSlot); - } else if (TypeSlot == TS_UDT) { + } + + switch (TypeSlot) { + case TS_UDT: { if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); StructType *ST = cast<StructType>(Ty); return ST->getStructName(); - } else if (TypeSlot == TS_Object) { + } + case TS_Object: { StructType
*ST = cast<StructType>(Ty); if (dxilutil::IsHLSLLinAlgMatrixType(Ty)) return (Twine("m") + Twine(dxilutil::GetHLSLLinAlgMatrixTypeMangling(ST))) .toStringRef(Storage); return ST->getStructName(); - } else if (TypeSlot == TS_Vector) { + } + case TS_Vector: { VectorType *VecTy = cast<VectorType>(Ty); return (Twine("v") + Twine(VecTy->getNumElements()) + Twine( GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) .toStringRef(Storage); - } else if (TypeSlot == TS_Extended) { + } + case TS_Array: { + if (Ty->isPointerTy()) + Ty = Ty->getPointerElementType(); + ArrayType *ArrTy = cast<ArrayType>(Ty); + return GetOverloadTypeName(OP::GetTypeSlot(ArrTy->getArrayElementType())); + } + case TS_Extended: { DXASSERT(isa<StructType>(Ty), "otherwise, extended overload type not wrapped in struct type."); StructType *ST = cast<StructType>(Ty); @@ -3200,11 +3215,14 @@ StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl<char> &Storage) { OS << GetTypeName(ST->getElementType(I), TempStr); } return OS.str(); - } else { - raw_svector_ostream OS(Storage); - Ty->print(OS); - return OS.str(); } + default: + break; + } + + raw_svector_ostream OS(Storage); + Ty->print(OS); + return OS.str(); } StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 4f22a4598d..9ea6166f36 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7226,6 +7226,53 @@ Value *TranslateLinAlgCopyConvertMatrix(CallInst *CI, IntrinsicOp IOP, return nullptr; } +Value *TranslateLinAlgMatrixLoadFromMemory( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *MatrixPtr = CI->getArgOperand(1); + DXASSERT_NOMSG(isa<PointerType>(MatrixPtr->getType())); + Type *MatrixType = MatrixPtr->getType()->getPointerElementType(); + + Value *Arr = CI->getArgOperand(2); + Value *Offset = CI->getArgOperand(3); + Value
*Stride = CI->getArgOperand(4); + Value *Layout = CI->getArgOperand(5); + + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, Arr->getType()}); + + Value *Matrix = + Builder.CreateCall(DxilFunc, {OpArg, Arr, Offset, Stride, Layout}); + Builder.CreateStore(Matrix, MatrixPtr); + + return nullptr; +} + +Value *TranslateLinAlgMatrixAccumStoreToMemory( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *Matrix = CI->getArgOperand(1); + Value *Arr = CI->getArgOperand(2); + Value *Offset = CI->getArgOperand(3); + Value *Stride = CI->getArgOperand(4); + Value *Layout = CI->getArgOperand(5); + + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); + Function *DxilFunc = + HlslOp->GetOpFunc(OpCode, {Matrix->getType(), Arr->getType()}); + + return Builder.CreateCall(DxilFunc, + {OpArg, Matrix, Arr, Offset, Stride, Layout}); +} + } // namespace // Lower table. 
@@ -7989,14 +8036,16 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromDescriptor, TranslateLinAlgMatrixLoadFromDescriptor, DXIL::OpCode::LinAlgMatrixLoadFromDescriptor}, - {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromMemory, EmptyLower, + {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromMemory, + TranslateLinAlgMatrixLoadFromMemory, DXIL::OpCode::LinAlgMatrixLoadFromMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixSetElement, TranslateLinAlgMatrixSetElement, DXIL::OpCode::LinAlgMatrixSetElement}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToDescriptor, TranslateLinAlgMatrixAccumStoreToDescriptor, DXIL::OpCode::LinAlgMatrixStoreToDescriptor}, - {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToMemory, EmptyLower, + {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToMemory, + TranslateLinAlgMatrixAccumStoreToMemory, DXIL::OpCode::LinAlgMatrixStoreToMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulate, TranslateLinAlgMatrixAccumulate, DXIL::OpCode::LinAlgMatrixAccumulate}, @@ -8010,7 +8059,8 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToDescriptor, TranslateLinAlgMatrixAccumStoreToDescriptor, DXIL::OpCode::LinAlgMatrixAccumulateToDescriptor}, - {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToMemory, EmptyLower, + {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToMemory, + TranslateLinAlgMatrixAccumStoreToMemory, DXIL::OpCode::LinAlgMatrixAccumulateToMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixOuterProduct, TranslateLinAlgMatrixOuterProduct, DXIL::OpCode::LinAlgMatrixOuterProduct}, diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl new file mode 100644 index 0000000000..5461600016 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -0,0 +1,19 @@ +// 
REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); +} + +// CHECK: @{{.*}} = external addrspace(3) global [64 x float] + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + fn(SharedArr); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl new file mode 100644 index 0000000000..a5dd722f1b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -0,0 +1,19 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); +} + +// CHECK: @{{.*}} = external addrspace(3) global [64 x float] + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + fn(SharedArr); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl new file mode 100644 index 
0000000000..f6c38536a3 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -0,0 +1,19 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); +} + +// CHECK: @{{.*}} = external addrspace(3) global [64 x float] + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl new file mode 100644 index 0000000000..e3694e1eb4 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl @@ -0,0 +1,24 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 420 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" 
+ +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); +} + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl new file mode 100644 index 0000000000..8048e22922 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -T cs_6_9 -HV 202x -E main %s -verify + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64], float F) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + + // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixAccumulateToMemory potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); +} + +[numthreads(4,1,1)] +void main() { + fn(SharedArr, 6.0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl new file mode 100644 index 0000000000..2874ba3c37 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl @@ -0,0 +1,24 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const 
__attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 411 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); +} + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl new file mode 100644 index 0000000000..af3dd3b846 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -T cs_6_9 -HV 202x -E main %s -verify + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64], float F) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + + // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixLoadFromMemory potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); +} + +[numthreads(4,1,1)] +void main() { + fn(SharedArr, 6.0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl new file mode 100644 index 0000000000..1c2520fe6c --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl @@ -0,0 +1,24 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: FunctionDecl {{.*}} implicit used 
__builtin_LinAlg_MatrixStoreToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 414 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); +} + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl new file mode 100644 index 0000000000..934963f5dc --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -T cs_6_9 -HV 202x -E main %s -verify + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64], float F) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + + // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixStoreToMemory potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); +} + +[numthreads(4,1,1)] +void main() { + fn(SharedArr, 6.0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl index fbec113e81..c9ebd7adf8 100644 --- 
a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl @@ -8,8 +8,12 @@ // RUN: %dxc -T lib_6_10 -DMATRIX_STORE_TO_DESCRIPTOR %s -verify // RUN: %dxc -T lib_6_10 -DMATRIX_LENGTH %s -verify // RUN: %dxc -T lib_6_10 -DMATRIX_ACCUMULATE %s -verify +// RUN: %dxc -T lib_6_10 -DMATRIX_LOAD_FROM_MEMORY %s -verify +// RUN: %dxc -T lib_6_10 -DMATRIX_STORE_TO_MEMORY %s -verify +// RUN: %dxc -T lib_6_10 -DMATRIX_ACCUMULATE_TO_MEMORY %s -verify RWByteAddressBuffer buf; +groupshared float gs_arr[64]; void CallFunction() { @@ -62,6 +66,18 @@ void CallFunction() #define DO_FUNC __builtin_LinAlg_MatrixAccumulate(mat1, mat2, mat3); #endif +#ifdef MATRIX_LOAD_FROM_MEMORY + #define DO_FUNC __builtin_LinAlg_MatrixLoadFromMemory(mat1, gs_arr, 0, 0, 0); +#endif + +#ifdef MATRIX_STORE_TO_MEMORY + #define DO_FUNC __builtin_LinAlg_MatrixStoreToMemory(mat1, gs_arr, 0, 0, 0); +#endif + +#ifdef MATRIX_ACCUMULATE_TO_MEMORY + #define DO_FUNC __builtin_LinAlg_MatrixAccumulateToMemory(mat1, gs_arr, 0, 0, 0); +#endif + // The builtins below are allowed in all stages, if they raise an error // then the test will fail with "saw unexpected diagnostic" uint layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout(); diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 49aa2f151b..4810442a7e 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -402,13 +402,13 @@ void [[min_sm=6.10]] __builtin_LinAlg_FillMatrix(out LinAlgMatrix ret, in numeri void [[min_sm=6.10]] __builtin_LinAlg_CopyConvertMatrix(out LinAlgMatrix ret, in LinAlgMatrix source, in bool transpose); void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in ByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in RWByteAddressBuffer buf, in uint offset, in uint stride, in 
uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromMemory(out LinAlgMatrix ret, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromMemory(out LinAlgMatrix ret, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); uint [[min_sm=6.10]] __builtin_LinAlg_MatrixLength(in LinAlgMatrix matrix); uint<2> [[min_sm=6.10]] __builtin_LinAlg_MatrixGetCoordinate(in LinAlgMatrix matrix, in uint threadLocalIndex); void [[min_sm=6.10]] __builtin_LinAlg_MatrixGetElement(out numeric ret, in LinAlgMatrix matrix, in uint threadLocalIndex); void [[min_sm=6.10]] __builtin_LinAlg_MatrixSetElement(out LinAlgMatrix ret, in LinAlgMatrix matrix, in uint threadLocalIndex, in numeric value); void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(in LinAlgMatrix matrix, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); uint [[min_sm=6.10]] __builtin_LinAlg_MatrixQueryAccumulatorLayout(); void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiply(out LinAlgMatrix matrixC, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB); void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(out LinAlgMatrix matrixR, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB, in LinAlgMatrix matrixC); @@ -416,7 +416,7 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulate(out LinAlgMatrix matrixC, void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiply(out numeric<> ret, in LinAlgMatrix mat, in numeric<> input, in uint input_interp); void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiplyAdd(out numeric<> ret, in LinAlgMatrix mat, in 
numeric<> input, in uint input_interp, in numeric<> bias, in uint bias_interp); void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric<> vecA, in numeric<> vecB); } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 71f035e059..5dbb59102f 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -52,9 +52,10 @@ # - "," is used to separate multiple overload dimensions. # - When used, only $x0, $x1, etc. are supported for overloaded parameter # types. +# - "a" is for any array ([n x Ty]) # dxil_all_user_oload_chars must be kept in sync with the indices in # hlsl::OP::TypeSlot in DxilOperations.h. 
-dxil_all_user_oload_chars = "hfd18wiluo<" +dxil_all_user_oload_chars = "hfd18wiluo Date: Tue, 10 Mar 2026 15:32:04 -0600 Subject: [PATCH 2/7] Address comments --- .../builtins/matrixaccumulatetomemory/nominal.hlsl | 13 ++++--------- .../builtins/matrixloadfrommemory/nominal.hlsl | 13 ++++--------- .../builtins/matrixstoretomemory/nominal.hlsl | 13 ++++--------- .../builtins/matrixaccumulatetomemory/ast.hlsl | 8 ++------ .../unavailable_pre_sm610.hlsl | 10 +++------- .../linalg/builtins/matrixloadfrommemory/ast.hlsl | 8 ++------ .../matrixloadfrommemory/unavailable_pre_sm610.hlsl | 10 +++------- .../linalg/builtins/matrixstoretomemory/ast.hlsl | 8 ++------ .../matrixstoretomemory/unavailable_pre_sm610.hlsl | 10 +++------- 9 files changed, 27 insertions(+), 66 deletions(-) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl index 5461600016..cfdac39028 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -1,19 +1,14 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -groupshared float SharedArr[64]; - -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); -} - // CHECK: @{{.*}} = external addrspace(3) global [64 x float] +groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - fn(SharedArr); + // CHECK: call void 
@dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl index a5dd722f1b..a3e383ca58 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -1,19 +1,14 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -groupshared float SharedArr[64]; - -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); -} - // CHECK: @{{.*}} = external addrspace(3) global [64 x float] +groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - fn(SharedArr); + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 1, 2, 3); } diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl index f6c38536a3..4b5b50c357 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -1,19 +1,14 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -groupshared float SharedArr[64]; - -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); -} - // CHECK: @{{.*}} = external addrspace(3) global [64 x float] +groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - fn(SharedArr); + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl index e3694e1eb4..d300796b67 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl @@ -12,13 +12,9 @@ groupshared float SharedArr[64]; -void fn(groupshared 
float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); -} - [shader("compute")] [numthreads(1,1,1)] void main() { - fn(SharedArr); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl index 8048e22922..e5a9ea4895 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl @@ -2,14 +2,10 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64], float F) { +[numthreads(4,1,1)] +void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixAccumulateToMemory potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); -} - -[numthreads(4,1,1)] -void main() { - fn(SharedArr, 6.0); + __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl index 2874ba3c37..3ac0de3880 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl @@ -12,13 +12,9 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); -} - 
[shader("compute")] [numthreads(1,1,1)] void main() { - fn(SharedArr); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl index af3dd3b846..d8472ad92b 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl @@ -2,14 +2,10 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64], float F) { +[numthreads(4,1,1)] +void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixLoadFromMemory potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); -} - -[numthreads(4,1,1)] -void main() { - fn(SharedArr, 6.0); + __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl index 1c2520fe6c..c726d119eb 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl @@ -12,13 +12,9 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); -} - [shader("compute")] [numthreads(1,1,1)] void main() { - fn(SharedArr); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 0, 0, 
0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl index 934963f5dc..d3468a2a02 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl @@ -2,14 +2,10 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64], float F) { +[numthreads(4,1,1)] +void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixStoreToMemory potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); -} - -[numthreads(4,1,1)] -void main() { - fn(SharedArr, 6.0); + __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 0, 0, 0); } From 6ff022c0aa01b25464d11cc8139e96900135dcb2 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 11 Mar 2026 19:34:20 -0600 Subject: [PATCH 3/7] Rework based on feedback --- include/dxc/DXIL/DxilOperations.h | 1 - lib/DXIL/DxilOperations.cpp | 123 +++++++++--------- lib/HLSL/HLOperationLower.cpp | 17 ++- .../matrixaccumulatetomemory/nominal.hlsl | 2 +- .../matrixloadfrommemory/nominal.hlsl | 2 +- .../builtins/matrixstoretomemory/nominal.hlsl | 2 +- utils/hct/hctdb.py | 22 ++-- utils/hct/hctdb_instrhelp.py | 39 ++++-- 8 files changed, 117 insertions(+), 91 deletions(-) diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 85df375b3a..bab4bffc6e 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -212,7 +212,6 @@ class OP { TS_UDT = 8, // Ex: %"struct.MyStruct" * TS_Object = 9, // Ex: %"class.StructuredBuffer" TS_Vector = 10, // Ex: <8 x i16> - TS_Array = 11, // Ex: [8 x float] TS_MaskBitCount, // Types used in Mask end here // 
TS_Extended is only used to identify the unnamed struct type used to wrap // multiple overloads when using GetTypeSlot. diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 02dcfe65a0..ffff4eccd9 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2863,8 +2863,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixLoadFromMemory", Attribute::None, 2, - {{0x200}, {0x800}}, - {{0x0}, {0x0}}}, // Overloads: o,a + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::LinAlgMatrixLength, "LinAlgMatrixLength", OCC::LinAlgMatrixLength, @@ -2911,8 +2911,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixStoreToMemory", Attribute::None, 2, - {{0x200}, {0x800}}, - {{0x0}, {0x0}}}, // Overloads: o,a + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::LinAlgMatrixQueryAccumulatorLayout, "LinAlgMatrixQueryAccumulatorLayout", OCC::LinAlgMatrixQueryAccumulatorLayout, @@ -2967,8 +2967,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixAccumulateToMemory", Attribute::None, 2, - {{0x200}, {0x800}}, - {{0x0}, {0x0}}}, // Overloads: o,a + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::LinAlgMatrixOuterProduct, "LinAlgMatrixOuterProduct", OCC::LinAlgMatrixOuterProduct, @@ -3152,8 +3152,6 @@ unsigned OP::GetTypeSlot(Type *pType) { return TS_Extended; case Type::VectorTyID: return TS_Vector; - case Type::ArrayTyID: - return TS_Array; default: break; } @@ -3194,12 +3192,6 @@ StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) .toStringRef(Storage); } - case TS_Array: { - if (Ty->isPointerTy()) - Ty = Ty->getPointerElementType(); - ArrayType *ArrTy = cast(Ty); - return GetOverloadTypeName(OP::GetTypeSlot(ArrTy->getArrayElementType())); - } case TS_Extended: { DXASSERT(isa(Ty), "otherwise, extended overload type not wrapped in 
struct type."); @@ -4332,9 +4324,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { #define VEC2(_y) A(VectorType::get(_y, 2)) #define VEC4(_y) A(GetStructVectorType(4, _y)) #define VEC9(_y) A(VectorType::get(_y, 9)) +#define TGSM(_y) A(PointerType::get(_y, DXIL::kTGSMAddrSpace)) // Extended Overload types are wrapped in an anonymous struct -#define EXT(_y) A(cast(pOverloadType)->getElementType(_y)) +#define EXT(_y) cast(pOverloadType)->getElementType(_y) /* hctdb_instrhelp.get_oloads_funcs()*/ switch (opCode) { // return opCode @@ -6445,9 +6438,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Linear Algebra Operations case OpCode::MatVecMul: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI1); A(pI32); A(pRes); @@ -6461,9 +6454,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI1); break; case OpCode::MatVecMulAdd: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI1); A(pI32); A(pRes); @@ -6482,8 +6475,8 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::OuterProductAccumulate: A(pV); A(pI32); - EXT(0); - EXT(1); + A(EXT(0)); + A(EXT(1)); A(pRes); A(pI32); A(pI32); @@ -6586,21 +6579,21 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Linear Algebra Operations case OpCode::LinAlgMatrixMultiplyAccumulate: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); - EXT(3); + A(EXT(1)); + A(EXT(2)); + A(EXT(3)); break; case OpCode::LinAlgFillMatrix: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); break; case OpCode::LinAlgCopyConvertMatrix: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI1); break; case OpCode::LinAlgMatrixLoadFromDescriptor: @@ -6612,9 +6605,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); break; case OpCode::LinAlgMatrixLoadFromMemory: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + TGSM(EXT(1)); A(pI32); A(pI32); A(pI32); @@ -6631,17 +6624,17 @@ Function *OP::GetOpFunc(OpCode opCode, Type 
*pOverloadType) { A(pI32); break; case OpCode::LinAlgMatrixGetElement: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI32); break; case OpCode::LinAlgMatrixSetElement: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI32); - EXT(2); + A(EXT(2)); break; case OpCode::LinAlgMatrixStoreToDescriptor: A(pV); @@ -6655,8 +6648,8 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::LinAlgMatrixStoreToMemory: A(pV); A(pI32); - EXT(0); - EXT(1); + A(EXT(0)); + TGSM(EXT(1)); A(pI32); A(pI32); A(pI32); @@ -6666,31 +6659,31 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); break; case OpCode::LinAlgMatrixMultiply: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); break; case OpCode::LinAlgMatrixAccumulate: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); break; case OpCode::LinAlgMatVecMul: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); A(pI32); break; case OpCode::LinAlgMatVecMulAdd: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); A(pI32); - EXT(3); + A(EXT(3)); A(pI32); break; case OpCode::LinAlgMatrixAccumulateToDescriptor: @@ -6705,17 +6698,17 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::LinAlgMatrixAccumulateToMemory: A(pV); A(pI32); - EXT(0); - EXT(1); + A(EXT(0)); + TGSM(EXT(1)); A(pI32); A(pI32); A(pI32); break; case OpCode::LinAlgMatrixOuterProduct: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); break; // @@ -7082,7 +7075,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::MatVecMulAdd: case OpCode::LinAlgFillMatrix: case OpCode::LinAlgCopyConvertMatrix: - case OpCode::LinAlgMatrixLoadFromMemory: case OpCode::LinAlgMatrixGetElement: if (FT->getNumParams() < 2) return nullptr; @@ -7090,8 +7082,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { {FT->getReturnType(), 
FT->getParamType(1)}); case OpCode::OuterProductAccumulate: - case OpCode::LinAlgMatrixStoreToMemory: - case OpCode::LinAlgMatrixAccumulateToMemory: if (FT->getNumParams() < 3) return nullptr; return llvm::StructType::get(Ctx, @@ -7104,12 +7094,27 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { {FT->getReturnType(), FT->getParamType(1), FT->getParamType(2), FT->getParamType(3)}); + case OpCode::LinAlgMatrixLoadFromMemory: + if (FT->getNumParams() < 2) + return nullptr; + return llvm::StructType::get( + Ctx, + {FT->getReturnType(), FT->getParamType(1)->getPointerElementType()}); + case OpCode::LinAlgMatrixSetElement: if (FT->getNumParams() < 4) return nullptr; return llvm::StructType::get( Ctx, {FT->getReturnType(), FT->getParamType(1), FT->getParamType(3)}); + case OpCode::LinAlgMatrixStoreToMemory: + case OpCode::LinAlgMatrixAccumulateToMemory: + if (FT->getNumParams() < 3) + return nullptr; + return llvm::StructType::get( + Ctx, + {FT->getParamType(1), FT->getParamType(2)->getPointerElementType()}); + case OpCode::LinAlgMatrixMultiply: case OpCode::LinAlgMatrixAccumulate: case OpCode::LinAlgMatVecMul: diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 9ea6166f36..6d718257d4 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7242,11 +7242,15 @@ Value *TranslateLinAlgMatrixLoadFromMemory( Value *Stride = CI->getArgOperand(4); Value *Layout = CI->getArgOperand(5); + Value *Zero = Builder.getInt32(0); + Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero}); + Type *ArrEltTy = ArrPtr->getType()->getPointerElementType(); + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); - Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, Arr->getType()}); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, ArrEltTy}); Value *Matrix = - Builder.CreateCall(DxilFunc, {OpArg, Arr, Offset, Stride, Layout}); + Builder.CreateCall(DxilFunc, {OpArg, ArrPtr, Offset, Stride, Layout}); 
Builder.CreateStore(Matrix, MatrixPtr); return nullptr; @@ -7265,12 +7269,15 @@ Value *TranslateLinAlgMatrixAccumStoreToMemory( Value *Stride = CI->getArgOperand(4); Value *Layout = CI->getArgOperand(5); + Value *Zero = Builder.getInt32(0); + Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero}); + Type *ArrEltTy = ArrPtr->getType()->getPointerElementType(); + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); - Function *DxilFunc = - HlslOp->GetOpFunc(OpCode, {Matrix->getType(), Arr->getType()}); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {Matrix->getType(), ArrEltTy}); return Builder.CreateCall(DxilFunc, - {OpArg, Matrix, Arr, Offset, Stride, Layout}); + {OpArg, Matrix, ArrPtr, Offset, Stride, Layout}); } } // namespace diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl index cfdac39028..f05366d62f 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -8,7 +8,7 @@ groupshared float SharedArr[64]; void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 1, 2, 3); } diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl index a3e383ca58..9c1e8303b2 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -8,7 +8,7 @@ groupshared float SharedArr[64]; void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, float addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl index 4b5b50c357..07a4fa38e5 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -8,7 +8,7 @@ groupshared float SharedArr[64]; void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float 
addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 5dbb59102f..b72006efa9 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -55,7 +55,7 @@ # - "a" is for any array ([n x Ty]) # dxil_all_user_oload_chars must be kept in sync with the indices in # hlsl::OP::TypeSlot in DxilOperations.h. -dxil_all_user_oload_chars = "hfd18wiluo $x7 + ty = i.llvm_type.replace("_gs", "") + if ty.startswith("$x"): + if ty != "$x" + str(next_oload_idx): raise ValueError( "Extended overloads are not sequentially referenced in " f"DXIL op {self.name}: {i.llvm_type} != $x{next_oload_idx}" @@ -6407,12 +6411,12 @@ def populate_ExperimentalOps(self): "LinAlgMatrixLoadFromMemory", "LinAlgMatrixLoadFromMemory", "fills a matrix with data from a groupshared array", - "o,a", + "o,hfwi", "", [ db_dxil_param(0, "$x0", "", "resulting matrix"), db_dxil_param( - 2, "$x1", "memory", "groupshared array to fill matrix with" + 2, "$x_gs1", "memory", "groupshared array to fill matrix with" ), db_dxil_param(3, "i32", "offset", "starting offset in the array"), db_dxil_param( @@ -6508,13 +6512,13 @@ def populate_ExperimentalOps(self): "LinAlgMatrixStoreToMemory", "LinAlgMatrixStoreToMemory", "stores a matrix to groupshared memory", - "o,a", + "o,hfwi", "", [ db_dxil_param(0, "v", "", ""), db_dxil_param(2, "$x0", "matrix", "matrix to be stored"), db_dxil_param( - 3, "$x1", "memory", "groupshared array to store into" + 3, "$x_gs1", "memory", "groupshared array to store into" ), db_dxil_param(4, "i32", "offset", "starting offset in the array"), db_dxil_param( @@ -6625,13 +6629,13 @@ def populate_ExperimentalOps(self): "LinAlgMatrixAccumulateToMemory", "LinAlgMatrixAccumulateToMemory", "accumulates a matrix to groupshared memory", - "o,a", + 
"o,hfwi", "", [ db_dxil_param(0, "v", "", ""), db_dxil_param(2, "$x0", "matrix", "Accumulator matrix"), db_dxil_param( - 3, "$x1", "memory", "groupshared array to accumulate into" + 3, "$x_gs1", "memory", "groupshared array to accumulate into" ), db_dxil_param(4, "i32", "offset", "starting offset in the array"), db_dxil_param( diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 5e09578af7..91636c309e 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -644,10 +644,15 @@ def print_opfunc_table(self): "noderecordproperty": "A(nodeRecordProperty);", "hit_object": "A(pHit);", # Extended overload slots, extend as needed: - "$x0": "EXT(0);", - "$x1": "EXT(1);", - "$x2": "EXT(2);", - "$x3": "EXT(3);", + "$x0": "A(EXT(0));", + "$x1": "A(EXT(1));", + "$x2": "A(EXT(2));", + "$x3": "A(EXT(3));", + # Groupshared pointers to extended overloads: + "$x_gs0": "TGSM(EXT(0));", + "$x_gs1": "TGSM(EXT(1));", + "$x_gs2": "TGSM(EXT(2));", + "$x_gs3": "TGSM(EXT(3));", } last_category = None for i in self.db.get_dxil_ops(): @@ -679,6 +684,7 @@ def print_opfunc_oload_type(self): vec_ty = "$vec" gsptr_ty = "$gsptr" extended_ty = "$x" + extended_gs_ty = "$x_gs" last_category = None index_dict = collections.OrderedDict() @@ -846,7 +852,7 @@ def print_opfunc_oload_type(self): # indices the key, and add the opcode to a list of opcodes for that # key. Indices start with 0 for return type, and 1 for the first # function parameter, which is the DXIL OpCode. - indices = [] + indices = [] # (op.pos, unwrap_pointer) pairs for index, op in enumerate(instr.ops): # Skip dxil opcode. 
if op.pos == 1: @@ -854,8 +860,10 @@ def print_opfunc_oload_type(self): op_type = op.llvm_type if op_type.startswith(extended_ty): + gs_ptr = op_type.startswith(extended_gs_ty) + prefix_len = len(extended_gs_ty) if gs_ptr else len(extended_ty) try: - extended_index = int(op_type[2:]) + extended_index = int(op_type[prefix_len:]) except: raise ValueError( "Error parsing extended operand type " @@ -866,7 +874,7 @@ def print_opfunc_oload_type(self): f"'$x{extended_index}' is not in sequential " + f"order for DXIL op '{instr.name}'" ) - indices.append(op.pos) + indices.append((op.pos, gs_ptr)) if len(indices) != instr.num_oloads: raise ValueError( @@ -875,23 +883,26 @@ def print_opfunc_oload_type(self): ) extended_dict.setdefault(tuple(indices), []).append(instr.name) - def get_type_at_index(index): - if index == 0: - return "FT->getReturnType()" - return f"FT->getParamType({index - 1})" + def get_type_at_index(index, unwrap_pointer): + result = "FT->getReturnType()" + if index > 0: + result = f"FT->getParamType({index - 1})" + if unwrap_pointer: + result = result + "->getPointerElementType()" + return result for index_tuple, opcodes in extended_dict.items(): line = "" for opcode in opcodes: line = line + f"case OpCode::{opcode}:\n" - if index_tuple[-1] > 0: + if index_tuple[-1][0] > 0: line += ( - f" if (FT->getNumParams() < {index_tuple[-1]})\n" + f" if (FT->getNumParams() < {index_tuple[-1][0]})\n" + " return nullptr;\n" ) line += ( " return llvm::StructType::get(Ctx, {" - + ", ".join([get_type_at_index(index) for index in index_tuple]) + + ", ".join([get_type_at_index(index, unwrap_pointer) for index, unwrap_pointer in index_tuple]) + "});\n" ) print(line) From 36b93d5f2d3cf7f148e0d96f876833390bf89609 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 11 Mar 2026 19:46:59 -0600 Subject: [PATCH 4/7] Cleanup comments --- utils/hct/hctdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 
b72006efa9..6e9d2920e1 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -51,8 +51,8 @@ # processing. # - "," is used to separate multiple overload dimensions. # - When used, only $x0, $x1, etc. are supported for overloaded parameter -# types. -# - "a" is for any array ([n x Ty]) +# types. $x_gs0, $x_gs1, etc work like $xN except the overload will be a +# pointer to groupshared memory. # dxil_all_user_oload_chars must be kept in sync with the indices in # hlsl::OP::TypeSlot in DxilOperations.h. dxil_all_user_oload_chars = "hfd18wiluo<" From 325ff0ecc5ee8e9e42c036105b6f2543986656e8 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Fri, 13 Mar 2026 14:50:34 -0700 Subject: [PATCH 5/7] [SM6.10] Combine LinAlg Matrix AST tests into one file --- .../builtins/copyconvertmatrix/ast.hlsl | 17 -- .../hlsl/linalg/builtins/fillmatrix/ast.hlsl | 15 -- .../hlsl/linalg/builtins/matrix-ops-ast.hlsl | 193 ++++++++++++++++++ .../linalg/builtins/matrixaccumulate/ast.hlsl | 19 -- .../matrixaccumulatetodescriptor/ast.hlsl | 20 -- .../matrixaccumulatetomemory/ast.hlsl | 20 -- .../builtins/matrixgetcoordinate/ast.hlsl | 15 -- .../linalg/builtins/matrixgetelement/ast.hlsl | 28 --- .../linalg/builtins/matrixlength/ast.hlsl | 14 -- .../matrixloadfromdescriptor/ast.hlsl | 20 -- .../builtins/matrixloadfrommemory/ast.hlsl | 20 -- .../builtins/matrixmatrixmultiply/ast.hlsl | 18 -- .../matrixmatrixmultiplyaccumulate/ast.hlsl | 19 -- .../builtins/matrixouterproduct/ast.hlsl | 19 -- .../matrixqueryaccumulatorlayout/ast.hlsl | 12 -- .../linalg/builtins/matrixsetelement/ast.hlsl | 18 -- .../builtins/matrixstoretodescriptor/ast.hlsl | 20 -- .../builtins/matrixstoretomemory/ast.hlsl | 20 -- .../builtins/matrixvectormultiply/ast.hlsl | 21 -- .../builtins/matrixvectormultiplyadd/ast.hlsl | 24 --- 20 files changed, 193 insertions(+), 359 deletions(-) delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/copyconvertmatrix/ast.hlsl delete mode 100644 
tools/clang/test/SemaHLSL/hlsl/linalg/builtins/fillmatrix/ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-ops-ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulate/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixgetcoordinate/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixgetelement/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixlength/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfromdescriptor/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixmatrixmultiply/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixouterproduct/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixsetelement/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretodescriptor/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixvectormultiply/ast.hlsl delete mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixvectormultiplyadd/ast.hlsl diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/copyconvertmatrix/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/copyconvertmatrix/ast.hlsl deleted file mode 100644 index 
9ec01a4520..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/copyconvertmatrix/ast.hlsl +++ /dev/null @@ -1,17 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_CopyConvertMatrix 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, bool)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} source '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} transpose 'bool' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 405 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat1; - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2; - __builtin_LinAlg_CopyConvertMatrix(mat2, mat1, true); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/fillmatrix/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/fillmatrix/ast.hlsl deleted file mode 100644 index dd8f8afec8..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/fillmatrix/ast.hlsl +++ /dev/null @@ -1,15 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_FillMatrix 'void (__builtin_LinAlgMatrix & {{.*}}, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix &&__restrict {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} value 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 406 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - __builtin_LinAlg_FillMatrix(mat, 15); -} diff --git 
a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-ops-ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-ops-ast.hlsl new file mode 100644 index 0000000000..a6510d9dd6 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-ops-ast.hlsl @@ -0,0 +1,193 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +RWByteAddressBuffer buf; +groupshared float SharedArr[64]; + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat1; + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2; + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat3; + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_CopyConvertMatrix 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, bool)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} source '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} transpose 'bool' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 405 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_CopyConvertMatrix(mat2, mat1, true); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_FillMatrix 'void (__builtin_LinAlgMatrix & {{.*}}, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix &&__restrict {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} value 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 406 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_FillMatrix(mat1, 15); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulate 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}})' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrixC '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: 
ParmVarDecl {{.*}} matrixLHS '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} matrixRHS '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 415 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_MatrixAccumulate(mat1, mat2, mat3); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToDescriptor 'void (__builtin_LinAlgMatrix {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 419 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_MatrixAccumulateToDescriptor(mat1, buf, 1, 2, 3); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 420 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_MatrixAccumulateToMemory(mat1, SharedArr, 0, 0, 0); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixGetCoordinate 'vector (__builtin_LinAlgMatrix {{.*}}, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix 
{{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 407 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + uint2 coord = __builtin_LinAlg_MatrixGetCoordinate(mat1, 0); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixGetElement 'void (unsigned int &, __builtin_LinAlgMatrix {{.*}}, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret 'unsigned int &&__restrict' +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 408 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + uint elem1; + __builtin_LinAlg_MatrixGetElement(elem1, mat1, 3); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixGetElement 'void (float &, __builtin_LinAlgMatrix {{.*}}, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret 'float &&__restrict' +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 408 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + float elem2; + __builtin_LinAlg_MatrixGetElement(elem2, mat1, 4); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLength 'unsigned int (__builtin_LinAlgMatrix {{.*}})' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 409 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_MatrixLength(mat1); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromDescriptor 'void (__builtin_LinAlgMatrix & {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix 
&&__restrict {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 410 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_MatrixLoadFromDescriptor(mat1, buf, 0, 0, 0); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 411 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_MatrixLoadFromMemory(mat1, SharedArr, 0, 0, 0); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixMatrixMultiply 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}})' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrixC '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} matrixA '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} matrixB '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 416 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_MatrixMatrixMultiply(mat1, mat2, mat3); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixMatrixMultiplyAccumulate 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, 
__builtin_LinAlgMatrix {{.*}})' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrixR '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} matrixA '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} matrixB '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} matrixC '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 417 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(mat1, mat2, mat3, mat1); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixOuterProduct 'void (__builtin_LinAlgMatrix {{.*}}, vector, vector)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} vecA 'vector':'vector' +// CHECK-NEXT: ParmVarDecl {{.*}} vecB 'vector':'vector' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 421 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + int4 vecA = {1,2,3,4}; + int4 vecB = {1,2,3,4}; + __builtin_LinAlg_MatrixOuterProduct(mat1, vecA, vecB); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixQueryAccumulatorLayout 'unsigned int ()' extern +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 418 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + uint layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout(); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixSetElement 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} value 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 412 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + 
__builtin_LinAlg_MatrixSetElement(mat2, mat1, 1, 1); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixStoreToDescriptor 'void (__builtin_LinAlgMatrix {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 413 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_MatrixStoreToDescriptor(mat1, buf, 1, 2, 3); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixStoreToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 414 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + + __builtin_LinAlg_MatrixStoreToMemory(mat1, SharedArr, 0, 0, 0); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixVectorMultiply 'void (vector &, __builtin_LinAlgMatrix {{.*}}, vector, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret 'vector &&__restrict' +// CHECK-NEXT: ParmVarDecl {{.*}} mat '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} input 'vector':'vector' +// CHECK-NEXT: ParmVarDecl {{.*}} input_interp 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 422 +// CHECK-NEXT: 
AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + float4 vec = {1,2,3,4}; + float4 result; + __builtin_LinAlg_MatrixVectorMultiply(result, mat1, vec, 1); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixVectorMultiplyAdd 'void (vector &, __builtin_LinAlgMatrix {{.*}}, vector, unsigned int, vector, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret 'vector &&__restrict' +// CHECK-NEXT: ParmVarDecl {{.*}} mat '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} input 'vector':'vector' +// CHECK-NEXT: ParmVarDecl {{.*}} input_interp 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} bias 'vector':'vector' +// CHECK-NEXT: ParmVarDecl {{.*}} bias_interp 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 423 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + float4 input = {1,2,3,4}; + float4 bias = {5,6,7,8}; + __builtin_LinAlg_MatrixVectorMultiplyAdd(result, mat1, input, 1, bias, 2); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulate/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulate/ast.hlsl deleted file mode 100644 index a1db99833d..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulate/ast.hlsl +++ /dev/null @@ -1,19 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulate 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}})' extern -// CHECK-NEXT: ParmVarDecl {{.*}} matrixC '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} matrixLHS '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} matrixRHS '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 415 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - 
__builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat2; - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat3; - __builtin_LinAlg_MatrixAccumulate(mat, mat2, mat3); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/ast.hlsl deleted file mode 100644 index 026bc5e706..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/ast.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToDescriptor 'void (__builtin_LinAlgMatrix {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' -// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 419 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -RWByteAddressBuffer outbuf; - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - __builtin_LinAlg_MatrixAccumulateToDescriptor(mat, outbuf, 1, 2, 3); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl deleted file mode 100644 index d300796b67..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T 
lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' -// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 420 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -groupshared float SharedArr[64]; - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 0, 0, 0); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixgetcoordinate/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixgetcoordinate/ast.hlsl deleted file mode 100644 index a783e0fa37..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixgetcoordinate/ast.hlsl +++ /dev/null @@ -1,15 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixGetCoordinate 'vector (__builtin_LinAlgMatrix {{.*}}, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 407 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - uint2 coord = 
__builtin_LinAlg_MatrixGetCoordinate(mat, 0); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixgetelement/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixgetelement/ast.hlsl deleted file mode 100644 index b8eef42699..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixgetelement/ast.hlsl +++ /dev/null @@ -1,28 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixGetElement 'void (unsigned int &, __builtin_LinAlgMatrix {{.*}}, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret 'unsigned int &&__restrict' -// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 408 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixGetElement 'void (float &, __builtin_LinAlgMatrix {{.*}}, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret 'float &&__restrict' -// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 408 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - - uint elem1; - __builtin_LinAlg_MatrixGetElement(elem1, mat, 3); - - float elem2; - __builtin_LinAlg_MatrixGetElement(elem2, mat, 4); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixlength/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixlength/ast.hlsl deleted file mode 100644 index 41f391c4b2..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixlength/ast.hlsl +++ 
/dev/null @@ -1,14 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLength 'unsigned int (__builtin_LinAlgMatrix {{.*}})' extern -// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 409 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - __builtin_LinAlg_MatrixLength(mat); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfromdescriptor/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfromdescriptor/ast.hlsl deleted file mode 100644 index 6dd957fbe3..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfromdescriptor/ast.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromDescriptor 'void (__builtin_LinAlgMatrix & {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix &&__restrict {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' -// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 410 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -RWByteAddressBuffer inbuf; - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - __builtin_LinAlg_MatrixLoadFromDescriptor(mat, inbuf, 0, 0, 0); -} diff --git 
a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl deleted file mode 100644 index 3ac0de3880..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' -// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 411 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -groupshared float SharedArr[64]; - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 0, 0, 0); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixmatrixmultiply/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixmatrixmultiply/ast.hlsl deleted file mode 100644 index ddb9d14c5d..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixmatrixmultiply/ast.hlsl +++ /dev/null @@ -1,18 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixMatrixMultiply 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}})' extern -// CHECK-NEXT: 
ParmVarDecl {{.*}} matrixC '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} matrixA '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} matrixB '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 416 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat1; - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat2; - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat3; - __builtin_LinAlg_MatrixMatrixMultiply(mat1, mat2, mat3); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/ast.hlsl deleted file mode 100644 index 59b2c3fe9b..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/ast.hlsl +++ /dev/null @@ -1,19 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixMatrixMultiplyAccumulate 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}})' extern -// CHECK-NEXT: ParmVarDecl {{.*}} matrixR '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} matrixA '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} matrixB '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} matrixC '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 417 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat1; - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 
2)]] mat2; - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat3; - __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(mat1, mat2, mat3, mat1); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixouterproduct/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixouterproduct/ast.hlsl deleted file mode 100644 index f4eea9609f..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixouterproduct/ast.hlsl +++ /dev/null @@ -1,19 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixOuterProduct 'void (__builtin_LinAlgMatrix {{.*}}, vector, vector)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} vecA 'vector':'vector' -// CHECK-NEXT: ParmVarDecl {{.*}} vecB 'vector':'vector' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 421 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - int4 vecA = {1,2,3,4}; - int4 vecB = {1,2,3,4}; - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - __builtin_LinAlg_MatrixOuterProduct(mat, vecA, vecB); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/ast.hlsl deleted file mode 100644 index 7eaefdec49..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/ast.hlsl +++ /dev/null @@ -1,12 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixQueryAccumulatorLayout 'unsigned int ()' extern -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 418 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - 
-[shader("compute")] -[numthreads(1,1,1)] -void main() { - uint layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout(); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixsetelement/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixsetelement/ast.hlsl deleted file mode 100644 index 918fb25dc2..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixsetelement/ast.hlsl +++ /dev/null @@ -1,18 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixSetElement 'void (__builtin_LinAlgMatrix {{.*}}, __builtin_LinAlgMatrix {{.*}}, unsigned int, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} value 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 412 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat1; - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2; - __builtin_LinAlg_MatrixSetElement(mat2, mat1, 1, 1); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretodescriptor/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretodescriptor/ast.hlsl deleted file mode 100644 index 9757142dc0..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretodescriptor/ast.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixStoreToDescriptor 'void (__builtin_LinAlgMatrix {{.*}}, RWByteAddressBuffer, unsigned int, 
unsigned int, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' -// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 413 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -RWByteAddressBuffer outbuf; - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - __builtin_LinAlg_MatrixStoreToDescriptor(mat, outbuf, 1, 2, 3); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl deleted file mode 100644 index c726d119eb..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixStoreToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' -// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 414 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -groupshared float SharedArr[64]; - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - 
__builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 0, 0, 0); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixvectormultiply/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixvectormultiply/ast.hlsl deleted file mode 100644 index e38c0dc151..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixvectormultiply/ast.hlsl +++ /dev/null @@ -1,21 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixVectorMultiply 'void (vector &, __builtin_LinAlgMatrix {{.*}}, vector, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret 'vector &&__restrict' -// CHECK-NEXT: ParmVarDecl {{.*}} mat '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} input 'vector':'vector' -// CHECK-NEXT: ParmVarDecl {{.*}} input_interp 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 422 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - __builtin_LinAlg_FillMatrix(mat, 15); - - float4 vec = {1,2,3,4}; - float4 result; - __builtin_LinAlg_MatrixVectorMultiply(result, mat, vec, 1); -} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixvectormultiplyadd/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixvectormultiplyadd/ast.hlsl deleted file mode 100644 index b45b426ced..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixvectormultiplyadd/ast.hlsl +++ /dev/null @@ -1,24 +0,0 @@ -// REQUIRES: dxil-1-10 -// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixVectorMultiplyAdd 'void (vector &, __builtin_LinAlgMatrix {{.*}}, vector, unsigned int, vector, unsigned int)' extern -// CHECK-NEXT: ParmVarDecl {{.*}} ret 'vector 
&&__restrict' -// CHECK-NEXT: ParmVarDecl {{.*}} mat '__builtin_LinAlgMatrix {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} input 'vector':'vector' -// CHECK-NEXT: ParmVarDecl {{.*}} input_interp 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} bias 'vector':'vector' -// CHECK-NEXT: ParmVarDecl {{.*}} bias_interp 'unsigned int' -// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 423 -// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - -[shader("compute")] -[numthreads(1,1,1)] -void main() { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 5, 4, 2, 2)]] mat; - __builtin_LinAlg_FillMatrix(mat, 15); - - float4 input = {1,2,3,4}; - float4 bias = {5,6,7,8}; - float4 result; - __builtin_LinAlg_MatrixVectorMultiplyAdd(result, mat, input, 1, bias, 2); -} From 172a794eb14d339dd31d78d2090ed05afaebfb42 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Fri, 13 Mar 2026 14:58:01 -0700 Subject: [PATCH 6/7] Rename the combined file to matrix-builtins-ast.hlsl --- .../builtins/{matrix-ops-ast.hlsl => matrix-builtins-ast.hlsl} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools/clang/test/SemaHLSL/hlsl/linalg/builtins/{matrix-ops-ast.hlsl => matrix-builtins-ast.hlsl} (100%) diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-ops-ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl similarity index 100% rename from tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-ops-ast.hlsl rename to tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl From ec75f1644b518067004a47bc9494fde4cab80f65 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Tue, 17 Mar 2026 18:25:09 -0700 Subject: [PATCH 7/7] Update variable name --- .../hlsl/linalg/builtins/matrix-builtins-ast.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl index 
a6510d9dd6..72af30e90d 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl @@ -1,7 +1,7 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s -RWByteAddressBuffer buf; +RWByteAddressBuffer Buf; groupshared float SharedArr[64]; [shader("compute")] @@ -42,7 +42,7 @@ void main() { // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' // CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 419 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - __builtin_LinAlg_MatrixAccumulateToDescriptor(mat1, buf, 1, 2, 3); + __builtin_LinAlg_MatrixAccumulateToDescriptor(mat1, Buf, 1, 2, 3); // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' @@ -93,7 +93,7 @@ void main() { // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' // CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 410 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - __builtin_LinAlg_MatrixLoadFromDescriptor(mat1, buf, 0, 0, 0); + __builtin_LinAlg_MatrixLoadFromDescriptor(mat1, Buf, 0, 0, 0); // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' @@ -154,7 +154,7 @@ void main() { // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' // CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 413 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - __builtin_LinAlg_MatrixStoreToDescriptor(mat1, buf, 1, 2, 3); + 
__builtin_LinAlg_MatrixStoreToDescriptor(mat1, Buf, 1, 2, 3); // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixStoreToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}'