From e520d0a5d052788c42ed9a5b8567dd9e52fb779a Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 19 Feb 2026 15:37:50 -0700 Subject: [PATCH 1/7] [SM6.10] Implement groupshared Builtins Implements the Load/Store/Accumulate to memory groupshared builtins following the pattern of the previous builtins --- include/dxc/DXIL/DxilInstructions.h | 18 +++--- include/dxc/DXIL/DxilOperations.h | 1 + lib/DXIL/DxilOperations.cpp | 46 ++++++++++----- lib/HLSL/HLOperationLower.cpp | 56 ++++++++++++++++++- .../matrixaccumulatetomemory/nominal.hlsl | 19 +++++++ .../matrixloadfrommemory/nominal.hlsl | 19 +++++++ .../builtins/matrixstoretomemory/nominal.hlsl | 19 +++++++ .../matrixaccumulatetomemory/ast.hlsl | 24 ++++++++ .../unavailable_pre_sm610.hlsl | 15 +++++ .../builtins/matrixloadfrommemory/ast.hlsl | 24 ++++++++ .../unavailable_pre_sm610.hlsl | 15 +++++ .../builtins/matrixstoretomemory/ast.hlsl | 24 ++++++++ .../unavailable_pre_sm610.hlsl | 15 +++++ .../hlsl/linalg/builtins/stage-errors.hlsl | 16 ++++++ utils/hct/gen_intrin_main.txt | 6 +- utils/hct/hctdb.py | 20 +++---- 16 files changed, 297 insertions(+), 40 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl create mode 100644 
tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 8c48202ce0..941eab6474 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10651,14 +10651,14 @@ struct DxilInst_LinAlgMatrixLoadFromMemory { bool requiresUniformInputs() const { return false; } // Operand indexes enum OperandIdx { - arg_groupsharedArr = 1, + arg_memory = 1, arg_offset = 2, arg_stride = 3, arg_layout = 4, }; // Accessors - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(1); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_memory() const { return Instr->getOperand(1); } + void set_memory(llvm::Value *val) { Instr->setOperand(1, val); } llvm::Value *get_offset() const { return Instr->getOperand(2); } void set_offset(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_stride() const { return Instr->getOperand(3); } @@ -10854,7 +10854,7 @@ struct DxilInst_LinAlgMatrixStoreToMemory { // Operand indexes enum OperandIdx { arg_matrix = 1, - arg_groupsharedArr = 2, + arg_memory = 2, arg_offset = 3, arg_stride = 4, arg_layout = 5, @@ -10862,8 +10862,8 @@ struct DxilInst_LinAlgMatrixStoreToMemory { // Accessors llvm::Value *get_matrix() const { return Instr->getOperand(1); } void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(2); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_memory() const { return Instr->getOperand(2); } + void set_memory(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } llvm::Value *get_stride() const { return Instr->getOperand(4); } @@ -11091,7 +11091,7 @@ struct 
DxilInst_LinAlgMatrixAccumulateToMemory { // Operand indexes enum OperandIdx { arg_matrix = 1, - arg_groupsharedArr = 2, + arg_memory = 2, arg_offset = 3, arg_stride = 4, arg_layout = 5, @@ -11099,8 +11099,8 @@ struct DxilInst_LinAlgMatrixAccumulateToMemory { // Accessors llvm::Value *get_matrix() const { return Instr->getOperand(1); } void set_matrix(llvm::Value *val) { Instr->setOperand(1, val); } - llvm::Value *get_groupsharedArr() const { return Instr->getOperand(2); } - void set_groupsharedArr(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_memory() const { return Instr->getOperand(2); } + void set_memory(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } llvm::Value *get_stride() const { return Instr->getOperand(4); } diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index bab4bffc6e..85df375b3a 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -212,6 +212,7 @@ class OP { TS_UDT = 8, // Ex: %"struct.MyStruct" * TS_Object = 9, // Ex: %"class.StructuredBuffer" TS_Vector = 10, // Ex: <8 x i16> + TS_Array = 11, // Ex: [8 x float] TS_MaskBitCount, // Types used in Mask end here // TS_Extended is only used to identify the unnamed struct type used to wrap // multiple overloads when using GetTypeSlot. 
diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 4138b3d930..02dcfe65a0 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2863,8 +2863,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixLoadFromMemory", Attribute::None, 2, - {{0x200}, {0x63}}, - {{0x0}, {0x0}}}, // Overloads: o,hfwi + {{0x200}, {0x800}}, + {{0x0}, {0x0}}}, // Overloads: o,a {OC::LinAlgMatrixLength, "LinAlgMatrixLength", OCC::LinAlgMatrixLength, @@ -2911,8 +2911,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixStoreToMemory", Attribute::None, 2, - {{0x200}, {0x63}}, - {{0x0}, {0x0}}}, // Overloads: o,hfwi + {{0x200}, {0x800}}, + {{0x0}, {0x0}}}, // Overloads: o,a {OC::LinAlgMatrixQueryAccumulatorLayout, "LinAlgMatrixQueryAccumulatorLayout", OCC::LinAlgMatrixQueryAccumulatorLayout, @@ -2967,8 +2967,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixAccumulateToMemory", Attribute::None, 2, - {{0x200}, {0x63}}, - {{0x0}, {0x0}}}, // Overloads: o,hfwi + {{0x200}, {0x800}}, + {{0x0}, {0x0}}}, // Overloads: o,a {OC::LinAlgMatrixOuterProduct, "LinAlgMatrixOuterProduct", OCC::LinAlgMatrixOuterProduct, @@ -3152,6 +3152,8 @@ unsigned OP::GetTypeSlot(Type *pType) { return TS_Extended; case Type::VectorTyID: return TS_Vector; + case Type::ArrayTyID: + return TS_Array; default: break; } @@ -3166,26 +3168,39 @@ const char *OP::GetOverloadTypeName(unsigned TypeSlot) { StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { DXASSERT(!Ty->isVoidTy(), "must not pass void type here"); unsigned TypeSlot = OP::GetTypeSlot(Ty); + if (TypeSlot < TS_BasicCount) { return GetOverloadTypeName(TypeSlot); - } else if (TypeSlot == TS_UDT) { + } + + switch (TypeSlot) { + case TS_UDT: { if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); StructType *ST = cast(Ty); return ST->getStructName(); - } else if (TypeSlot == TS_Object) { + } + case TS_Object: { StructType 
*ST = cast(Ty); if (dxilutil::IsHLSLLinAlgMatrixType(Ty)) return (Twine("m") + Twine(dxilutil::GetHLSLLinAlgMatrixTypeMangling(ST))) .toStringRef(Storage); return ST->getStructName(); - } else if (TypeSlot == TS_Vector) { + } + case TS_Vector: { VectorType *VecTy = cast(Ty); return (Twine("v") + Twine(VecTy->getNumElements()) + Twine( GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) .toStringRef(Storage); - } else if (TypeSlot == TS_Extended) { + } + case TS_Array: { + if (Ty->isPointerTy()) + Ty = Ty->getPointerElementType(); + ArrayType *ArrTy = cast(Ty); + return GetOverloadTypeName(OP::GetTypeSlot(ArrTy->getArrayElementType())); + } + case TS_Extended: { DXASSERT(isa(Ty), "otherwise, extended overload type not wrapped in struct type."); StructType *ST = cast(Ty); @@ -3200,11 +3215,14 @@ StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { OS << GetTypeName(ST->getElementType(I), TempStr); } return OS.str(); - } else { - raw_svector_ostream OS(Storage); - Ty->print(OS); - return OS.str(); } + default: + break; + } + + raw_svector_ostream OS(Storage); + Ty->print(OS); + return OS.str(); } StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 4f22a4598d..9ea6166f36 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7226,6 +7226,53 @@ Value *TranslateLinAlgCopyConvertMatrix(CallInst *CI, IntrinsicOp IOP, return nullptr; } +Value *TranslateLinAlgMatrixLoadFromMemory( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *MatrixPtr = CI->getArgOperand(1); + DXASSERT_NOMSG(isa(MatrixPtr->getType())); + Type *MatrixType = MatrixPtr->getType()->getPointerElementType(); + + Value *Arr = CI->getArgOperand(2); + Value *Offset = CI->getArgOperand(3); + Value 
*Stride = CI->getArgOperand(4); + Value *Layout = CI->getArgOperand(5); + + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, Arr->getType()}); + + Value *Matrix = + Builder.CreateCall(DxilFunc, {OpArg, Arr, Offset, Stride, Layout}); + Builder.CreateStore(Matrix, MatrixPtr); + + return nullptr; +} + +Value *TranslateLinAlgMatrixAccumStoreToMemory( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *Matrix = CI->getArgOperand(1); + Value *Arr = CI->getArgOperand(2); + Value *Offset = CI->getArgOperand(3); + Value *Stride = CI->getArgOperand(4); + Value *Layout = CI->getArgOperand(5); + + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); + Function *DxilFunc = + HlslOp->GetOpFunc(OpCode, {Matrix->getType(), Arr->getType()}); + + return Builder.CreateCall(DxilFunc, + {OpArg, Matrix, Arr, Offset, Stride, Layout}); +} + } // namespace // Lower table. 
@@ -7989,14 +8036,16 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromDescriptor, TranslateLinAlgMatrixLoadFromDescriptor, DXIL::OpCode::LinAlgMatrixLoadFromDescriptor}, - {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromMemory, EmptyLower, + {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromMemory, + TranslateLinAlgMatrixLoadFromMemory, DXIL::OpCode::LinAlgMatrixLoadFromMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixSetElement, TranslateLinAlgMatrixSetElement, DXIL::OpCode::LinAlgMatrixSetElement}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToDescriptor, TranslateLinAlgMatrixAccumStoreToDescriptor, DXIL::OpCode::LinAlgMatrixStoreToDescriptor}, - {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToMemory, EmptyLower, + {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToMemory, + TranslateLinAlgMatrixAccumStoreToMemory, DXIL::OpCode::LinAlgMatrixStoreToMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulate, TranslateLinAlgMatrixAccumulate, DXIL::OpCode::LinAlgMatrixAccumulate}, @@ -8010,7 +8059,8 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToDescriptor, TranslateLinAlgMatrixAccumStoreToDescriptor, DXIL::OpCode::LinAlgMatrixAccumulateToDescriptor}, - {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToMemory, EmptyLower, + {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToMemory, + TranslateLinAlgMatrixAccumStoreToMemory, DXIL::OpCode::LinAlgMatrixAccumulateToMemory}, {IntrinsicOp::IOP___builtin_LinAlg_MatrixOuterProduct, TranslateLinAlgMatrixOuterProduct, DXIL::OpCode::LinAlgMatrixOuterProduct}, diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl new file mode 100644 index 0000000000..5461600016 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -0,0 +1,19 @@ +// 
REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); +} + +// CHECK: @{{.*}} = external addrspace(3) global [64 x float] + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + fn(SharedArr); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl new file mode 100644 index 0000000000..a5dd722f1b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -0,0 +1,19 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); +} + +// CHECK: @{{.*}} = external addrspace(3) global [64 x float] + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + fn(SharedArr); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl new file mode 100644 index 
0000000000..f6c38536a3 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -0,0 +1,19 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); +} + +// CHECK: @{{.*}} = external addrspace(3) global [64 x float] + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl new file mode 100644 index 0000000000..e3694e1eb4 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl @@ -0,0 +1,24 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 420 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" 
+ +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); +} + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl new file mode 100644 index 0000000000..8048e22922 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -T cs_6_9 -HV 202x -E main %s -verify + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64], float F) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + + // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixAccumulateToMemory potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); +} + +[numthreads(4,1,1)] +void main() { + fn(SharedArr, 6.0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl new file mode 100644 index 0000000000..2874ba3c37 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl @@ -0,0 +1,24 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const 
__attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 411 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); +} + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl new file mode 100644 index 0000000000..af3dd3b846 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -T cs_6_9 -HV 202x -E main %s -verify + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64], float F) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + + // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixLoadFromMemory potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); +} + +[numthreads(4,1,1)] +void main() { + fn(SharedArr, 6.0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl new file mode 100644 index 0000000000..1c2520fe6c --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl @@ -0,0 +1,24 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T lib_6_10 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: FunctionDecl {{.*}} implicit used 
__builtin_LinAlg_MatrixStoreToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} memory 'float const __attribute__((address_space(3))) (&)[64]' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 414 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64]) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); +} + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + fn(SharedArr); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl new file mode 100644 index 0000000000..934963f5dc --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -T cs_6_9 -HV 202x -E main %s -verify + +groupshared float SharedArr[64]; + +void fn(groupshared float Arr[64], float F) { + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + + // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixStoreToMemory potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); +} + +[numthreads(4,1,1)] +void main() { + fn(SharedArr, 6.0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl index fbec113e81..c9ebd7adf8 100644 --- 
a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl @@ -8,8 +8,12 @@ // RUN: %dxc -T lib_6_10 -DMATRIX_STORE_TO_DESCRIPTOR %s -verify // RUN: %dxc -T lib_6_10 -DMATRIX_LENGTH %s -verify // RUN: %dxc -T lib_6_10 -DMATRIX_ACCUMULATE %s -verify +// RUN: %dxc -T lib_6_10 -DMATRIX_LOAD_FROM_MEMORY %s -verify +// RUN: %dxc -T lib_6_10 -DMATRIX_STORE_TO_MEMORY %s -verify +// RUN: %dxc -T lib_6_10 -DMATRIX_ACCUMULATE_TO_MEMORY %s -verify RWByteAddressBuffer buf; +groupshared float gs_arr[64]; void CallFunction() { @@ -62,6 +66,18 @@ void CallFunction() #define DO_FUNC __builtin_LinAlg_MatrixAccumulate(mat1, mat2, mat3); #endif +#ifdef MATRIX_LOAD_FROM_MEMORY + #define DO_FUNC __builtin_LinAlg_MatrixLoadFromMemory(mat1, gs_arr, 0, 0, 0); +#endif + +#ifdef MATRIX_STORE_TO_MEMORY + #define DO_FUNC __builtin_LinAlg_MatrixStoreToMemory(mat1, gs_arr, 0, 0, 0); +#endif + +#ifdef MATRIX_ACCUMULATE_TO_MEMORY + #define DO_FUNC __builtin_LinAlg_MatrixAccumulateToMemory(mat1, gs_arr, 0, 0, 0); +#endif + // The builtins below are allowed in all stages, if they raise an error // then the test will fail with "saw unexpected diagnostic" uint layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout(); diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 49aa2f151b..4810442a7e 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -402,13 +402,13 @@ void [[min_sm=6.10]] __builtin_LinAlg_FillMatrix(out LinAlgMatrix ret, in numeri void [[min_sm=6.10]] __builtin_LinAlg_CopyConvertMatrix(out LinAlgMatrix ret, in LinAlgMatrix source, in bool transpose); void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in ByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in RWByteAddressBuffer buf, in uint offset, in uint stride, in 
uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromMemory(out LinAlgMatrix ret, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromMemory(out LinAlgMatrix ret, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); uint [[min_sm=6.10]] __builtin_LinAlg_MatrixLength(in LinAlgMatrix matrix); uint<2> [[min_sm=6.10]] __builtin_LinAlg_MatrixGetCoordinate(in LinAlgMatrix matrix, in uint threadLocalIndex); void [[min_sm=6.10]] __builtin_LinAlg_MatrixGetElement(out numeric ret, in LinAlgMatrix matrix, in uint threadLocalIndex); void [[min_sm=6.10]] __builtin_LinAlg_MatrixSetElement(out LinAlgMatrix ret, in LinAlgMatrix matrix, in uint threadLocalIndex, in numeric value); void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(in LinAlgMatrix matrix, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); uint [[min_sm=6.10]] __builtin_LinAlg_MatrixQueryAccumulatorLayout(); void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiply(out LinAlgMatrix matrixC, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB); void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(out LinAlgMatrix matrixR, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB, in LinAlgMatrix matrixC); @@ -416,7 +416,7 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulate(out LinAlgMatrix matrixC, void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiply(out numeric<> ret, in LinAlgMatrix mat, in numeric<> input, in uint input_interp); void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiplyAdd(out numeric<> ret, in LinAlgMatrix mat, in 
numeric<> input, in uint input_interp, in numeric<> bias, in uint bias_interp); void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, in int GroupSharedMem, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric<> vecA, in numeric<> vecB); } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 71f035e059..5dbb59102f 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -52,9 +52,10 @@ # - "," is used to separate multiple overload dimensions. # - When used, only $x0, $x1, etc. are supported for overloaded parameter # types. +# - "a" is for any array ([n x Ty]) # dxil_all_user_oload_chars must be kept in sync with the indices in # hlsl::OP::TypeSlot in DxilOperations.h. 
-dxil_all_user_oload_chars = "hfd18wiluo<" +dxil_all_user_oload_chars = "hfd18wiluo Date: Tue, 10 Mar 2026 15:32:04 -0600 Subject: [PATCH 2/7] Address comments --- .../builtins/matrixaccumulatetomemory/nominal.hlsl | 13 ++++--------- .../builtins/matrixloadfrommemory/nominal.hlsl | 13 ++++--------- .../builtins/matrixstoretomemory/nominal.hlsl | 13 ++++--------- .../builtins/matrixaccumulatetomemory/ast.hlsl | 8 ++------ .../unavailable_pre_sm610.hlsl | 10 +++------- .../linalg/builtins/matrixloadfrommemory/ast.hlsl | 8 ++------ .../matrixloadfrommemory/unavailable_pre_sm610.hlsl | 10 +++------- .../linalg/builtins/matrixstoretomemory/ast.hlsl | 8 ++------ .../matrixstoretomemory/unavailable_pre_sm610.hlsl | 10 +++------- 9 files changed, 27 insertions(+), 66 deletions(-) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl index 5461600016..cfdac39028 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -1,19 +1,14 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -groupshared float SharedArr[64]; - -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); -} - // CHECK: @{{.*}} = external addrspace(3) global [64 x float] +groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - fn(SharedArr); + // CHECK: call void 
@dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl index a5dd722f1b..a3e383ca58 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -1,19 +1,14 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -groupshared float SharedArr[64]; - -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); -} - // CHECK: @{{.*}} = external addrspace(3) global [64 x float] +groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - fn(SharedArr); + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 1, 2, 3); } diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl index f6c38536a3..4b5b50c357 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -1,19 +1,14 @@ // REQUIRES: dxil-1-10 // RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s -groupshared float SharedArr[64]; - -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); -} - // CHECK: @{{.*}} = external addrspace(3) global [64 x float] +groupshared float SharedArr[64]; [numthreads(4,1,1)] void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - fn(SharedArr); + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl index e3694e1eb4..d300796b67 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl @@ -12,13 +12,9 @@ groupshared float SharedArr[64]; -void fn(groupshared 
float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); -} - [shader("compute")] [numthreads(1,1,1)] void main() { - fn(SharedArr); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl index 8048e22922..e5a9ea4895 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/unavailable_pre_sm610.hlsl @@ -2,14 +2,10 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64], float F) { +[numthreads(4,1,1)] +void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixAccumulateToMemory potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixAccumulateToMemory(mat, Arr, 0, 0, 0); -} - -[numthreads(4,1,1)] -void main() { - fn(SharedArr, 6.0); + __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl index 2874ba3c37..3ac0de3880 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl @@ -12,13 +12,9 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); -} - 
[shader("compute")] [numthreads(1,1,1)] void main() { - fn(SharedArr); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl index af3dd3b846..d8472ad92b 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/unavailable_pre_sm610.hlsl @@ -2,14 +2,10 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64], float F) { +[numthreads(4,1,1)] +void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixLoadFromMemory potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixLoadFromMemory(mat, Arr, 0, 0, 0); -} - -[numthreads(4,1,1)] -void main() { - fn(SharedArr, 6.0); + __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 0, 0, 0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl index 1c2520fe6c..c726d119eb 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl @@ -12,13 +12,9 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64]) { - __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); -} - [shader("compute")] [numthreads(1,1,1)] void main() { - fn(SharedArr); + __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; + __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 0, 0, 
0); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl index 934963f5dc..d3468a2a02 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/unavailable_pre_sm610.hlsl @@ -2,14 +2,10 @@ groupshared float SharedArr[64]; -void fn(groupshared float Arr[64], float F) { +[numthreads(4,1,1)] +void main() { __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixStoreToMemory potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixStoreToMemory(mat, Arr, 0, 0, 0); -} - -[numthreads(4,1,1)] -void main() { - fn(SharedArr, 6.0); + __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 0, 0, 0); } From 6ff022c0aa01b25464d11cc8139e96900135dcb2 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 11 Mar 2026 19:34:20 -0600 Subject: [PATCH 3/7] Rework based on feedback --- include/dxc/DXIL/DxilOperations.h | 1 - lib/DXIL/DxilOperations.cpp | 123 +++++++++--------- lib/HLSL/HLOperationLower.cpp | 17 ++- .../matrixaccumulatetomemory/nominal.hlsl | 2 +- .../matrixloadfrommemory/nominal.hlsl | 2 +- .../builtins/matrixstoretomemory/nominal.hlsl | 2 +- utils/hct/hctdb.py | 22 ++-- utils/hct/hctdb_instrhelp.py | 39 ++++-- 8 files changed, 117 insertions(+), 91 deletions(-) diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 85df375b3a..bab4bffc6e 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -212,7 +212,6 @@ class OP { TS_UDT = 8, // Ex: %"struct.MyStruct" * TS_Object = 9, // Ex: %"class.StructuredBuffer" TS_Vector = 10, // Ex: <8 x i16> - TS_Array = 11, // Ex: [8 x float] TS_MaskBitCount, // Types used in Mask end here // 
TS_Extended is only used to identify the unnamed struct type used to wrap // multiple overloads when using GetTypeSlot. diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 02dcfe65a0..ffff4eccd9 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2863,8 +2863,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixLoadFromMemory", Attribute::None, 2, - {{0x200}, {0x800}}, - {{0x0}, {0x0}}}, // Overloads: o,a + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::LinAlgMatrixLength, "LinAlgMatrixLength", OCC::LinAlgMatrixLength, @@ -2911,8 +2911,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixStoreToMemory", Attribute::None, 2, - {{0x200}, {0x800}}, - {{0x0}, {0x0}}}, // Overloads: o,a + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::LinAlgMatrixQueryAccumulatorLayout, "LinAlgMatrixQueryAccumulatorLayout", OCC::LinAlgMatrixQueryAccumulatorLayout, @@ -2967,8 +2967,8 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { "linAlgMatrixAccumulateToMemory", Attribute::None, 2, - {{0x200}, {0x800}}, - {{0x0}, {0x0}}}, // Overloads: o,a + {{0x200}, {0x63}}, + {{0x0}, {0x0}}}, // Overloads: o,hfwi {OC::LinAlgMatrixOuterProduct, "LinAlgMatrixOuterProduct", OCC::LinAlgMatrixOuterProduct, @@ -3152,8 +3152,6 @@ unsigned OP::GetTypeSlot(Type *pType) { return TS_Extended; case Type::VectorTyID: return TS_Vector; - case Type::ArrayTyID: - return TS_Array; default: break; } @@ -3194,12 +3192,6 @@ StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl<char> &Storage) { GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) .toStringRef(Storage); } - case TS_Array: { - if (Ty->isPointerTy()) - Ty = Ty->getPointerElementType(); - ArrayType *ArrTy = cast<ArrayType>(Ty); - return GetOverloadTypeName(OP::GetTypeSlot(ArrTy->getArrayElementType())); - } case TS_Extended: { DXASSERT(isa<StructType>(Ty), "otherwise, extended overload type not wrapped in
struct type."); @@ -4332,9 +4324,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { #define VEC2(_y) A(VectorType::get(_y, 2)) #define VEC4(_y) A(GetStructVectorType(4, _y)) #define VEC9(_y) A(VectorType::get(_y, 9)) +#define TGSM(_y) A(PointerType::get(_y, DXIL::kTGSMAddrSpace)) // Extended Overload types are wrapped in an anonymous struct -#define EXT(_y) A(cast<StructType>(pOverloadType)->getElementType(_y)) +#define EXT(_y) cast<StructType>(pOverloadType)->getElementType(_y) /* hctdb_instrhelp.get_oloads_funcs()*/ switch (opCode) { // return opCode @@ -6445,9 +6438,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Linear Algebra Operations case OpCode::MatVecMul: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI1); A(pI32); A(pRes); @@ -6461,9 +6454,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI1); break; case OpCode::MatVecMulAdd: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI1); A(pI32); A(pRes); @@ -6482,8 +6475,8 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::OuterProductAccumulate: A(pV); A(pI32); - EXT(0); - EXT(1); + A(EXT(0)); + A(EXT(1)); A(pRes); A(pI32); A(pI32); @@ -6586,21 +6579,21 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Linear Algebra Operations case OpCode::LinAlgMatrixMultiplyAccumulate: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); - EXT(3); + A(EXT(1)); + A(EXT(2)); + A(EXT(3)); break; case OpCode::LinAlgFillMatrix: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); break; case OpCode::LinAlgCopyConvertMatrix: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI1); break; case OpCode::LinAlgMatrixLoadFromDescriptor: @@ -6612,9 +6605,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); break; case OpCode::LinAlgMatrixLoadFromMemory: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + TGSM(EXT(1)); A(pI32); A(pI32); A(pI32); @@ -6631,17 +6624,17 @@ Function *OP::GetOpFunc(OpCode opCode, Type
*pOverloadType) { A(pI32); break; case OpCode::LinAlgMatrixGetElement: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI32); break; case OpCode::LinAlgMatrixSetElement: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); + A(EXT(1)); A(pI32); - EXT(2); + A(EXT(2)); break; case OpCode::LinAlgMatrixStoreToDescriptor: A(pV); @@ -6655,8 +6648,8 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::LinAlgMatrixStoreToMemory: A(pV); A(pI32); - EXT(0); - EXT(1); + A(EXT(0)); + TGSM(EXT(1)); A(pI32); A(pI32); A(pI32); @@ -6666,31 +6659,31 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); break; case OpCode::LinAlgMatrixMultiply: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); break; case OpCode::LinAlgMatrixAccumulate: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); break; case OpCode::LinAlgMatVecMul: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); A(pI32); break; case OpCode::LinAlgMatVecMulAdd: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); A(pI32); - EXT(3); + A(EXT(3)); A(pI32); break; case OpCode::LinAlgMatrixAccumulateToDescriptor: @@ -6705,17 +6698,17 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::LinAlgMatrixAccumulateToMemory: A(pV); A(pI32); - EXT(0); - EXT(1); + A(EXT(0)); + TGSM(EXT(1)); A(pI32); A(pI32); A(pI32); break; case OpCode::LinAlgMatrixOuterProduct: - EXT(0); + A(EXT(0)); A(pI32); - EXT(1); - EXT(2); + A(EXT(1)); + A(EXT(2)); break; // @@ -7082,7 +7075,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::MatVecMulAdd: case OpCode::LinAlgFillMatrix: case OpCode::LinAlgCopyConvertMatrix: - case OpCode::LinAlgMatrixLoadFromMemory: case OpCode::LinAlgMatrixGetElement: if (FT->getNumParams() < 2) return nullptr; @@ -7090,8 +7082,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { {FT->getReturnType(), 
FT->getParamType(1)}); case OpCode::OuterProductAccumulate: - case OpCode::LinAlgMatrixStoreToMemory: - case OpCode::LinAlgMatrixAccumulateToMemory: if (FT->getNumParams() < 3) return nullptr; return llvm::StructType::get(Ctx, @@ -7104,12 +7094,27 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { {FT->getReturnType(), FT->getParamType(1), FT->getParamType(2), FT->getParamType(3)}); + case OpCode::LinAlgMatrixLoadFromMemory: + if (FT->getNumParams() < 2) + return nullptr; + return llvm::StructType::get( + Ctx, + {FT->getReturnType(), FT->getParamType(1)->getPointerElementType()}); + case OpCode::LinAlgMatrixSetElement: if (FT->getNumParams() < 4) return nullptr; return llvm::StructType::get( Ctx, {FT->getReturnType(), FT->getParamType(1), FT->getParamType(3)}); + case OpCode::LinAlgMatrixStoreToMemory: + case OpCode::LinAlgMatrixAccumulateToMemory: + if (FT->getNumParams() < 3) + return nullptr; + return llvm::StructType::get( + Ctx, + {FT->getParamType(1), FT->getParamType(2)->getPointerElementType()}); + case OpCode::LinAlgMatrixMultiply: case OpCode::LinAlgMatrixAccumulate: case OpCode::LinAlgMatVecMul: diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 9ea6166f36..6d718257d4 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7242,11 +7242,15 @@ Value *TranslateLinAlgMatrixLoadFromMemory( Value *Stride = CI->getArgOperand(4); Value *Layout = CI->getArgOperand(5); + Value *Zero = Builder.getInt32(0); + Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero}); + Type *ArrEltTy = ArrPtr->getType()->getPointerElementType(); + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); - Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, Arr->getType()}); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, ArrEltTy}); Value *Matrix = - Builder.CreateCall(DxilFunc, {OpArg, Arr, Offset, Stride, Layout}); + Builder.CreateCall(DxilFunc, {OpArg, ArrPtr, Offset, Stride, Layout}); 
Builder.CreateStore(Matrix, MatrixPtr); return nullptr; @@ -7265,12 +7269,15 @@ Value *TranslateLinAlgMatrixAccumStoreToMemory( Value *Stride = CI->getArgOperand(4); Value *Layout = CI->getArgOperand(5); + Value *Zero = Builder.getInt32(0); + Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero}); + Type *ArrEltTy = ArrPtr->getType()->getPointerElementType(); + Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); - Function *DxilFunc = - HlslOp->GetOpFunc(OpCode, {Matrix->getType(), Arr->getType()}); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {Matrix->getType(), ArrEltTy}); return Builder.CreateCall(DxilFunc, - {OpArg, Matrix, Arr, Offset, Stride, Layout}); + {OpArg, Matrix, ArrPtr, Offset, Stride, Layout}); } } // namespace diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl index cfdac39028..f05366d62f 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl @@ -8,7 +8,7 @@ groupshared float SharedArr[64]; void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U1S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixAccumulateToMemory(mat, SharedArr, 1, 2, 3); } diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl index a3e383ca58..9c1e8303b2 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl @@ -8,7 +8,7 @@ groupshared float SharedArr[64]; void main() { // CHECK-LABEL: define void @main() - // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + // CHECK: call %dx.types.LinAlgMatrixC4M5N4U1S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U1S2.f32(i32 -2147483633, float addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 1, 2, 3); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl index 4b5b50c357..07a4fa38e5 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl @@ -8,7 +8,7 @@ groupshared float SharedArr[64]; void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* nonnull @{{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + // CHECK: call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U1S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, float 
addrspace(3)* getelementptr {{.*}}, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 1, 2, 3); } diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 5dbb59102f..b72006efa9 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -55,7 +55,7 @@ # - "a" is for any array ([n x Ty]) # dxil_all_user_oload_chars must be kept in sync with the indices in # hlsl::OP::TypeSlot in DxilOperations.h. -dxil_all_user_oload_chars = "hfd18wiluo $x7 + ty = i.llvm_type.replace("_gs", "") + if ty.startswith("$x"): + if ty != "$x" + str(next_oload_idx): raise ValueError( "Extended overloads are not sequentially referenced in " f"DXIL op {self.name}: {i.llvm_type} != $x{next_oload_idx}" @@ -6407,12 +6411,12 @@ def populate_ExperimentalOps(self): "LinAlgMatrixLoadFromMemory", "LinAlgMatrixLoadFromMemory", "fills a matrix with data from a groupshared array", - "o,a", + "o,hfwi", "", [ db_dxil_param(0, "$x0", "", "resulting matrix"), db_dxil_param( - 2, "$x1", "memory", "groupshared array to fill matrix with" + 2, "$x_gs1", "memory", "groupshared array to fill matrix with" ), db_dxil_param(3, "i32", "offset", "starting offset in the array"), db_dxil_param( @@ -6508,13 +6512,13 @@ def populate_ExperimentalOps(self): "LinAlgMatrixStoreToMemory", "LinAlgMatrixStoreToMemory", "stores a matrix to groupshared memory", - "o,a", + "o,hfwi", "", [ db_dxil_param(0, "v", "", ""), db_dxil_param(2, "$x0", "matrix", "matrix to be stored"), db_dxil_param( - 3, "$x1", "memory", "groupshared array to store into" + 3, "$x_gs1", "memory", "groupshared array to store into" ), db_dxil_param(4, "i32", "offset", "starting offset in the array"), db_dxil_param( @@ -6625,13 +6629,13 @@ def populate_ExperimentalOps(self): "LinAlgMatrixAccumulateToMemory", "LinAlgMatrixAccumulateToMemory", "accumulates a matrix to groupshared memory", - "o,a", + 
"o,hfwi", "", [ db_dxil_param(0, "v", "", ""), db_dxil_param(2, "$x0", "matrix", "Accumulator matrix"), db_dxil_param( - 3, "$x1", "memory", "groupshared array to accumulate into" + 3, "$x_gs1", "memory", "groupshared array to accumulate into" ), db_dxil_param(4, "i32", "offset", "starting offset in the array"), db_dxil_param( diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 5e09578af7..91636c309e 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -644,10 +644,15 @@ def print_opfunc_table(self): "noderecordproperty": "A(nodeRecordProperty);", "hit_object": "A(pHit);", # Extended overload slots, extend as needed: - "$x0": "EXT(0);", - "$x1": "EXT(1);", - "$x2": "EXT(2);", - "$x3": "EXT(3);", + "$x0": "A(EXT(0));", + "$x1": "A(EXT(1));", + "$x2": "A(EXT(2));", + "$x3": "A(EXT(3));", + # Groupshared pointers to extended overloads: + "$x_gs0": "TGSM(EXT(0));", + "$x_gs1": "TGSM(EXT(1));", + "$x_gs2": "TGSM(EXT(2));", + "$x_gs3": "TGSM(EXT(3));", } last_category = None for i in self.db.get_dxil_ops(): @@ -679,6 +684,7 @@ def print_opfunc_oload_type(self): vec_ty = "$vec" gsptr_ty = "$gsptr" extended_ty = "$x" + extended_gs_ty = "$x_gs" last_category = None index_dict = collections.OrderedDict() @@ -846,7 +852,7 @@ def print_opfunc_oload_type(self): # indices the key, and add the opcode to a list of opcodes for that # key. Indices start with 0 for return type, and 1 for the first # function parameter, which is the DXIL OpCode. - indices = [] + indices = [] # (op.pos, unwrap_pointer) pairs for index, op in enumerate(instr.ops): # Skip dxil opcode. 
if op.pos == 1: @@ -854,8 +860,10 @@ def print_opfunc_oload_type(self): op_type = op.llvm_type if op_type.startswith(extended_ty): + gs_ptr = op_type.startswith(extended_gs_ty) + prefix_len = len(extended_gs_ty) if gs_ptr else len(extended_ty) try: - extended_index = int(op_type[2:]) + extended_index = int(op_type[prefix_len:]) except: raise ValueError( "Error parsing extended operand type " @@ -866,7 +874,7 @@ def print_opfunc_oload_type(self): f"'$x{extended_index}' is not in sequential " + f"order for DXIL op '{instr.name}'" ) - indices.append(op.pos) + indices.append((op.pos, gs_ptr)) if len(indices) != instr.num_oloads: raise ValueError( @@ -875,23 +883,26 @@ def print_opfunc_oload_type(self): ) extended_dict.setdefault(tuple(indices), []).append(instr.name) - def get_type_at_index(index): - if index == 0: - return "FT->getReturnType()" - return f"FT->getParamType({index - 1})" + def get_type_at_index(index, unwrap_pointer): + result = "FT->getReturnType()" + if index > 0: + result = f"FT->getParamType({index - 1})" + if unwrap_pointer: + result = result + "->getPointerElementType()" + return result for index_tuple, opcodes in extended_dict.items(): line = "" for opcode in opcodes: line = line + f"case OpCode::{opcode}:\n" - if index_tuple[-1] > 0: + if index_tuple[-1][0] > 0: line += ( - f" if (FT->getNumParams() < {index_tuple[-1]})\n" + f" if (FT->getNumParams() < {index_tuple[-1][0]})\n" + " return nullptr;\n" ) line += ( " return llvm::StructType::get(Ctx, {" - + ", ".join([get_type_at_index(index) for index in index_tuple]) + + ", ".join([get_type_at_index(index, unwrap_pointer) for index, unwrap_pointer in index_tuple]) + "});\n" ) print(line) From 36b93d5f2d3cf7f148e0d96f876833390bf89609 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 11 Mar 2026 19:46:59 -0600 Subject: [PATCH 4/7] Cleanup comments --- utils/hct/hctdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 
b72006efa9..6e9d2920e1 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -51,8 +51,8 @@ # processing. # - "," is used to separate multiple overload dimensions. # - When used, only $x0, $x1, etc. are supported for overloaded parameter -# types. -# - "a" is for any array ([n x Ty]) +# types. $x_gs0, $x_gs1, etc work like $xN except the overload will be a +# pointer to groupshared memory. # dxil_all_user_oload_chars must be kept in sync with the indices in # hlsl::OP::TypeSlot in DxilOperations.h. dxil_all_user_oload_chars = "hfd18wiluo<" From 7ec1237f7179d5a6e8e1b023e81757ee6230956d Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Fri, 6 Mar 2026 18:14:07 -0800 Subject: [PATCH 5/7] [SM6.10] Add LinAlgMatrix ops validation tests for all stages --- .../LinAlgMatrix/linalgmatrix-as.ll | 167 +++++++ .../LinAlgMatrix/linalgmatrix-cs.ll | 159 +++++++ .../LinAlgMatrix/linalgmatrix-ds.ll | 193 +++++++++ .../LinAlgMatrix/linalgmatrix-gs.ll | 198 +++++++++ .../LinAlgMatrix/linalgmatrix-hs.ll | 205 +++++++++ .../LinAlgMatrix/linalgmatrix-ms.ll | 183 ++++++++ .../LinAlgMatrix/linalgmatrix-node.ll | 188 ++++++++ .../LinAlgMatrix/linalgmatrix-ps.ll | 187 ++++++++ .../LinAlgMatrix/linalgmatrix-raytracing.ll | 408 ++++++++++++++++++ .../LinAlgMatrix/linalgmatrix-vs.ll | 188 ++++++++ 10 files changed, 2076 insertions(+) create mode 100644 tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll create mode 100644 tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll create mode 100644 tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll create mode 100644 tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll create mode 100644 tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll create mode 100644 tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll create mode 100644 tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll create mode 100644 
tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll create mode 100644 tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll create mode 100644 tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll new file mode 100644 index 0000000000..3fa243952e --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll @@ -0,0 +1,167 @@ +; REQUIRES: dxil-1-10 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%struct.AmpPayload.0 = type { [2 x float] } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +define void @mainAS() { + + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + ; + ; Built-ins allowed in all stages + ; + + ; dx.op.linAlgMatrixAccumulate + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + + ; dx.op.linAlgMatrixAccumulateToDescriptor + call void 
@dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + ; dx.op.linAlgMatrixLength + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + + ; dx.op.linAlgMatrixLoadFromDescriptor + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + ; dx.op.linAlgMatrixOuterProduct + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + ; dx.op.linAlgMatrixQueryAccumulatorLayout + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + ; dx.op.linAlgMatVecMul + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + ; dx.op.linAlgMatVecMulAdd + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + + ; dx.op.linAlgCopyConvertMatrix + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + ; dx.op.linAlgFillMatrix + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 
@dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + + ; dx.op.linAlgMatrixGetCoordinate + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixGetElement + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixMultiply + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + + ; dx.op.linAlgMatrixMultiplyAccumulate + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + ; dx.op.linAlgMatrixSetElement + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + ; dx.op.linAlgMatrixStoreToDescriptor + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + %2 = alloca %struct.AmpPayload.0, align 8 + call void @dx.op.dispatchMesh.struct.AmpPayload.0(i32 173, i32 8, i32 1, i32 1, %struct.AmpPayload.0* nonnull %2) ; DispatchMesh(threadGroupCountX,threadGroupCountY,threadGroupCountZ,payload) + + ret void +} 
+ +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + +; Function Attrs: nounwind +declare <2 x i32> 
@dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2, %dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1 + +; Function Attrs: nounwind +declare void @dx.op.dispatchMesh.struct.AmpPayload.0(i32, i32, i32, i32, %struct.AmpPayload.0*) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.entryPoints = !{!9} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 = !{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"as", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!9 = !{void ()* @mainAS, !"mainAS", null, !6, !10} +!10 = !{i32 0, i64 8589934608, i32 10, !11} +!11 = !{!12, i32 8} +!12 = !{i32 8, i32 1, i32 1} 
diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll new file mode 100644 index 0000000000..630ef3908a --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll @@ -0,0 +1,159 @@ +; REQUIRES: dxil-1-10 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +define void @mainCS() { + + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + ; + ; Built-ins allowed in all stages + ; + + ; dx.op.linAlgMatrixAccumulate + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + + ; dx.op.linAlgMatrixAccumulateToDescriptor + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + ; dx.op.linAlgMatrixLength + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, 
%dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + + ; dx.op.linAlgMatrixLoadFromDescriptor + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + ; dx.op.linAlgMatrixOuterProduct + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + ; dx.op.linAlgMatrixQueryAccumulatorLayout + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + ; dx.op.linAlgMatVecMul + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + ; dx.op.linAlgMatVecMulAdd + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + + ; dx.op.linAlgCopyConvertMatrix + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + ; dx.op.linAlgFillMatrix + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + + ; dx.op.linAlgMatrixGetCoordinate + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixGetElement + %v11 = 
call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixMultiply + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + + ; dx.op.linAlgMatrixMultiplyAccumulate + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + ; dx.op.linAlgMatrixSetElement + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + ; dx.op.linAlgMatrixStoreToDescriptor + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + ret void +} + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + 
+; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + +; Function Attrs: nounwind +declare <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2, %dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 
@dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.entryPoints = !{!9} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 = !{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"cs", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!9 = !{void ()* @mainCS, !"mainCS", null, !6, !10} +!10 = !{i32 0, i64 8589934608, i32 4, !11} +!11 = !{i32 4, i32 4, i32 4} diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll new file mode 100644 index 0000000000..51da8f2a7d --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll @@ -0,0 +1,193 @@ +; REQUIRES: dxil-1-10 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixMultiply not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixAccumulate not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model ds_6_10. 
+; CHECK: Function: MainDS: error: Opcode LinAlgMatrixLength not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgCopyConvertMatrix not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgFillMatrix not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixGetElement not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixSetElement not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: MainDS: error: Function uses features incompatible with the shader stage (ds) of the entry function. +; CHECK: Validation failed. 
+ + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +define void @MainDS() { + + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + ; + ; Built-ins allowed in all stages + ; + + ; dx.op.linAlgMatrixAccumulate + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + + ; dx.op.linAlgMatrixAccumulateToDescriptor + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + ; dx.op.linAlgMatrixLength + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + + ; dx.op.linAlgMatrixLoadFromDescriptor + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + ; dx.op.linAlgMatrixOuterProduct + %v4 = call 
%dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + ; dx.op.linAlgMatrixQueryAccumulatorLayout + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + ; dx.op.linAlgMatVecMul + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + ; dx.op.linAlgMatVecMulAdd + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + + ; dx.op.linAlgCopyConvertMatrix + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + ; dx.op.linAlgFillMatrix + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + + ; dx.op.linAlgMatrixGetCoordinate + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixGetElement + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixMultiply + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, 
%dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + + ; dx.op.linAlgMatrixMultiplyAccumulate + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + ; dx.op.linAlgMatrixSetElement + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + ; dx.op.linAlgMatrixStoreToDescriptor + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0) ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis) + %3 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0) ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis) + %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0) ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis) + %5 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 0) ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %4) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %5) ; 
StoreOutput(outputSigId,rowIndex,colIndex,value) + ret void +} + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + 
+; Function Attrs: nounwind +declare <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2, %dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1 + +; Function Attrs: nounwind readnone +declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.viewIdState = !{!9} +!dx.entryPoints = !{!10} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 = !{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"ds", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!9 = !{[7
x i32] [i32 4, i32 4, i32 1, i32 2, i32 4, i32 8, i32 0]} +!10 = !{void ()* @MainDS, !"MainDS", !11, !6, !16} +!11 = !{!12, !12, null} +!12 = !{!13} +!13 = !{i32 0, !"SV_Position", i8 9, i8 3, !14, i8 4, i32 1, i8 4, i32 0, i8 0, !15} +!14 = !{i32 0} +!15 = !{i32 3, i32 15} +!16 = !{i32 0, i64 8590000144, i32 2, !17} +!17 = !{i32 2, i32 3} diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll new file mode 100644 index 0000000000..f471d3c8c2 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll @@ -0,0 +1,198 @@ +; REQUIRES: dxil-1-10 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixMultiply not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixAccumulate not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixLength not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgCopyConvertMatrix not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgFillMatrix not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixGetElement not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixSetElement not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. 
+; CHECK: Function: MainGS: error: Function uses features incompatible with the shader stage (gs) of the entry function. +; CHECK: Validation failed. + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +define void @MainGS() { + + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + ; + ; Built-ins allowed in all stages + ; + + ; dx.op.linAlgMatrixAccumulate + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + + ; dx.op.linAlgMatrixAccumulateToDescriptor + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + ; dx.op.linAlgMatrixLength + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + + ; dx.op.linAlgMatrixLoadFromDescriptor + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle 
%handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + ; dx.op.linAlgMatrixOuterProduct + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + ; dx.op.linAlgMatrixQueryAccumulatorLayout + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + ; dx.op.linAlgMatVecMul + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + ; dx.op.linAlgMatVecMulAdd + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + + ; dx.op.linAlgCopyConvertMatrix + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + ; dx.op.linAlgFillMatrix + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + + ; dx.op.linAlgMatrixGetCoordinate + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixGetElement + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixMultiply + %v12 = call 
%dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + + ; dx.op.linAlgMatrixMultiplyAccumulate + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + ; dx.op.linAlgMatrixSetElement + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + ; dx.op.linAlgMatrixStoreToDescriptor + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+01) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+01) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 1.000000e+01) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+01) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.emitStream(i32 97, i8 0) ; EmitStream(streamId) + call void @dx.op.cutStream(i32 98, i8 0) ; CutStream(streamId) + + ret void +} + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, 
%dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + +; Function Attrs: nounwind +declare <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float 
@dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2, %dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1 + +; Function Attrs: nounwind +declare void @dx.op.cutStream(i32, i8) #0 + +; Function Attrs: nounwind +declare void @dx.op.emitStream(i32, i8) #0 + +; Function Attrs: nounwind +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.viewIdState = !{!9} +!dx.entryPoints = !{!10} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 = !{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"gs", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!9 = !{[9 x i32] [i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0]} +!10 = !{void ()* @MainGS,
!"MainGS", !11, !6, !18} +!11 = !{!12, !15, null} +!12 = !{!13} +!13 = !{i32 0, !"SV_Position", i8 9, i8 3, !14, i8 4, i32 1, i8 4, i32 0, i8 0, null} +!14 = !{i32 0} +!15 = !{!16} +!16 = !{i32 0, !"SV_Position", i8 9, i8 3, !14, i8 4, i32 1, i8 4, i32 0, i8 0, !17} +!17 = !{i32 3, i32 15} +!18 = !{i32 0, i64 8590000144, i32 1, !19} +!19 = !{i32 3, i32 1, i32 1, i32 1, i32 1} diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll new file mode 100644 index 0000000000..a24cbf5cf7 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll @@ -0,0 +1,205 @@ +; REQUIRES: dxil-1-10 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixMultiply not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixAccumulate not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixLength not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgCopyConvertMatrix not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgFillMatrix not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixGetElement not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixSetElement not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. 
+; CHECK: Function: MainHS: error: Function uses features incompatible with the shader stage (hs) of the entry function. +; CHECK: Validation failed. + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +define void @MainHS() { + + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + ; + ; Built-ins allowed in all stages + ; + + ; dx.op.linAlgMatrixAccumulate + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + + ; dx.op.linAlgMatrixAccumulateToDescriptor + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + ; dx.op.linAlgMatrixLength + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + + ; dx.op.linAlgMatrixLoadFromDescriptor + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle 
%handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + ; dx.op.linAlgMatrixOuterProduct + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + ; dx.op.linAlgMatrixQueryAccumulatorLayout + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + ; dx.op.linAlgMatVecMul + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + ; dx.op.linAlgMatVecMulAdd + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + + ; dx.op.linAlgCopyConvertMatrix + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + ; dx.op.linAlgFillMatrix + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + + ; dx.op.linAlgMatrixGetCoordinate + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixGetElement + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixMultiply + %v12 = call
%dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + + ; dx.op.linAlgMatrixMultiplyAccumulate + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + ; dx.op.linAlgMatrixSetElement + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + ; dx.op.linAlgMatrixStoreToDescriptor + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + ret void +} + +define void @"\01?HSPatch@@YA?AUPCStruct@@V?$InputPatch@UPosStruct@@$02@@V?$OutputPatch@UPosStruct@@$02@@I@Z"() { + %1 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 0) ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis) + call void @dx.op.storePatchConstant.f32(i32 106, i32 0, i32 0, i8 0, float %1) ; StorePatchConstant(outputSigID,row,col,value) + call void @dx.op.storePatchConstant.f32(i32 106, i32 0, i32 1, i8 0, float %1) ; StorePatchConstant(outputSigID,row,col,value) + call void @dx.op.storePatchConstant.f32(i32 106, i32 0, i32 2, i8 0, float %1) ; StorePatchConstant(outputSigID,row,col,value) + call void @dx.op.storePatchConstant.f32(i32 106, i32 1, i32 0, i8 0, float %1) ; StorePatchConstant(outputSigID,row,col,value) + call void @dx.op.storePatchConstant.f32(i32 106, 
i32 2, i32 0, i8 0, float undef) ; StorePatchConstant(outputSigID,row,col,value) + call void @dx.op.storePatchConstant.f32(i32 106, i32 2, i32 0, i8 1, float undef) ; StorePatchConstant(outputSigID,row,col,value) + call void @dx.op.storePatchConstant.f32(i32 106, i32 2, i32 0, i8 2, float undef) ; StorePatchConstant(outputSigID,row,col,value) + call void @dx.op.storePatchConstant.f32(i32 106, i32 2, i32 0, i8 3, float undef) ; StorePatchConstant(outputSigID,row,col,value) + ret void +} + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> 
@dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + +; Function Attrs: nounwind +declare <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2, %dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1 + + +; Function Attrs: nounwind readnone +declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.viewIdState = !{!9} +!dx.entryPoints = !{!10} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 =
!{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"hs", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!9 = !{[11 x i32] [i32 4, i32 4, i32 1, i32 2, i32 4, i32 8, i32 20, i32 0, i32 0, i32 0, i32 6280]} +!10 = !{void ()* @MainHS, !"MainHS", !11, !6, !22} +!11 = !{!12, !12, !16} +!12 = !{!13} +!13 = !{i32 0, !"SV_Position", i8 9, i8 3, !14, i8 4, i32 1, i8 4, i32 0, i8 0, !15} +!14 = !{i32 0} +!15 = !{i32 3, i32 15} +!16 = !{!17, !20, !21} +!17 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !18, i8 0, i32 3, i8 1, i32 0, i8 3, !19} +!18 = !{i32 0, i32 1, i32 2} +!19 = !{i32 3, i32 1} +!20 = !{i32 1, !"SV_InsideTessFactor", i8 9, i8 26, !14, i8 0, i32 1, i8 1, i32 3, i8 0, !19} +!21 = !{i32 2, !"TEST", i8 9, i8 0, !14, i8 0, i32 1, i8 4, i32 4, i8 0, !15} +!22 = !{i32 0, i64 8590000144, i32 3, !23} +!23 = !{void ()* @"\01?HSPatch@@YA?AUPCStruct@@V?$InputPatch@UPosStruct@@$02@@V?$OutputPatch@UPosStruct@@$02@@I@Z", i32 3, i32 3, i32 2, i32 3, i32 3, float 6.400000e+01} diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll new file mode 100644 index 0000000000..199a63ccf4 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll @@ -0,0 +1,183 @@ +; REQUIRES: dxil-1-10 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +define void @mainMeS() { + + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + %thread_id_group = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96) ; FlattenedThreadIdInGroup() + ; + ; Built-ins allowed in all stages + ; + + ; dx.op.linAlgMatrixAccumulate + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + + ; dx.op.linAlgMatrixAccumulateToDescriptor + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + ; dx.op.linAlgMatrixLength + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + + ; dx.op.linAlgMatrixLoadFromDescriptor + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; 
LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + ; dx.op.linAlgMatrixOuterProduct + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + ; dx.op.linAlgMatrixQueryAccumulatorLayout + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + ; dx.op.linAlgMatVecMul + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + ; dx.op.linAlgMatVecMulAdd + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + + ; dx.op.linAlgCopyConvertMatrix + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + ; dx.op.linAlgFillMatrix + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + + ; dx.op.linAlgMatrixGetCoordinate + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixGetElement + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixMultiply + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2
@dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + + ; dx.op.linAlgMatrixMultiplyAccumulate + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + ; dx.op.linAlgMatrixSetElement + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + ; dx.op.linAlgMatrixStoreToDescriptor + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + call void @dx.op.setMeshOutputCounts(i32 168, i32 32, i32 16) ; SetMeshOutputCounts(numVertices,numPrimitives) + call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 0, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) + call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 1, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) + call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 2, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) + call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 3, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) + + ret void +} + +; 
Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + +; Function Attrs: nounwind +declare <2 x i32> 
@dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2, %dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1 + +; Function Attrs: nounwind readnone +declare i32 @dx.op.flattenedThreadIdInGroup.i32(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.storeVertexOutput.f32(i32, i32, i32, i8, float, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.setMeshOutputCounts(i32, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.viewIdState = !{!9} +!dx.entryPoints = !{!10} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 = !{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"ms", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11,
i1 false, i1 false, i1 false, null} +!9 = !{[3 x i32] [i32 0, i32 4, i32 0]} +!10 = !{void ()* @mainMeS, !"mainMeS", !11, !6, !16} +!11 = !{null, !12, null} +!12 = !{!13} +!13 = !{i32 0, !"SV_Position", i8 9, i8 3, !14, i8 4, i32 1, i8 4, i32 0, i8 0, !15} +!14 = !{i32 0} +!15 = !{i32 3, i32 15} +!16 = !{i32 0, i64 8589934608, i32 9, !17} +!17 = !{!18, i32 32, i32 0, i32 2, i32 0} +!18 = !{i32 8, i32 8, i32 2} diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll new file mode 100644 index 0000000000..b4280ba682 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll @@ -0,0 +1,188 @@ +; REQUIRES: dxil-1-10 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixMultiply not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixAccumulate not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixLength not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgCopyConvertMatrix not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgFillMatrix not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixGetElement not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(node). 
+; CHECK: Function: mainNS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: mainNS: error: Function uses features incompatible with the shader stage (node) of the entry function. +; CHECK: Validation failed. + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?buf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +define void @mainNS() { + + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + ; + ; Built-ins allowed in all stages + ; + + ; dx.op.linAlgMatrixAccumulate + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + + ; dx.op.linAlgMatrixAccumulateToDescriptor + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + ; dx.op.linAlgMatrixLength + %v2 = call i32 
@dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + + ; dx.op.linAlgMatrixLoadFromDescriptor + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + ; dx.op.linAlgMatrixOuterProduct + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + ; dx.op.linAlgMatrixQueryAccumulatorLayout + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + ; dx.op.linAlgMatVecMul + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + ; dx.op.linAlgMatVecMulAdd + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + + ; dx.op.linAlgCopyConvertMatrix + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + ; dx.op.linAlgFillMatrix + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + + ; dx.op.linAlgMatrixGetCoordinate + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ;
LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixGetElement + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixMultiply + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + + ; dx.op.linAlgMatrixMultiplyAccumulate + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + ; dx.op.linAlgMatrixSetElement + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + ; dx.op.linAlgMatrixStoreToDescriptor + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + ret void +} + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare void 
@dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + +; Function Attrs: nounwind +declare <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2, 
%dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1 + +; Function Attrs: nounwind +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.typeAnnotations = !{!9} +!dx.entryPoints = !{!13, !15} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 = !{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"lib", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"buf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!9 = !{i32 1, void ()* @mainNS, !10} +!10 = !{!11} +!11 = !{i32 1, !12, !12} +!12 = !{} +!13 = !{null, !"", null, !6, !14} +!14 = !{i32 0, i64 8589934608} +!15 = !{void ()* @mainNS, !"mainNS", null, null, !16} +!16 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !17, i32 16, i32 -1, i32 18, !18, i32 4, !19, i32 5, !20} +!17 = !{!"mainNS", i32 0} +!18 = !{i32 8, i32 1, i32 1} +!19 = !{i32 64, i32 2, i32 2} +!20 = !{i32 0} diff --git 
a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll new file mode 100644 index 0000000000..2e3a6ef71f --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll @@ -0,0 +1,187 @@ +; REQUIRES: dxil-1-10 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixMultiply not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixAccumulate not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixLength not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgCopyConvertMatrix not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgFillMatrix not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixGetElement not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixSetElement not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: mainPS: error: Function uses features incompatible with the shader stage (ps) of the entry function. +; CHECK: Validation failed. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +define void @mainPS() { + + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + ; + ; Built-ins allowed in all stages + ; + + ; dx.op.linAlgMatrixAccumulate + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + + ; dx.op.linAlgMatrixAccumulateToDescriptor + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + ; dx.op.linAlgMatrixLength + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + + ; dx.op.linAlgMatrixLoadFromDescriptor + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + ; dx.op.linAlgMatrixOuterProduct + %v4 = call
%dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + ; dx.op.linAlgMatrixQueryAccumulatorLayout + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + ; dx.op.linAlgMatVecMul + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + ; dx.op.linAlgMatVecMulAdd + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + + ; dx.op.linAlgCopyConvertMatrix + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + ; dx.op.linAlgFillMatrix + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + + ; dx.op.linAlgMatrixGetCoordinate + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixGetElement + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixMultiply + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9,
%dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + + ; dx.op.linAlgMatrixMultiplyAccumulate + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + ; dx.op.linAlgMatrixSetElement + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + ; dx.op.linAlgMatrixStoreToDescriptor + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + + ret void +} + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +;
Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + +; Function Attrs: nounwind +declare <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, 
%dx.types.LinAlgMatrixC4M4N5U1S2, %dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1 + +; Function Attrs: nounwind +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.viewIdState = !{!9} +!dx.entryPoints = !{!10} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 = !{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"ps", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!9 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!10 = !{void ()* @mainPS, !"mainPS", !11, !6, !18} +!11 = !{!12, !15, null} +!12 = !{!13} +!13 = !{i32 0, !"SV_PrimitiveID", i8 5, i8 10, !14, i8 1, i32 1, i8 1, i32 0, i8 0, null} +!14 = !{i32 0} +!15 = !{!16} +!16 = !{i32 0, !"SV_Target", i8 9, i8 16, !14, i8 0, i32 1, i8 4, i32 0, i8 0, !17} +!17 = !{i32 3, i32 15} +!18 = !{i32 0, i64 8589934608} diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll new file mode 100644 index 
0000000000..c627b5e4cc --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll @@ -0,0 +1,408 @@ +; REQUIRES: dxil-1-10 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixMultiply not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixMultiply not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixMultiply not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixMultiply not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixMultiply not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixMultiply not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixAccumulate not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixAccumulate not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixAccumulate not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixAccumulate not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixAccumulate not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixAccumulate not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model lib_6_10(closesthit). 
+; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixLength not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixLength not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixLength not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixLength not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixLength not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixLength not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgCopyConvertMatrix not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgCopyConvertMatrix not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgCopyConvertMatrix not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgCopyConvertMatrix not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgCopyConvertMatrix not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgCopyConvertMatrix not valid in shader model lib_6_10(raygeneration). 
+ +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgFillMatrix not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgFillMatrix not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgFillMatrix not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgFillMatrix not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgFillMatrix not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgFillMatrix not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixGetElement not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixGetElement not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixGetElement not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixGetElement not valid in shader model lib_6_10(callable). 
+; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixGetElement not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixGetElement not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. 
+; CHECK: Function: {{.*}}MainRG{{.*}}: error: Function uses features incompatible with the shader stage (raygeneration) of the entry function. + +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Function uses features incompatible with the shader stage (intersection) of the entry function. + +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Function uses features incompatible with the shader stage (callable) of the entry function. + +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Function uses features incompatible with the shader stage (anyhit) of the entry function. + +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Function uses features incompatible with the shader stage (closesthit) of the entry function. + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Function uses features incompatible with the shader stage (miss) of the entry function. + +; CHECK: Validation failed. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" +%dx.types.Handle = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.Attribs = type { <2 x float> } +%struct.RayPayload = type { float } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?buf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +define void @"\01?MainRG@@YAXXZ"() #0 { + + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + ; + ; Built-ins allowed in all stages + ; + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2
@dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8,
%dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + ret void +} + +define void @"\01?MainIS@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + ; + ; Built-ins allowed in all stages + ; + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v4 = call
%dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9,
%dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + ret void +} + +define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %attrs) #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + ; + ; Built-ins allowed in all stages + ; + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5,
i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 
@dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + ret void +} + +define void @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noalias nocapture %pld, %struct.Attribs* nocapture readnone %attrs) #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + ; + ; Built-ins allowed in all stages + ; + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, 
%dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, 
%dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + ret void +} + +define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noalias nocapture %pld, %struct.Attribs* nocapture readnone %attrs) #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + ; + ; Built-ins allowed in all stages + ; + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; 
LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + %v12 = call 
%dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + ret void +} + +define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapture %pld) #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + ; + ; Built-ins allowed in all stages + ; + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 
undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; 
LinAlgMatrixGetElement(matrix,threadLocalIndex) + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + ret void +} + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, 
%dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + +; Function Attrs: nounwind +declare <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2, %dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly 
+declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.typeAnnotations = !{!9} +!dx.dxrPayloadAnnotations = !{!17} +!dx.entryPoints = !{!20, !22, !25, !27, !29, !31, !33} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 = !{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (Main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"lib", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"buf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!9 = !{i32 1, void ()* @"\01?MainRG@@YAXXZ", !10, void ()* @"\01?MainIS@@YAXXZ", !10, void (%struct.Attribs*)* @"\01?MainCL@@YAXUAttribs@@@Z", !13, void (%struct.RayPayload*, %struct.Attribs*)* @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z", !15, void (%struct.RayPayload*, %struct.Attribs*)* @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z", !15, void (%struct.RayPayload*)* @"\01?MainMS@@YAXURayPayload@@@Z", !13} +!10 = !{!11} +!11 = !{i32 1, !12, !12} +!12 = !{} +!13 = !{!11, !14} +!14 = !{i32 2, !12, !12} +!15 = !{!11, !14, !16} +!16 = !{i32 0, !12, !12} +!17 = !{i32 0, %struct.RayPayload undef, !18} +!18 = !{!19} +!19 = !{i32 0, i32 13107} +!20 = !{null, !"", null, !6, !21} +!21 = !{i32 0, i64 8589934608} +!22 = !{void ()* @"\01?MainRG@@YAXXZ", !"\01?MainRG@@YAXXZ", null, null, !23} +!23 = !{i32 8, i32 7, i32 5, !24} +!24 = !{i32 0} +!25 = !{void (%struct.RayPayload*, %struct.Attribs*)* 
@"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z", !"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z", null, null, !26} +!26 = !{i32 8, i32 9, i32 6, i32 4, i32 7, i32 8, i32 5, !24} +!27 = !{void (%struct.Attribs*)* @"\01?MainCL@@YAXUAttribs@@@Z", !"\01?MainCL@@YAXUAttribs@@@Z", null, null, !28} +!28 = !{i32 8, i32 12, i32 6, i32 8, i32 5, !24} +!29 = !{void (%struct.RayPayload*, %struct.Attribs*)* @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z", !"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z", null, null, !30} +!30 = !{i32 8, i32 10, i32 6, i32 4, i32 7, i32 8, i32 5, !24} +!31 = !{void ()* @"\01?MainIS@@YAXXZ", !"\01?MainIS@@YAXXZ", null, null, !32} +!32 = !{i32 8, i32 8, i32 5, !24} +!33 = !{void (%struct.RayPayload*)* @"\01?MainMS@@YAXURayPayload@@@Z", !"\01?MainMS@@YAXURayPayload@@@Z", null, null, !34} +!34 = !{i32 8, i32 11, i32 6, i32 4, i32 5, !24} diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll new file mode 100644 index 0000000000..09a3753a6a --- /dev/null +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll @@ -0,0 +1,188 @@ +; REQUIRES: dxil-1-10 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixMultiply not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixAccumulate not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixStoreToDescriptor not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixLength not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgCopyConvertMatrix not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgFillMatrix not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixGetCoordinate not valid in shader model vs_6_10. 
+; CHECK: Function: mainVS: error: Opcode LinAlgMatrixGetElement not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixSetElement not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: mainVS: error: Function uses features incompatible with the shader stage (vs) of the entry function. +; CHECK: Validation failed. + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.LinAlgMatrixC4M5N4U2S2 = type { i8* } +%dx.types.LinAlgMatrixC4M5N4U0S2 = type { i8* } +%dx.types.LinAlgMatrixC4M4N5U1S2 = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +define void @mainVS() { + + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + ; + ; Built-ins allowed in all stages + ; + + ; dx.op.linAlgMatrixAccumulate + %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) + + ; dx.op.linAlgMatrixAccumulateToDescriptor + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, 
%dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + + ; dx.op.linAlgMatrixLength + %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) + + ; dx.op.linAlgMatrixLoadFromDescriptor + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + + ; dx.op.linAlgMatrixOuterProduct + %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) + + ; dx.op.linAlgMatrixQueryAccumulatorLayout + %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() + + ; dx.op.linAlgMatVecMul + %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) + + ; dx.op.linAlgMatVecMulAdd + %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation) + + ; + ; Built-ins restricted to compute, mesh and amplification shaders + ; + + ; dx.op.linAlgCopyConvertMatrix + %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) + + ; dx.op.linAlgFillMatrix + %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) + + ; 
dx.op.linAlgMatrixGetCoordinate + %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixGetElement + %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) + + ; dx.op.linAlgMatrixMultiply + %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) + + ; dx.op.linAlgMatrixMultiplyAccumulate + %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) + + ; dx.op.linAlgMatrixSetElement + %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) + + ; dx.op.linAlgMatrixStoreToDescriptor + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + + ; FIXME: 3 more ops coming soon + + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 1.000000e+00) ; 
StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + + ret void +} + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 + +; Function Attrs: nounwind +declare i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, <4 x i32>, i32, <4 x i32>, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, 
%dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32, i32) #0 + +; Function Attrs: nounwind +declare <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2, %dx.types.LinAlgMatrixC4M5N4U2S2) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #1 + +; Function Attrs: nounwind +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.targetTypes = !{!0, !1, !2} +!llvm.ident = !{!3} +!dx.version = !{!4} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.resources = !{!6} +!dx.viewIdState = !{!9} +!dx.entryPoints = !{!10} + +!0 = !{%dx.types.LinAlgMatrixC4M5N4U0S2 undef, i32 4, i32 5, i32 4, i32 0, i32 2} +!1 = !{%dx.types.LinAlgMatrixC4M4N5U1S2 undef, i32 4, i32 4, i32 5, i32 1, i32 2} +!2 = !{%dx.types.LinAlgMatrixC4M5N4U2S2 undef, i32 4, i32 5, i32 4, i32 2, i32 2} +!3 = !{!"dxc(private) 1.9.0.15241 (main, 1f63535ae)"} +!4 = !{i32 1, i32 10} +!5 = !{!"vs", i32 6, i32 10} +!6 = !{null, !7, null, null} +!7 = !{!8} +!8 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, 
i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!9 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!10 = !{void ()* @mainVS, !"mainVS", !11, !6, !18} +!11 = !{!12, !15, null} +!12 = !{!13} +!13 = !{i32 0, !"SV_VertexID", i8 5, i8 1, !14, i8 0, i32 1, i8 1, i32 0, i8 0, null} +!14 = !{i32 0} +!15 = !{!16} +!16 = !{i32 0, !"OUT", i8 9, i8 0, !14, i8 2, i32 1, i8 4, i32 0, i8 0, !17} +!17 = !{i32 3, i32 15} +!18 = !{i32 0, i64 8590000144} From f0872f435cdba01ee1272e7fc3b3b4ab90c49839 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Fri, 13 Mar 2026 12:57:50 -0700 Subject: [PATCH 6/7] Add last 3 ops --- .../LinAlgMatrix/linalgmatrix-as.ll | 20 +++++- .../LinAlgMatrix/linalgmatrix-cs.ll | 20 +++++- .../LinAlgMatrix/linalgmatrix-ds.ll | 23 ++++++- .../LinAlgMatrix/linalgmatrix-gs.ll | 23 ++++++- .../LinAlgMatrix/linalgmatrix-hs.ll | 23 ++++++- .../LinAlgMatrix/linalgmatrix-ms.ll | 20 +++++- .../LinAlgMatrix/linalgmatrix-node.ll | 22 ++++++- .../LinAlgMatrix/linalgmatrix-ps.ll | 23 ++++++- .../LinAlgMatrix/linalgmatrix-raytracing.ll | 63 +++++++++++++++---- .../LinAlgMatrix/linalgmatrix-vs.ll | 23 ++++++- 10 files changed, 238 insertions(+), 22 deletions(-) diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll index 3fa243952e..8295d09ba7 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll @@ -15,6 +15,8 @@ target triple = "dxil-ms-dx" %dx.types.ResourceProperties = type { i32, i32 } %struct.RWByteAddressBuffer = type { i32 } +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 + define void @mainAS() { %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) @@ -76,8 +78,15 @@ define void @mainAS() { ; 
dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - ; FIXME: 3 more ops coming soon + ; dx.op.linAlgMatrixAccumulateToMemory + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + ; dx.op.linAlgMatrixLoadFromMemory + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + ; dx.op.linAlgMatrixStoreToMemory + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + %2 = alloca %struct.AmpPayload.0, align 8 call void @dx.op.dispatchMesh.struct.AmpPayload.0(i32 173, i32 8, i32 1, i32 1, %struct.AmpPayload.0* nonnull %2) ; DispatchMesh(threadGroupCountX,threadGroupCountY,threadGroupCountZ,payload) @@ -132,6 +141,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void 
@dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll index 630ef3908a..68e4bf24a6 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll @@ -14,6 +14,8 @@ target triple = "dxil-ms-dx" %dx.types.ResourceProperties = type { i32, i32 } %struct.RWByteAddressBuffer = type { i32 } +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 + define void @mainCS() { %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) @@ -75,7 +77,14 @@ define void @mainCS() { ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - ; FIXME: 3 more ops coming soon + ; dx.op.linAlgMatrixAccumulateToMemory + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 
0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + ; dx.op.linAlgMatrixLoadFromMemory + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + + ; dx.op.linAlgMatrixStoreToMemory + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) ret void } @@ -128,6 +137,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll index 51da8f2a7d..6f29147319 100644 --- 
a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll @@ -11,6 +11,9 @@ ; CHECK: Function: MainDS: error: Opcode LinAlgMatrixGetElement not valid in shader model ds_6_10. ; CHECK: Function: MainDS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model ds_6_10. ; CHECK: Function: MainDS: error: Opcode LinAlgMatrixSetElement not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model ds_6_10. +; CHECK: Function: MainDS: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model ds_6_10. ; CHECK: Function: MainDS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. ; CHECK: Function: MainDS: error: Function uses features incompatible with the shader stage (ds) of the entry function. ; CHECK: Validation failed. 
@@ -27,6 +30,8 @@ target triple = "dxil-ms-dx" %dx.types.ResourceProperties = type { i32, i32 } %struct.RWByteAddressBuffer = type { i32 } +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 + define void @MainDS() { %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) @@ -88,7 +93,14 @@ define void @MainDS() { ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - ; FIXME: 3 more ops coming soon + ; dx.op.linAlgMatrixAccumulateToMemory + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + ; dx.op.linAlgMatrixLoadFromMemory + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + + ; dx.op.linAlgMatrixStoreToMemory + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0) ; 
LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis) %3 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0) ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis) @@ -149,6 +161,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll index f471d3c8c2..a56a3d1e0b 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll @@ -11,6 +11,9 @@ ; CHECK: Function: MainGS: error: Opcode LinAlgMatrixGetElement not valid in shader model gs_6_10. ; CHECK: Function: MainGS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model gs_6_10. ; CHECK: Function: MainGS: error: Opcode LinAlgMatrixSetElement not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model gs_6_10. +; CHECK: Function: MainGS: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model gs_6_10. 
+; CHECK: Function: MainGS: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model gs_6_10. ; CHECK: Function: MainGS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. ; CHECK: Function: MainGS: error: Function uses features incompatible with the shader stage (gs) of the entry function. ; CHECK: Validation failed. @@ -27,6 +30,8 @@ target triple = "dxil-ms-dx" %dx.types.ResourceProperties = type { i32, i32 } %struct.RWByteAddressBuffer = type { i32 } +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 + define void @MainGS() { %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) @@ -88,7 +93,14 @@ define void @MainGS() { ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - ; FIXME: 3 more ops coming soon + ; dx.op.linAlgMatrixAccumulateToMemory + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + ; dx.op.linAlgMatrixLoadFromMemory + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + + ; dx.op.linAlgMatrixStoreToMemory + call void 
@dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+01) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+01) ; StoreOutput(outputSigId,rowIndex,colIndex,value) @@ -148,6 +160,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll index a24cbf5cf7..6dcc3accfb 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll @@ -11,6 +11,9 @@ ; CHECK: Function: MainHS: error: Opcode LinAlgMatrixGetElement not valid in shader model hs_6_10. 
; CHECK: Function: MainHS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model hs_6_10. ; CHECK: Function: MainHS: error: Opcode LinAlgMatrixSetElement not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model hs_6_10. +; CHECK: Function: MainHS: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model hs_6_10. ; CHECK: Function: MainHS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. ; CHECK: Function: MainHS: error: Function uses features incompatible with the shader stage (hs) of the entry function. ; CHECK: Validation failed. @@ -27,6 +30,8 @@ target triple = "dxil-ms-dx" %dx.types.ResourceProperties = type { i32, i32 } %struct.RWByteAddressBuffer = type { i32 } +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 + define void @MainHS() { %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) @@ -88,7 +93,14 @@ define void @MainHS() { ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - ; FIXME: 3 more ops coming soon + ; dx.op.linAlgMatrixAccumulateToMemory + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + 
; dx.op.linAlgMatrixLoadFromMemory + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + + ; dx.op.linAlgMatrixStoreToMemory + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) ret void } @@ -154,6 +166,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll index 199a63ccf4..9f04cb0d46 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll @@ 
-14,6 +14,8 @@ target triple = "dxil-ms-dx" %dx.types.ResourceProperties = type { i32, i32 } %struct.RWByteAddressBuffer = type { i32 } +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 + define void @mainMeS() { %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) @@ -76,7 +78,14 @@ define void @mainMeS() { ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - ; FIXME: 3 more ops coming soon + ; dx.op.linAlgMatrixAccumulateToMemory + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + ; dx.op.linAlgMatrixLoadFromMemory + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + + ; dx.op.linAlgMatrixStoreToMemory + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) call void @dx.op.setMeshOutputCounts(i32 168, i32 32, i32 16) ; SetMeshOutputCounts(numVertices,numPrimitives) call void 
@dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 0, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) @@ -135,6 +144,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll index b4280ba682..c77999c15c 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll @@ -11,6 +11,9 @@ ; CHECK: Function: mainNS: error: Opcode LinAlgMatrixGetElement not valid in shader model lib_6_10(node). ; CHECK: Function: mainNS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model lib_6_10(node). ; CHECK: Function: mainNS: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model lib_6_10(node). 
+; CHECK: Function: mainNS: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model lib_6_10(node). +; CHECK: Function: mainNS: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model lib_6_10(node). ; CHECK: Function: mainNS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. ; CHECK: Function: mainNS: error: Function uses features incompatible with the shader stage (node) of the entry function. ; CHECK: Validation failed. @@ -28,6 +31,7 @@ target triple = "dxil-ms-dx" %struct.RWByteAddressBuffer = type { i32 } @"\01?buf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 define void @mainNS() { @@ -90,7 +94,14 @@ define void @mainNS() { ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - ; FIXME: 3 more ops coming soon + ; dx.op.linAlgMatrixAccumulateToMemory + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + ; dx.op.linAlgMatrixLoadFromMemory + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + + ; dx.op.linAlgMatrixStoreToMemory + call void 
@dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) ret void } @@ -143,6 +154,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll index 2e3a6ef71f..7b8072e0ce 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll @@ -11,6 +11,9 @@ ; CHECK: Function: mainPS: error: Opcode LinAlgMatrixGetElement not valid in shader model ps_6_10. ; CHECK: Function: mainPS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model ps_6_10. ; CHECK: Function: mainPS: error: Opcode LinAlgMatrixSetElement not valid in shader model ps_6_10. 
+; CHECK: Function: mainPS: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model ps_6_10. +; CHECK: Function: mainPS: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model ps_6_10. ; CHECK: Function: mainPS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. ; CHECK: Function: mainPS: error: Function uses features incompatible with the shader stage (ps) of the entry function. ; CHECK: Validation failed. @@ -26,6 +29,8 @@ target triple = "dxil-ms-dx" %dx.types.ResourceProperties = type { i32, i32 } %struct.RWByteAddressBuffer = type { i32 } +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 + define void @mainPS() { %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) @@ -87,7 +92,14 @@ define void @mainPS() { ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - ; FIXME: 3 more ops coming soon + ; dx.op.linAlgMatrixAccumulateToMemory + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + ; dx.op.linAlgMatrixLoadFromMemory + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], 
[64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + + ; dx.op.linAlgMatrixStoreToMemory + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) @@ -145,6 +157,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll index c627b5e4cc..14588f4b7f 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll +++ 
b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll @@ -71,6 +71,27 @@ ; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(intersection). ; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixSetElement not valid in shader model lib_6_10(raygeneration). +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model lib_6_10(raygeneration). + +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model lib_6_10(raygeneration). 
+ +; CHECK: Function: {{.*}}MainMS{{.*}}: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model lib_6_10(miss). +; CHECK: Function: {{.*}}MainCH{{.*}}: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model lib_6_10(closesthit). +; CHECK: Function: {{.*}}MainAH{{.*}}: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model lib_6_10(anyhit). +; CHECK: Function: {{.*}}MainCL{{.*}}: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model lib_6_10(callable). +; CHECK: Function: {{.*}}MainIS{{.*}}: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model lib_6_10(intersection). +; CHECK: Function: {{.*}}MainRG{{.*}}: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model lib_6_10(raygeneration). + ; CHECK: Function: {{.*}}MainRG{{.*}}: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. ; CHECK: Function: {{.*}}MainRG{{.*}}: error: Function uses features incompatible with the shader stage (raygeneration) of the entry function. 
@@ -103,6 +124,7 @@ target triple = "dxil-ms-dx" %struct.RWByteAddressBuffer = type { i32 } @"\01?buf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 define void @"\01?MainRG@@YAXXZ"() #0 { @@ -132,8 +154,9 @@ define void @"\01?MainRG@@YAXXZ"() #0 { %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - - ; FIXME: 3 more ops coming soon + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], 
[64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) ret void } @@ -165,9 +188,10 @@ define void @"\01?MainIS@@YAXXZ"() #0 { %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - - ; FIXME: 3 more ops coming soon - + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; 
LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) + ret void } @@ -198,9 +222,10 @@ define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %a %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - ; FIXME: 3 more ops coming soon - ret void } @@ -231,9 
+256,10 @@ define void @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - ; FIXME: 3 more ops coming soon - ret void } @@ -264,9 +290,10 @@ define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* 
noal %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - ; FIXME: 3 more ops coming soon - ret void } @@ -297,9 +324,10 @@ define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapt %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 
@dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - ; FIXME: 3 more ops coming soon - ret void } @@ -351,6 +379,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, 
%dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll index 09a3753a6a..cacff8b532 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll @@ -11,6 +11,9 @@ ; CHECK: Function: mainVS: error: Opcode LinAlgMatrixGetElement not valid in shader model vs_6_10. ; CHECK: Function: mainVS: error: Opcode LinAlgMatrixMultiplyAccumulate not valid in shader model vs_6_10. ; CHECK: Function: mainVS: error: Opcode LinAlgMatrixSetElement not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixStoreToMemory not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixAccumulateToMemory not valid in shader model vs_6_10. +; CHECK: Function: mainVS: error: Opcode LinAlgMatrixLoadFromMemory not valid in shader model vs_6_10. ; CHECK: Function: mainVS: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. ; CHECK: Function: mainVS: error: Function uses features incompatible with the shader stage (vs) of the entry function. ; CHECK: Validation failed. 
@@ -27,6 +30,8 @@ target triple = "dxil-ms-dx" %dx.types.ResourceProperties = type { i32, i32 } %struct.RWByteAddressBuffer = type { i32 } +@"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 + define void @mainVS() { %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) @@ -88,7 +93,14 @@ define void @mainVS() { ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - ; FIXME: 3 more ops coming soon + ; dx.op.linAlgMatrixAccumulateToMemory + call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) + + ; dx.op.linAlgMatrixLoadFromMemory + %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) + + ; dx.op.linAlgMatrixStoreToMemory + call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+00) ; 
StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) @@ -146,6 +158,15 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.m ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, float addrspace(3)*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32, float addrspace(3)*, i32, i32, i32) #0 + ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 From c64d0a347a01a4e9abc521db8964f8979a6271ef Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Tue, 17 Mar 2026 17:36:01 -0700 Subject: [PATCH 7/7] Fix merge (revert hctdb.py change) --- utils/hct/hctdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 74515d6a4a..e88834aa62 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -9601,7 +9601,7 @@ def __init__(self, intrinsic_defs, opcode_data): "out": "AR_QUAL_OUT", "col_major": "AR_QUAL_COLMAJOR", "row_major": "AR_QUAL_ROWMAJOR", - "groupshared": "AR_QUAL_IN | AR_QUAL_GROUPSHARED", + "groupshared": "AR_QUAL_GROUPSHARED", } self.intrinsics = [] self.load_intrinsics(intrinsic_defs)