From 3035d316c35289b68e8fc9d8cf21d86a204fb0e2 Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 1 Apr 2025 12:10:06 -0500 Subject: [PATCH 01/19] Require CMake 3.17, remove CMP0051 (#7287) Hopefully this works and gets us able to build with CMake 4+. --- CMakeLists.txt | 9 --------- tools/clang/CMakeLists.txt | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 74244c1d58..0977fa1246 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,15 +17,6 @@ if(POLICY CMP0022) cmake_policy(SET CMP0022 NEW) # automatic when 2.8.12 is required endif() -if (POLICY CMP0051) - # CMake 3.1 and higher include generator expressions of the form - # $ in the SOURCES property. These need to be - # stripped everywhere that access the SOURCES property, so we just - # defer to the OLD behavior of not including generator expressions - # in the output for now. - cmake_policy(SET CMP0051 OLD) -endif() - if(CMAKE_VERSION VERSION_LESS 3.1.20141117) set(cmake_3_2_USES_TERMINAL) else() diff --git a/tools/clang/CMakeLists.txt b/tools/clang/CMakeLists.txt index 71190336ca..449e6c28b4 100644 --- a/tools/clang/CMakeLists.txt +++ b/tools/clang/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.8) +cmake_minimum_required(VERSION 3.17.2) # HLSL Change - Require CMake 3.17.2. # FIXME: It may be removed when we use 2.8.12. if(CMAKE_VERSION VERSION_LESS 2.8.12) From 30bfd82296a04f8302c949d79387b06fc37a31c6 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 1 Apr 2025 11:56:18 -0700 Subject: [PATCH 02/19] NFC: Infrastructure changes for DXIL op vector and multi-dim overloads (#7259) This change adds vector and multi-dimensional overload support for DXIL operations. Multi-dimensional (or "extended") overloads are added, where two or more types in a DXIL Op function signature may vary independently, such as both the return type and a parameter type. Until now, only one overload dimension has been necessary. 
For single-dim overloads, any number of parameters in a DXIL op may refer to this single overload type. For multi-dim overloads, each type that can vary must have a unique overload dimension, even when two or more types must be the same. This follows a pattern from llvm intrinsics. If two or more of the types need to be the same, this constraint must be handled manually, outside the automatic overload constraints defined by the DXIL op definitions. Vector overloads are also added, requiring an additional set of scalar overload types to define the allowed vector element types, on top of the original set describing the allowed scalar overloads for an operation, since both scalar and vector overloads may be allowed on the same operation. There are several components involved in handling DXIL operation overloads, with some changes: - DXIL Op definitions in `hctdb.py` use a string of characters to define the allowed overloads, and special type names used in parameter definitions that refer to the overload type. - Overload string syntax updated and more heavily validated. - `','` may separate dimensions for multi-dim overloads - `'<'` indicates that a vector overload is allowed, in which case, scalar components on the left indicate normal scalar overloads allowed, and scalar components on the right indicate the allowed vector element overloads. - If scalar overloads are present to the left, and omitted to the right, the scalar components are replicated to the right automatically. For instance: `"hf<"` is equivalent to `"hf}`. This makes it compatible with all these existing mechanisms without requiring an API overhaul impacting the broader code base. `GetExtendedOverloadType` is used to construct this type from multiple types. While updating `DxilOperations.h|cpp`, I noticed and removed some unused methods: `IsDxilOpTypeName`, `IsDxilOpType`, `IsDupDxilOpType`, `GetOriginalDxilOpType`. 
--------- Co-authored-by: Greg Roth --- include/dxc/DXIL/DxilConstants.h | 5 + include/dxc/DXIL/DxilOperations.h | 103 +- lib/DXIL/DxilOperations.cpp | 5884 ++++++++++++------------- lib/DxilValidation/DxilValidation.cpp | 17 +- utils/hct/hctdb.py | 170 +- utils/hct/hctdb_instrhelp.py | 142 +- 6 files changed, 3162 insertions(+), 3159 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 0a9c6a4ffd..447728300b 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -155,6 +155,11 @@ const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +/* hctdb_instrhelp.get_max_oload_dims()*/ +// OLOAD_DIMS-TEXT:BEGIN +const unsigned kDxilMaxOloadDims = 2; +// OLOAD_DIMS-TEXT:END + enum class ComponentType : uint32_t { Invalid = 0, I1, diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index e522e06204..05021ce789 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -57,12 +57,31 @@ class OP { // caches. void RefreshCache(); + // The single llvm::Type * "OverloadType" has one of these forms: + // No overloads (NumOverloadDims == 0): + // - TS_Void: VoidTy + // For single overload dimension (NumOverloadDims == 1): + // - TS_F*, TS_I*: a scalar numeric type (half, float, i1, i64, etc.), + // - TS_UDT: a pointer to a StructType representing a User Defined Type, + // - TS_Object: a named StructType representing a built-in object, or + // - TS_Vector: a vector type (<4 x float>, <16 x i16>, etc.) + // For multiple overload dimensions (TS_Extended, NumOverloadDims > 1): + // - an unnamed StructType containing each type for the corresponding + // dimension, such as: type { i32, <2 x float> } + // - contained type options are the same as for single dimension. 
+ llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType); + + // N-dimension convenience version of GetOpFunc: + llvm::Function *GetOpFunc(OpCode OpCode, + llvm::ArrayRef OverloadTypes); + const llvm::SmallMapVector & GetOpFuncList(OpCode OpCode) const; bool IsDxilOpUsed(OpCode opcode) const; void RemoveFunction(llvm::Function *F); llvm::LLVMContext &GetCtx() { return m_Ctx; } + llvm::Module *GetModule() { return m_pModule; } llvm::Type *GetHandleType() const; llvm::Type *GetHitObjectType() const; llvm::Type *GetNodeHandleType() const; @@ -81,9 +100,14 @@ class OP { llvm::Type *GetResRetType(llvm::Type *pOverloadType); llvm::Type *GetCBufferRetType(llvm::Type *pOverloadType); - llvm::Type *GetVectorType(unsigned numElements, llvm::Type *pOverloadType); + llvm::Type *GetStructVectorType(unsigned numElements, + llvm::Type *pOverloadType); bool IsResRetType(llvm::Type *Ty); + // Construct an unnamed struct type containing the set of member types. + llvm::StructType * + GetExtendedOverloadType(llvm::ArrayRef OverloadTypes); + // Try to get the opcode class for a function. // Return true and set `opClass` if the given function is a dxil function. // Return false if the given function is not a dxil function. 
@@ -128,11 +152,6 @@ class OP { static bool BarrierRequiresGroup(const llvm::CallInst *CI); static bool BarrierRequiresNode(const llvm::CallInst *CI); static DXIL::BarrierMode TranslateToBarrierMode(const llvm::CallInst *CI); - static bool IsDxilOpTypeName(llvm::StringRef name); - static bool IsDxilOpType(llvm::StructType *ST); - static bool IsDupDxilOpType(llvm::StructType *ST); - static llvm::StructType *GetOriginalDxilOpType(llvm::StructType *ST, - llvm::Module &M); static void GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, unsigned &major, unsigned &minor, unsigned &mask); @@ -141,6 +160,13 @@ class OP { unsigned valMinor, unsigned &major, unsigned &minor, unsigned &mask); + static bool IsDxilOpExtendedOverload(OpCode C); + + // Return true if the overload name suffix for this operation may be + // constructed based on a user-defined or user-influenced type name + // that may not represent the same type in different linked modules. + static bool MayHaveNonCanonicalOverload(OpCode OC); + private: // Per-module properties. 
llvm::LLVMContext &m_Ctx; @@ -164,13 +190,33 @@ class OP { DXIL::LowPrecisionMode m_LowPrecisionMode; - static const unsigned kUserDefineTypeSlot = 9; - static const unsigned kObjectTypeSlot = 10; - static const unsigned kNumTypeOverloads = - 11; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj + // Overload types are split into "basic" overload types and special types + // Basic: void, half, float, double, i1, i8, i16, i32, i64 + // - These have one canonical overload per TypeSlot + // Special: udt, obj, vec, extended + // - These may have many overloads per type slot + enum TypeSlot : unsigned { + TS_F16 = 0, + TS_F32 = 1, + TS_F64 = 2, + TS_I1 = 3, + TS_I8 = 4, + TS_I16 = 5, + TS_I32 = 6, + TS_I64 = 7, + TS_BasicCount, + TS_UDT = 8, // Ex: %"struct.MyStruct" * + TS_Object = 9, // Ex: %"class.StructuredBuffer" + TS_Vector = 10, // Ex: <8 x i16> + TS_MaskBitCount, // Types used in Mask end here + // TS_Extended is only used to identify the unnamed struct type used to wrap + // multiple overloads when using GetTypeSlot. + TS_Extended, // Ex: type { float, <16 x i32> } + TS_Invalid = UINT_MAX, + }; - llvm::Type *m_pResRetType[kNumTypeOverloads]; - llvm::Type *m_pCBufferRetType[kNumTypeOverloads]; + llvm::Type *m_pResRetType[TS_BasicCount]; + llvm::Type *m_pCBufferRetType[TS_BasicCount]; struct OpCodeCacheItem { llvm::SmallMapVector pOverloads; @@ -181,27 +227,46 @@ class OP { private: // Static properties. + struct OverloadMask { + // mask of type slot bits as (1 << TypeSlot) + uint16_t SlotMask; + static_assert(TS_MaskBitCount <= (sizeof(SlotMask) * 8)); + bool operator[](unsigned TypeSlot) const { + return (TypeSlot < TS_MaskBitCount) ? 
(bool)(SlotMask & (1 << TypeSlot)) + : 0; + } + operator bool() const { return SlotMask != 0; } + }; struct OpCodeProperty { OpCode opCode; const char *pOpCodeName; OpCodeClass opCodeClass; const char *pOpCodeClassName; - bool bAllowOverload[kNumTypeOverloads]; // void, h,f,d, i1, i8,i16,i32,i64, - // udt llvm::Attribute::AttrKind FuncAttr; + + // Number of overload dimensions used by the operation. + unsigned int NumOverloadDims; + + // Mask of supported overload types for each overload dimension. + OverloadMask AllowedOverloads[DXIL::kDxilMaxOloadDims]; + + // Mask of scalar components allowed for each demension where + // AllowedOverloads[n][TS_Vector] is true. + OverloadMask AllowedVectorElements[DXIL::kDxilMaxOloadDims]; }; static const OpCodeProperty m_OpCodeProps[(unsigned)OpCode::NumOpCodes]; - static const char *m_OverloadTypeName[kNumTypeOverloads]; + static const char *m_OverloadTypeName[TS_BasicCount]; static const char *m_NamePrefix; static const char *m_TypePrefix; static const char *m_MatrixTypePrefix; static unsigned GetTypeSlot(llvm::Type *pType); static const char *GetOverloadTypeName(unsigned TypeSlot); - static llvm::StringRef GetTypeName(llvm::Type *Ty, std::string &str); - static llvm::StringRef ConstructOverloadName(llvm::Type *Ty, - DXIL::OpCode opCode, - std::string &funcNameStorage); + static llvm::StringRef GetTypeName(llvm::Type *Ty, + llvm::SmallVectorImpl &Storage); + static llvm::StringRef + ConstructOverloadName(llvm::Type *Ty, DXIL::OpCode opCode, + llvm::SmallVectorImpl &Storage); }; } // namespace hlsl diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 86049fee9c..56cdd0d04f 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -23,8 +23,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -using std::string; -using std::vector; namespace hlsl { @@ -41,2989 +39,2605 @@ import hctdb_instrhelp /* hctdb_instrhelp.get_oloads_props()*/ // OPCODE-OLOADS:BEGIN const 
OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { - // OpCode OpCode name, OpCodeClass - // OpCodeClass name, void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj, function attribute - // Temporary, indexable, input, output registers void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::TempRegLoad, - "TempRegLoad", - OCC::TempRegLoad, - "tempRegLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TempRegStore, - "TempRegStore", - OCC::TempRegStore, - "tempRegStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::MinPrecXRegLoad, - "MinPrecXRegLoad", - OCC::MinPrecXRegLoad, - "minPrecXRegLoad", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::MinPrecXRegStore, - "MinPrecXRegStore", - OCC::MinPrecXRegStore, - "minPrecXRegStore", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::None, - }, - { - OC::LoadInput, - "LoadInput", - OCC::LoadInput, - "loadInput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StoreOutput, - "StoreOutput", - OCC::StoreOutput, - "storeOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Unary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FAbs, - "FAbs", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Saturate, - "Saturate", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNaN, - "IsNaN", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, 
false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsInf, - "IsInf", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsFinite, - "IsFinite", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNormal, - "IsNormal", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Cos, - "Cos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sin, - "Sin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Tan, - "Tan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Acos, - "Acos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Asin, - "Asin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Atan, - "Atan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hcos, - "Hcos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hsin, - "Hsin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Htan, - "Htan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, 
false, - false}, - Attribute::ReadNone, - }, - { - OC::Exp, - "Exp", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Frc, - "Frc", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Log, - "Log", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sqrt, - "Sqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Rsqrt, - "Rsqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary float - rounding void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Round_ne, - "Round_ne", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_ni, - "Round_ni", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_pi, - "Round_pi", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_z, - "Round_z", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfrev, - "Bfrev", - OCC::Unary, - "unary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Countbits, - "Countbits", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, 
false, - false}, - Attribute::ReadNone, - }, - { - OC::FirstbitLo, - "FirstbitLo", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitHi, - "FirstbitHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitSHi, - "FirstbitSHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FMax, - "FMax", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FMin, - "FMin", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMax, - "IMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::IMin, - "IMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMax, - "UMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::UMin, - "UMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - 
Attribute::ReadNone, - }, - - // Binary int with two outputs void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IMul, - "IMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with two outputs void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UMul, - "UMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::UDiv, - "UDiv", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with carry or borrow void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UAddc, - "UAddc", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::USubb, - "USubb", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary float void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::FMad, - "FMad", - OCC::Tertiary, - "tertiary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Fma, - "Fma", - OCC::Tertiary, - "tertiary", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMad, - "IMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // 
Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMad, - "UMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Msad, - "Msad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Ibfe, - "Ibfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Ubfe, - "Ubfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Quaternary void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfi, - "Bfi", - OCC::Quaternary, - "quaternary", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::Dot2, - "Dot2", - OCC::Dot2, - "dot2", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot3, - "Dot3", - OCC::Dot3, - "dot3", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4, - "Dot4", - OCC::Dot4, - "dot4", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CreateHandle, - "CreateHandle", - OCC::CreateHandle, - "createHandle", - {true, false, false, false, false, false, false, false, false, false, - 
false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoad, - "CBufferLoad", - OCC::CBufferLoad, - "cbufferLoad", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoadLegacy, - "CBufferLoadLegacy", - OCC::CBufferLoadLegacy, - "cbufferLoadLegacy", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Sample, - "Sample", - OCC::Sample, - "sample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleBias, - "SampleBias", - OCC::SampleBias, - "sampleBias", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleLevel, - "SampleLevel", - OCC::SampleLevel, - "sampleLevel", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleGrad, - "SampleGrad", - OCC::SampleGrad, - "sampleGrad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmp, - "SampleCmp", - OCC::SampleCmp, - "sampleCmp", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpLevelZero, - "SampleCmpLevelZero", - OCC::SampleCmpLevelZero, - "sampleCmpLevelZero", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureLoad, - "TextureLoad", - OCC::TextureLoad, - "textureLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureStore, - "TextureStore", - OCC::TextureStore, - "textureStore", - {false, true, true, 
false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferLoad, - "BufferLoad", - OCC::BufferLoad, - "bufferLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::BufferStore, - "BufferStore", - OCC::BufferStore, - "bufferStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferUpdateCounter, - "BufferUpdateCounter", - OCC::BufferUpdateCounter, - "bufferUpdateCounter", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CheckAccessFullyMapped, - "CheckAccessFullyMapped", - OCC::CheckAccessFullyMapped, - "checkAccessFullyMapped", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetDimensions, - "GetDimensions", - OCC::GetDimensions, - "getDimensions", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGather, - "TextureGather", - OCC::TextureGather, - "textureGather", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureGatherCmp, - "TextureGatherCmp", - OCC::TextureGatherCmp, - "textureGatherCmp", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Texture2DMSGetSamplePosition, - "Texture2DMSGetSamplePosition", - OCC::Texture2DMSGetSamplePosition, - "texture2DMSGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSamplePosition, - 
"RenderTargetGetSamplePosition", - OCC::RenderTargetGetSamplePosition, - "renderTargetGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSampleCount, - "RenderTargetGetSampleCount", - OCC::RenderTargetGetSampleCount, - "renderTargetGetSampleCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AtomicBinOp, - "AtomicBinOp", - OCC::AtomicBinOp, - "atomicBinOp", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::AtomicCompareExchange, - "AtomicCompareExchange", - OCC::AtomicCompareExchange, - "atomicCompareExchange", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::Barrier, - "Barrier", - OCC::Barrier, - "barrier", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CalculateLOD, - "CalculateLOD", - OCC::CalculateLOD, - "calculateLOD", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Discard, - "Discard", - OCC::Discard, - "discard", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DerivCoarseX, - "DerivCoarseX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivCoarseY, - "DerivCoarseY", - 
OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineX, - "DerivFineX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineY, - "DerivFineY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::EvalSnapped, - "EvalSnapped", - OCC::EvalSnapped, - "evalSnapped", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalSampleIndex, - "EvalSampleIndex", - OCC::EvalSampleIndex, - "evalSampleIndex", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalCentroid, - "EvalCentroid", - OCC::EvalCentroid, - "evalCentroid", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SampleIndex, - "SampleIndex", - OCC::SampleIndex, - "sampleIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Coverage, - "Coverage", - OCC::Coverage, - "coverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InnerCoverage, - "InnerCoverage", - OCC::InnerCoverage, - "innerCoverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Compute/Mesh/Amplification/Node shader void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::ThreadId, - "ThreadId", - OCC::ThreadId, - "threadId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - 
{ - OC::GroupId, - "GroupId", - OCC::GroupId, - "groupId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ThreadIdInGroup, - "ThreadIdInGroup", - OCC::ThreadIdInGroup, - "threadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FlattenedThreadIdInGroup, - "FlattenedThreadIdInGroup", - OCC::FlattenedThreadIdInGroup, - "flattenedThreadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Geometry shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::EmitStream, - "EmitStream", - OCC::EmitStream, - "emitStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CutStream, - "CutStream", - OCC::CutStream, - "cutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitThenCutStream, - "EmitThenCutStream", - OCC::EmitThenCutStream, - "emitThenCutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GSInstanceID, - "GSInstanceID", - OCC::GSInstanceID, - "gsInstanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::MakeDouble, - "MakeDouble", - OCC::MakeDouble, - "makeDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SplitDouble, - "SplitDouble", - OCC::SplitDouble, - "splitDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain and hull shader void, h, f, d, i1, i8, i16, - // i32, i64, 
udt, obj , function attribute - { - OC::LoadOutputControlPoint, - "LoadOutputControlPoint", - OCC::LoadOutputControlPoint, - "loadOutputControlPoint", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LoadPatchConstant, - "LoadPatchConstant", - OCC::LoadPatchConstant, - "loadPatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DomainLocation, - "DomainLocation", - OCC::DomainLocation, - "domainLocation", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::StorePatchConstant, - "StorePatchConstant", - OCC::StorePatchConstant, - "storePatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::OutputControlPointID, - "OutputControlPointID", - OCC::OutputControlPointID, - "outputControlPointID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull, Domain and Geometry shaders void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveID, - "PrimitiveID", - OCC::PrimitiveID, - "primitiveID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Other void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::CycleCounterLegacy, - "CycleCounterLegacy", - OCC::CycleCounterLegacy, - "cycleCounterLegacy", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveIsFirstLane, - 
"WaveIsFirstLane", - OCC::WaveIsFirstLane, - "waveIsFirstLane", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveGetLaneIndex, - "WaveGetLaneIndex", - OCC::WaveGetLaneIndex, - "waveGetLaneIndex", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::WaveGetLaneCount, - "WaveGetLaneCount", - OCC::WaveGetLaneCount, - "waveGetLaneCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WaveAnyTrue, - "WaveAnyTrue", - OCC::WaveAnyTrue, - "waveAnyTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveAllTrue, - "WaveAllTrue", - OCC::WaveAllTrue, - "waveAllTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveActiveAllEqual, - "WaveActiveAllEqual", - OCC::WaveActiveAllEqual, - "waveActiveAllEqual", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBallot, - "WaveActiveBallot", - OCC::WaveActiveBallot, - "waveActiveBallot", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveReadLaneAt, - "WaveReadLaneAt", - OCC::WaveReadLaneAt, - "waveReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveReadLaneFirst, - "WaveReadLaneFirst", - OCC::WaveReadLaneFirst, - "waveReadLaneFirst", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveOp, - "WaveActiveOp", - OCC::WaveActiveOp, - "waveActiveOp", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBit, - "WaveActiveBit", - OCC::WaveActiveBit, - 
"waveActiveBit", - {false, false, false, false, false, true, true, true, true, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixOp, - "WavePrefixOp", - OCC::WavePrefixOp, - "wavePrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadReadLaneAt, - "QuadReadLaneAt", - OCC::QuadReadLaneAt, - "quadReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::QuadOp, - "QuadOp", - OCC::QuadOp, - "quadOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Bitcasts with different sizes void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::BitcastI16toF16, - "BitcastI16toF16", - OCC::BitcastI16toF16, - "bitcastI16toF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF16toI16, - "BitcastF16toI16", - OCC::BitcastF16toI16, - "bitcastF16toI16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI32toF32, - "BitcastI32toF32", - OCC::BitcastI32toF32, - "bitcastI32toF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF32toI32, - "BitcastF32toI32", - OCC::BitcastF32toI32, - "bitcastF32toI32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI64toF64, - "BitcastI64toF64", - OCC::BitcastI64toF64, - "bitcastI64toF64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF64toI64, - "BitcastF64toI64", - OCC::BitcastF64toI64, - "bitcastF64toI64", - {true, false, false, false, false, false, 
false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Legacy floating-point void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LegacyF32ToF16, - "LegacyF32ToF16", - OCC::LegacyF32ToF16, - "legacyF32ToF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyF16ToF32, - "LegacyF16ToF32", - OCC::LegacyF16ToF32, - "legacyF16ToF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::LegacyDoubleToFloat, - "LegacyDoubleToFloat", - OCC::LegacyDoubleToFloat, - "legacyDoubleToFloat", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToSInt32, - "LegacyDoubleToSInt32", - OCC::LegacyDoubleToSInt32, - "legacyDoubleToSInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToUInt32, - "LegacyDoubleToUInt32", - OCC::LegacyDoubleToUInt32, - "legacyDoubleToUInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveAllBitCount, - "WaveAllBitCount", - OCC::WaveAllOp, - "waveAllOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixBitCount, - "WavePrefixBitCount", - OCC::WavePrefixOp, - "wavePrefixOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::AttributeAtVertex, - "AttributeAtVertex", - OCC::AttributeAtVertex, - "attributeAtVertex", - 
{false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Graphics shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ViewID, - "ViewID", - OCC::ViewID, - "viewID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::RawBufferLoad, - "RawBufferLoad", - OCC::RawBufferLoad, - "rawBufferLoad", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::RawBufferStore, - "RawBufferStore", - OCC::RawBufferStore, - "rawBufferStore", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::None, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::InstanceID, - "InstanceID", - OCC::InstanceID, - "instanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InstanceIndex, - "InstanceIndex", - OCC::InstanceIndex, - "instanceIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing hit uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::HitKind, - "HitKind", - OCC::HitKind, - "hitKind", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::RayFlags, - "RayFlags", - OCC::RayFlags, - "rayFlags", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Dispatch Arguments void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function 
attribute - { - OC::DispatchRaysIndex, - "DispatchRaysIndex", - OCC::DispatchRaysIndex, - "dispatchRaysIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DispatchRaysDimensions, - "DispatchRaysDimensions", - OCC::DispatchRaysDimensions, - "dispatchRaysDimensions", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Vectors void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::WorldRayOrigin, - "WorldRayOrigin", - OCC::WorldRayOrigin, - "worldRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldRayDirection, - "WorldRayDirection", - OCC::WorldRayDirection, - "worldRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray object space Vectors void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::ObjectRayOrigin, - "ObjectRayOrigin", - OCC::ObjectRayOrigin, - "objectRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ObjectRayDirection, - "ObjectRayDirection", - OCC::ObjectRayDirection, - "objectRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Transforms void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ObjectToWorld, - "ObjectToWorld", - OCC::ObjectToWorld, - "objectToWorld", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldToObject, - "WorldToObject", - OCC::WorldToObject, - "worldToObject", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // RayT void, h, f, d, i1, i8, 
i16, i32, i64, udt, - // obj , function attribute - { - OC::RayTMin, - "RayTMin", - OCC::RayTMin, - "rayTMin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::RayTCurrent, - "RayTCurrent", - OCC::RayTCurrent, - "rayTCurrent", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // AnyHit Terminals void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::IgnoreHit, - "IgnoreHit", - OCC::IgnoreHit, - "ignoreHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, - { - OC::AcceptHitAndEndSearch, - "AcceptHitAndEndSearch", - OCC::AcceptHitAndEndSearch, - "acceptHitAndEndSearch", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, - - // Indirect Shader Invocation void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::TraceRay, - "TraceRay", - OCC::TraceRay, - "traceRay", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::ReportHit, - "ReportHit", - OCC::ReportHit, - "reportHit", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::CallShader, - "CallShader", - OCC::CallShader, - "callShader", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Library create handle from resource struct (like HL intrinsic) void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::CreateHandleForLib, - "CreateHandleForLib", - OCC::CreateHandleForLib, - "createHandleForLib", - {false, false, false, false, false, false, false, false, false, false, - true}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, 
udt, obj , function attribute - { - OC::PrimitiveIndex, - "PrimitiveIndex", - OCC::PrimitiveIndex, - "primitiveIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot product with accumulate void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Dot2AddHalf, - "Dot2AddHalf", - OCC::Dot2AddHalf, - "dot2AddHalf", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddI8Packed, - "Dot4AddI8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddU8Packed, - "Dot4AddU8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveMatch, - "WaveMatch", - OCC::WaveMatch, - "waveMatch", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixOp, - "WaveMultiPrefixOp", - OCC::WaveMultiPrefixOp, - "waveMultiPrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixBitCount, - "WaveMultiPrefixBitCount", - OCC::WaveMultiPrefixBitCount, - "waveMultiPrefixBitCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Mesh shader instructions void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::SetMeshOutputCounts, - "SetMeshOutputCounts", - OCC::SetMeshOutputCounts, - "setMeshOutputCounts", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitIndices, - "EmitIndices", - OCC::EmitIndices, - 
"emitIndices", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetMeshPayload, - "GetMeshPayload", - OCC::GetMeshPayload, - "getMeshPayload", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadOnly, - }, - { - OC::StoreVertexOutput, - "StoreVertexOutput", - OCC::StoreVertexOutput, - "storeVertexOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::StorePrimitiveOutput, - "StorePrimitiveOutput", - OCC::StorePrimitiveOutput, - "storePrimitiveOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Amplification shader instructions void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::DispatchMesh, - "DispatchMesh", - OCC::DispatchMesh, - "dispatchMesh", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Sampler Feedback void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::WriteSamplerFeedback, - "WriteSamplerFeedback", - OCC::WriteSamplerFeedback, - "writeSamplerFeedback", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackBias, - "WriteSamplerFeedbackBias", - OCC::WriteSamplerFeedbackBias, - "writeSamplerFeedbackBias", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackLevel, - "WriteSamplerFeedbackLevel", - OCC::WriteSamplerFeedbackLevel, - "writeSamplerFeedbackLevel", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackGrad, - "WriteSamplerFeedbackGrad", - OCC::WriteSamplerFeedbackGrad, - "writeSamplerFeedbackGrad", - {true, false, false, false, false, 
false, false, false, false, false, - false}, - Attribute::None, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery, - "AllocateRayQuery", - OCC::AllocateRayQuery, - "allocateRayQuery", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_TraceRayInline, - "RayQuery_TraceRayInline", - OCC::RayQuery_TraceRayInline, - "rayQuery_TraceRayInline", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Proceed, - "RayQuery_Proceed", - OCC::RayQuery_Proceed, - "rayQuery_Proceed", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Abort, - "RayQuery_Abort", - OCC::RayQuery_Abort, - "rayQuery_Abort", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitNonOpaqueTriangleHit, - "RayQuery_CommitNonOpaqueTriangleHit", - OCC::RayQuery_CommitNonOpaqueTriangleHit, - "rayQuery_CommitNonOpaqueTriangleHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitProceduralPrimitiveHit, - "RayQuery_CommitProceduralPrimitiveHit", - OCC::RayQuery_CommitProceduralPrimitiveHit, - "rayQuery_CommitProceduralPrimitiveHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommittedStatus, - "RayQuery_CommittedStatus", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateType, - "RayQuery_CandidateType", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - 
Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectToWorld3x4, - "RayQuery_CandidateObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateWorldToObject3x4, - "RayQuery_CandidateWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectToWorld3x4, - "RayQuery_CommittedObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedWorldToObject3x4, - "RayQuery_CommittedWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, - "RayQuery_CandidateProceduralPrimitiveNonOpaque", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleFrontFace, - "RayQuery_CandidateTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleFrontFace, - "RayQuery_CommittedTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleBarycentrics, - "RayQuery_CandidateTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, 
false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleBarycentrics, - "RayQuery_CommittedTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayFlags, - "RayQuery_RayFlags", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayOrigin, - "RayQuery_WorldRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayDirection, - "RayQuery_WorldRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayTMin, - "RayQuery_RayTMin", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleRayT, - "RayQuery_CandidateTriangleRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedRayT, - "RayQuery_CommittedRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceIndex, - "RayQuery_CandidateInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceID, - 
"RayQuery_CandidateInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateGeometryIndex, - "RayQuery_CandidateGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidatePrimitiveIndex, - "RayQuery_CandidatePrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayOrigin, - "RayQuery_CandidateObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayDirection, - "RayQuery_CandidateObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceIndex, - "RayQuery_CommittedInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceID, - "RayQuery_CommittedInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedGeometryIndex, - "RayQuery_CommittedGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedPrimitiveIndex, - 
"RayQuery_CommittedPrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayOrigin, - "RayQuery_CommittedObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayDirection, - "RayQuery_CommittedObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values, raytracing tier 1.1 void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::GeometryIndex, - "GeometryIndex", - OCC::GeometryIndex, - "geometryIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, - "RayQuery_CandidateInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, - "RayQuery_CommittedInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Get handle from heap void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AnnotateHandle, - "AnnotateHandle", - OCC::AnnotateHandle, - "annotateHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - 
Attribute::ReadNone, - }, - { - OC::CreateHandleFromBinding, - "CreateHandleFromBinding", - OCC::CreateHandleFromBinding, - "createHandleFromBinding", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromHeap, - "CreateHandleFromHeap", - OCC::CreateHandleFromHeap, - "createHandleFromHeap", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unpacking intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Unpack4x8, - "Unpack4x8", - OCC::Unpack4x8, - "unpack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Packing intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Pack4x8, - "Pack4x8", - OCC::Pack4x8, - "pack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Helper Lanes void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IsHelperLane, - "IsHelperLane", - OCC::IsHelperLane, - "isHelperLane", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadVote, - "QuadVote", - OCC::QuadVote, - "quadVote", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGatherRaw, - "TextureGatherRaw", - OCC::TextureGatherRaw, - "textureGatherRaw", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - 
OC::SampleCmpLevel, - "SampleCmpLevel", - OCC::SampleCmpLevel, - "sampleCmpLevel", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureStoreSample, - "TextureStoreSample", - OCC::TextureStoreSample, - "textureStoreSample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::Reserved0, - "Reserved0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved1, - "Reserved1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved2, - "Reserved2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved3, - "Reserved3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved4, - "Reserved4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved5, - "Reserved5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved6, - "Reserved6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved7, - "Reserved7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved8, - "Reserved8", - 
OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved9, - "Reserved9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved10, - "Reserved10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved11, - "Reserved11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::AllocateNodeOutputRecords, - "AllocateNodeOutputRecords", - OCC::AllocateNodeOutputRecords, - "allocateNodeOutputRecords", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Get Pointer to Node Record in Address Space 6 void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::GetNodeRecordPtr, - "GetNodeRecordPtr", - OCC::GetNodeRecordPtr, - "getNodeRecordPtr", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IncrementOutputCount, - "IncrementOutputCount", - OCC::IncrementOutputCount, - "incrementOutputCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::OutputComplete, - "OutputComplete", - OCC::OutputComplete, - "outputComplete", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetInputRecordCount, - "GetInputRecordCount", - OCC::GetInputRecordCount, - "getInputRecordCount", - {true, false, false, false, 
false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::FinishedCrossGroupSharing, - "FinishedCrossGroupSharing", - OCC::FinishedCrossGroupSharing, - "finishedCrossGroupSharing", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::BarrierByMemoryType, - "BarrierByMemoryType", - OCC::BarrierByMemoryType, - "barrierByMemoryType", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByMemoryHandle, - "BarrierByMemoryHandle", - OCC::BarrierByMemoryHandle, - "barrierByMemoryHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByNodeRecordHandle, - "BarrierByNodeRecordHandle", - OCC::BarrierByNodeRecordHandle, - "barrierByNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::CreateNodeOutputHandle, - "CreateNodeOutputHandle", - OCC::createNodeOutputHandle, - "createNodeOutputHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IndexNodeHandle, - "IndexNodeHandle", - OCC::IndexNodeHandle, - "indexNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeHandle, - "AnnotateNodeHandle", - OCC::AnnotateNodeHandle, - "annotateNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateNodeInputRecordHandle, - "CreateNodeInputRecordHandle", - OCC::CreateNodeInputRecordHandle, - 
"createNodeInputRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeRecordHandle, - "AnnotateNodeRecordHandle", - OCC::AnnotateNodeRecordHandle, - "annotateNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::NodeOutputIsValid, - "NodeOutputIsValid", - OCC::NodeOutputIsValid, - "nodeOutputIsValid", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetRemainingRecursionLevels, - "GetRemainingRecursionLevels", - OCC::GetRemainingRecursionLevels, - "getRemainingRecursionLevels", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Comparison Samples void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpGrad, - "SampleCmpGrad", - OCC::SampleCmpGrad, - "sampleCmpGrad", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpBias, - "SampleCmpBias", - OCC::SampleCmpBias, - "sampleCmpBias", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Extended Command Information void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::StartVertexLocation, - "StartVertexLocation", - OCC::StartVertexLocation, - "startVertexLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StartInstanceLocation, - "StartInstanceLocation", - OCC::StartInstanceLocation, - "startInstanceLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline 
Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery2, - "AllocateRayQuery2", - OCC::AllocateRayQuery2, - "allocateRayQuery2", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::ReservedA0, - "ReservedA0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA1, - "ReservedA1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA2, - "ReservedA2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB0, - "ReservedB0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB1, - "ReservedB1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB2, - "ReservedB2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Shader Execution Reordering void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::HitObject_MakeMiss, - "HitObject_MakeMiss", - OCC::HitObject_MakeMiss, - "hitObject_MakeMiss", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::HitObject_MakeNop, - "HitObject_MakeNop", - OCC::HitObject_MakeNop, - "hitObject_MakeNop", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, 
obj , function attribute - { - OC::ReservedB5, - "ReservedB5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB6, - "ReservedB6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB7, - "ReservedB7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB8, - "ReservedB8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB9, - "ReservedB9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB10, - "ReservedB10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB11, - "ReservedB11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB12, - "ReservedB12", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB13, - "ReservedB13", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB14, - "ReservedB14", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB15, - "ReservedB15", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB16, - 
"ReservedB16", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB17, - "ReservedB17", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB18, - "ReservedB18", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB19, - "ReservedB19", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB20, - "ReservedB20", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB21, - "ReservedB21", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB22, - "ReservedB22", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB23, - "ReservedB23", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB24, - "ReservedB24", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB25, - "ReservedB25", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB26, - "ReservedB26", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB27, - "ReservedB27", - OCC::Reserved, - "reserved", - 
{true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB28, - "ReservedB28", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB29, - "ReservedB29", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB30, - "ReservedB30", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC0, - "ReservedC0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC1, - "ReservedC1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC2, - "ReservedC2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC3, - "ReservedC3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC4, - "ReservedC4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC5, - "ReservedC5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC6, - "ReservedC6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC7, - "ReservedC7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, 
false, - false}, - Attribute::None, - }, - { - OC::ReservedC8, - "ReservedC8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC9, - "ReservedC9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // Temporary, indexable, input, output registers + {OC::TempRegLoad, + "TempRegLoad", + OCC::TempRegLoad, + "tempRegLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::TempRegStore, + "TempRegStore", + OCC::TempRegStore, + "tempRegStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::MinPrecXRegLoad, + "MinPrecXRegLoad", + OCC::MinPrecXRegLoad, + "minPrecXRegLoad", + Attribute::ReadOnly, + 1, + {{0x21}}, + {{0x0}}}, // Overloads: hw + {OC::MinPrecXRegStore, + "MinPrecXRegStore", + OCC::MinPrecXRegStore, + "minPrecXRegStore", + Attribute::None, + 1, + {{0x21}}, + {{0x0}}}, // Overloads: hw + {OC::LoadInput, + "LoadInput", + OCC::LoadInput, + "loadInput", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::StoreOutput, + "StoreOutput", + OCC::StoreOutput, + "storeOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Unary float + {OC::FAbs, + "FAbs", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::Saturate, + "Saturate", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::IsNaN, + "IsNaN", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::IsInf, + "IsInf", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::IsFinite, + "IsFinite", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: 
hf + {OC::IsNormal, + "IsNormal", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Cos, + "Cos", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Sin, + "Sin", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Tan, + "Tan", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Acos, + "Acos", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Asin, + "Asin", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Atan, + "Atan", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Hcos, + "Hcos", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Hsin, + "Hsin", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Htan, + "Htan", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Exp, + "Exp", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Frc, + "Frc", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Log, + "Log", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Sqrt, + "Sqrt", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Rsqrt, + "Rsqrt", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Unary float - rounding + {OC::Round_ne, + "Round_ne", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Round_ni, + "Round_ni", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + 
{{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Round_pi, + "Round_pi", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Round_z, + "Round_z", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Unary int + {OC::Bfrev, + "Bfrev", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::Countbits, + "Countbits", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::FirstbitLo, + "FirstbitLo", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Unary uint + {OC::FirstbitHi, + "FirstbitHi", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Unary int + {OC::FirstbitSHi, + "FirstbitSHi", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Binary float + {OC::FMax, + "FMax", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::FMin, + "FMin", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + + // Binary int + {OC::IMax, + "IMax", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::IMin, + "IMin", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Binary uint + {OC::UMax, + "UMax", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::UMin, + "UMin", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Binary int with two outputs + {OC::IMul, + "IMul", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Binary uint with two outputs + 
{OC::UMul, + "UMul", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::UDiv, + "UDiv", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Binary uint with carry or borrow + {OC::UAddc, + "UAddc", + OCC::BinaryWithCarryOrBorrow, + "binaryWithCarryOrBorrow", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::USubb, + "USubb", + OCC::BinaryWithCarryOrBorrow, + "binaryWithCarryOrBorrow", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Tertiary float + {OC::FMad, + "FMad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::Fma, + "Fma", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0x4}}, + {{0x0}}}, // Overloads: d + + // Tertiary int + {OC::IMad, + "IMad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Tertiary uint + {OC::UMad, + "UMad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Tertiary int + {OC::Msad, + "Msad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: il + {OC::Ibfe, + "Ibfe", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: il + + // Tertiary uint + {OC::Ubfe, + "Ubfe", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: il + + // Quaternary + {OC::Bfi, + "Bfi", + OCC::Quaternary, + "quaternary", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Dot + {OC::Dot2, + "Dot2", + OCC::Dot2, + "dot2", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Dot3, + "Dot3", + OCC::Dot3, + "dot3", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Dot4, + "Dot4", + 
OCC::Dot4, + "dot4", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Resources + {OC::CreateHandle, + "CreateHandle", + OCC::CreateHandle, + "createHandle", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::CBufferLoad, + "CBufferLoad", + OCC::CBufferLoad, + "cbufferLoad", + Attribute::ReadOnly, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + {OC::CBufferLoadLegacy, + "CBufferLoadLegacy", + OCC::CBufferLoadLegacy, + "cbufferLoadLegacy", + Attribute::ReadOnly, + 1, + {{0xe7}}, + {{0x0}}}, // Overloads: hfdwil + + // Resources - sample + {OC::Sample, + "Sample", + OCC::Sample, + "sample", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleBias, + "SampleBias", + OCC::SampleBias, + "sampleBias", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleLevel, + "SampleLevel", + OCC::SampleLevel, + "sampleLevel", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleGrad, + "SampleGrad", + OCC::SampleGrad, + "sampleGrad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleCmp, + "SampleCmp", + OCC::SampleCmp, + "sampleCmp", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::SampleCmpLevelZero, + "SampleCmpLevelZero", + OCC::SampleCmpLevelZero, + "sampleCmpLevelZero", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Resources + {OC::TextureLoad, + "TextureLoad", + OCC::TextureLoad, + "textureLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::TextureStore, + "TextureStore", + OCC::TextureStore, + "textureStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::BufferLoad, + "BufferLoad", + OCC::BufferLoad, + "bufferLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::BufferStore, + "BufferStore", + OCC::BufferStore, + "bufferStore", + Attribute::None, + 1, + {{0x63}}, 
+ {{0x0}}}, // Overloads: hfwi + {OC::BufferUpdateCounter, + "BufferUpdateCounter", + OCC::BufferUpdateCounter, + "bufferUpdateCounter", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::CheckAccessFullyMapped, + "CheckAccessFullyMapped", + OCC::CheckAccessFullyMapped, + "checkAccessFullyMapped", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::GetDimensions, + "GetDimensions", + OCC::GetDimensions, + "getDimensions", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + + // Resources - gather + {OC::TextureGather, + "TextureGather", + OCC::TextureGather, + "textureGather", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::TextureGatherCmp, + "TextureGatherCmp", + OCC::TextureGatherCmp, + "textureGatherCmp", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Resources - sample + {OC::Texture2DMSGetSamplePosition, + "Texture2DMSGetSamplePosition", + OCC::Texture2DMSGetSamplePosition, + "texture2DMSGetSamplePosition", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::RenderTargetGetSamplePosition, + "RenderTargetGetSamplePosition", + OCC::RenderTargetGetSamplePosition, + "renderTargetGetSamplePosition", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::RenderTargetGetSampleCount, + "RenderTargetGetSampleCount", + OCC::RenderTargetGetSampleCount, + "renderTargetGetSampleCount", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + + // Synchronization + {OC::AtomicBinOp, + "AtomicBinOp", + OCC::AtomicBinOp, + "atomicBinOp", + Attribute::None, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: li + {OC::AtomicCompareExchange, + "AtomicCompareExchange", + OCC::AtomicCompareExchange, + "atomicCompareExchange", + Attribute::None, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: li + {OC::Barrier, + "Barrier", + OCC::Barrier, + "barrier", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + + // Derivatives + {OC::CalculateLOD, + "CalculateLOD", + 
OCC::CalculateLOD, + "calculateLOD", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Pixel shader + {OC::Discard, + "Discard", + OCC::Discard, + "discard", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Derivatives + {OC::DerivCoarseX, + "DerivCoarseX", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::DerivCoarseY, + "DerivCoarseY", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::DerivFineX, + "DerivFineX", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::DerivFineY, + "DerivFineY", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Pixel shader + {OC::EvalSnapped, + "EvalSnapped", + OCC::EvalSnapped, + "evalSnapped", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::EvalSampleIndex, + "EvalSampleIndex", + OCC::EvalSampleIndex, + "evalSampleIndex", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::EvalCentroid, + "EvalCentroid", + OCC::EvalCentroid, + "evalCentroid", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::SampleIndex, + "SampleIndex", + OCC::SampleIndex, + "sampleIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::Coverage, + "Coverage", + OCC::Coverage, + "coverage", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::InnerCoverage, + "InnerCoverage", + OCC::InnerCoverage, + "innerCoverage", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Compute/Mesh/Amplification/Node shader + {OC::ThreadId, + "ThreadId", + OCC::ThreadId, + "threadId", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::GroupId, + "GroupId", + OCC::GroupId, + "groupId", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::ThreadIdInGroup, + 
"ThreadIdInGroup", + OCC::ThreadIdInGroup, + "threadIdInGroup", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::FlattenedThreadIdInGroup, + "FlattenedThreadIdInGroup", + OCC::FlattenedThreadIdInGroup, + "flattenedThreadIdInGroup", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Geometry shader + {OC::EmitStream, + "EmitStream", + OCC::EmitStream, + "emitStream", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::CutStream, + "CutStream", + OCC::CutStream, + "cutStream", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::EmitThenCutStream, + "EmitThenCutStream", + OCC::EmitThenCutStream, + "emitThenCutStream", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::GSInstanceID, + "GSInstanceID", + OCC::GSInstanceID, + "gsInstanceID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Double precision + {OC::MakeDouble, + "MakeDouble", + OCC::MakeDouble, + "makeDouble", + Attribute::ReadNone, + 1, + {{0x4}}, + {{0x0}}}, // Overloads: d + {OC::SplitDouble, + "SplitDouble", + OCC::SplitDouble, + "splitDouble", + Attribute::ReadNone, + 1, + {{0x4}}, + {{0x0}}}, // Overloads: d + + // Domain and hull shader + {OC::LoadOutputControlPoint, + "LoadOutputControlPoint", + OCC::LoadOutputControlPoint, + "loadOutputControlPoint", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::LoadPatchConstant, + "LoadPatchConstant", + OCC::LoadPatchConstant, + "loadPatchConstant", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Domain shader + {OC::DomainLocation, + "DomainLocation", + OCC::DomainLocation, + "domainLocation", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Hull shader + {OC::StorePatchConstant, + "StorePatchConstant", + OCC::StorePatchConstant, + "storePatchConstant", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::OutputControlPointID, + "OutputControlPointID", + 
OCC::OutputControlPointID, + "outputControlPointID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Hull, Domain and Geometry shaders + {OC::PrimitiveID, + "PrimitiveID", + OCC::PrimitiveID, + "primitiveID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Other + {OC::CycleCounterLegacy, + "CycleCounterLegacy", + OCC::CycleCounterLegacy, + "cycleCounterLegacy", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Wave + {OC::WaveIsFirstLane, + "WaveIsFirstLane", + OCC::WaveIsFirstLane, + "waveIsFirstLane", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveGetLaneIndex, + "WaveGetLaneIndex", + OCC::WaveGetLaneIndex, + "waveGetLaneIndex", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::WaveGetLaneCount, + "WaveGetLaneCount", + OCC::WaveGetLaneCount, + "waveGetLaneCount", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::WaveAnyTrue, + "WaveAnyTrue", + OCC::WaveAnyTrue, + "waveAnyTrue", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveAllTrue, + "WaveAllTrue", + OCC::WaveAllTrue, + "waveAllTrue", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveActiveAllEqual, + "WaveActiveAllEqual", + OCC::WaveActiveAllEqual, + "waveActiveAllEqual", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveActiveBallot, + "WaveActiveBallot", + OCC::WaveActiveBallot, + "waveActiveBallot", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveReadLaneAt, + "WaveReadLaneAt", + OCC::WaveReadLaneAt, + "waveReadLaneAt", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveReadLaneFirst, + "WaveReadLaneFirst", + OCC::WaveReadLaneFirst, + "waveReadLaneFirst", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveActiveOp, + "WaveActiveOp", + OCC::WaveActiveOp, + "waveActiveOp", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveActiveBit, 
+ "WaveActiveBit", + OCC::WaveActiveBit, + "waveActiveBit", + Attribute::None, + 1, + {{0xf0}}, + {{0x0}}}, // Overloads: 8wil + {OC::WavePrefixOp, + "WavePrefixOp", + OCC::WavePrefixOp, + "wavePrefixOp", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + + // Quad Wave Ops + {OC::QuadReadLaneAt, + "QuadReadLaneAt", + OCC::QuadReadLaneAt, + "quadReadLaneAt", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::QuadOp, + "QuadOp", + OCC::QuadOp, + "quadOp", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + + // Bitcasts with different sizes + {OC::BitcastI16toF16, + "BitcastI16toF16", + OCC::BitcastI16toF16, + "bitcastI16toF16", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastF16toI16, + "BitcastF16toI16", + OCC::BitcastF16toI16, + "bitcastF16toI16", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastI32toF32, + "BitcastI32toF32", + OCC::BitcastI32toF32, + "bitcastI32toF32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastF32toI32, + "BitcastF32toI32", + OCC::BitcastF32toI32, + "bitcastF32toI32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastI64toF64, + "BitcastI64toF64", + OCC::BitcastI64toF64, + "bitcastI64toF64", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastF64toI64, + "BitcastF64toI64", + OCC::BitcastF64toI64, + "bitcastF64toI64", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Legacy floating-point + {OC::LegacyF32ToF16, + "LegacyF32ToF16", + OCC::LegacyF32ToF16, + "legacyF32ToF16", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::LegacyF16ToF32, + "LegacyF16ToF32", + OCC::LegacyF16ToF32, + "legacyF16ToF32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Double precision + {OC::LegacyDoubleToFloat, + "LegacyDoubleToFloat", + OCC::LegacyDoubleToFloat, + "legacyDoubleToFloat", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + 
{OC::LegacyDoubleToSInt32, + "LegacyDoubleToSInt32", + OCC::LegacyDoubleToSInt32, + "legacyDoubleToSInt32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::LegacyDoubleToUInt32, + "LegacyDoubleToUInt32", + OCC::LegacyDoubleToUInt32, + "legacyDoubleToUInt32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Wave + {OC::WaveAllBitCount, + "WaveAllBitCount", + OCC::WaveAllOp, + "waveAllOp", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WavePrefixBitCount, + "WavePrefixBitCount", + OCC::WavePrefixOp, + "wavePrefixOp", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Pixel shader + {OC::AttributeAtVertex, + "AttributeAtVertex", + OCC::AttributeAtVertex, + "attributeAtVertex", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfiw + + // Graphics shader + {OC::ViewID, + "ViewID", + OCC::ViewID, + "viewID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Resources + {OC::RawBufferLoad, + "RawBufferLoad", + OCC::RawBufferLoad, + "rawBufferLoad", + Attribute::ReadOnly, + 1, + {{0xe7}}, + {{0x0}}}, // Overloads: hfwidl + {OC::RawBufferStore, + "RawBufferStore", + OCC::RawBufferStore, + "rawBufferStore", + Attribute::None, + 1, + {{0xe7}}, + {{0x0}}}, // Overloads: hfwidl + + // Raytracing object space uint System Values + {OC::InstanceID, + "InstanceID", + OCC::InstanceID, + "instanceID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::InstanceIndex, + "InstanceIndex", + OCC::InstanceIndex, + "instanceIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Raytracing hit uint System Values + {OC::HitKind, + "HitKind", + OCC::HitKind, + "hitKind", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Raytracing uint System Values + {OC::RayFlags, + "RayFlags", + OCC::RayFlags, + "rayFlags", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Ray Dispatch Arguments + 
{OC::DispatchRaysIndex, + "DispatchRaysIndex", + OCC::DispatchRaysIndex, + "dispatchRaysIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::DispatchRaysDimensions, + "DispatchRaysDimensions", + OCC::DispatchRaysDimensions, + "dispatchRaysDimensions", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Ray Vectors + {OC::WorldRayOrigin, + "WorldRayOrigin", + OCC::WorldRayOrigin, + "worldRayOrigin", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::WorldRayDirection, + "WorldRayDirection", + OCC::WorldRayDirection, + "worldRayDirection", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Ray object space Vectors + {OC::ObjectRayOrigin, + "ObjectRayOrigin", + OCC::ObjectRayOrigin, + "objectRayOrigin", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::ObjectRayDirection, + "ObjectRayDirection", + OCC::ObjectRayDirection, + "objectRayDirection", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Ray Transforms + {OC::ObjectToWorld, + "ObjectToWorld", + OCC::ObjectToWorld, + "objectToWorld", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::WorldToObject, + "WorldToObject", + OCC::WorldToObject, + "worldToObject", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // RayT + {OC::RayTMin, + "RayTMin", + OCC::RayTMin, + "rayTMin", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayTCurrent, + "RayTCurrent", + OCC::RayTCurrent, + "rayTCurrent", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // AnyHit Terminals + {OC::IgnoreHit, + "IgnoreHit", + OCC::IgnoreHit, + "ignoreHit", + Attribute::NoReturn, + 0, + {}, + {}}, // Overloads: v + {OC::AcceptHitAndEndSearch, + "AcceptHitAndEndSearch", + OCC::AcceptHitAndEndSearch, + "acceptHitAndEndSearch", + Attribute::NoReturn, + 0, + {}, + {}}, // Overloads: v + + // Indirect Shader Invocation + 
{OC::TraceRay, + "TraceRay", + OCC::TraceRay, + "traceRay", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::ReportHit, + "ReportHit", + OCC::ReportHit, + "reportHit", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::CallShader, + "CallShader", + OCC::CallShader, + "callShader", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + + // Library create handle from resource struct (like HL intrinsic) + {OC::CreateHandleForLib, + "CreateHandleForLib", + OCC::CreateHandleForLib, + "createHandleForLib", + Attribute::ReadOnly, + 1, + {{0x200}}, + {{0x0}}}, // Overloads: o + + // Raytracing object space uint System Values + {OC::PrimitiveIndex, + "PrimitiveIndex", + OCC::PrimitiveIndex, + "primitiveIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Dot product with accumulate + {OC::Dot2AddHalf, + "Dot2AddHalf", + OCC::Dot2AddHalf, + "dot2AddHalf", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::Dot4AddI8Packed, + "Dot4AddI8Packed", + OCC::Dot4AddPacked, + "dot4AddPacked", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::Dot4AddU8Packed, + "Dot4AddU8Packed", + OCC::Dot4AddPacked, + "dot4AddPacked", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Wave + {OC::WaveMatch, + "WaveMatch", + OCC::WaveMatch, + "waveMatch", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + {OC::WaveMultiPrefixOp, + "WaveMultiPrefixOp", + OCC::WaveMultiPrefixOp, + "waveMultiPrefixOp", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + {OC::WaveMultiPrefixBitCount, + "WaveMultiPrefixBitCount", + OCC::WaveMultiPrefixBitCount, + "waveMultiPrefixBitCount", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Mesh shader instructions + {OC::SetMeshOutputCounts, + "SetMeshOutputCounts", + OCC::SetMeshOutputCounts, + "setMeshOutputCounts", + Attribute::None, + 0, + {}, + {}}, // Overloads: 
v + {OC::EmitIndices, + "EmitIndices", + OCC::EmitIndices, + "emitIndices", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::GetMeshPayload, + "GetMeshPayload", + OCC::GetMeshPayload, + "getMeshPayload", + Attribute::ReadOnly, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::StoreVertexOutput, + "StoreVertexOutput", + OCC::StoreVertexOutput, + "storeVertexOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::StorePrimitiveOutput, + "StorePrimitiveOutput", + OCC::StorePrimitiveOutput, + "storePrimitiveOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Amplification shader instructions + {OC::DispatchMesh, + "DispatchMesh", + OCC::DispatchMesh, + "dispatchMesh", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + + // Sampler Feedback + {OC::WriteSamplerFeedback, + "WriteSamplerFeedback", + OCC::WriteSamplerFeedback, + "writeSamplerFeedback", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WriteSamplerFeedbackBias, + "WriteSamplerFeedbackBias", + OCC::WriteSamplerFeedbackBias, + "writeSamplerFeedbackBias", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WriteSamplerFeedbackLevel, + "WriteSamplerFeedbackLevel", + OCC::WriteSamplerFeedbackLevel, + "writeSamplerFeedbackLevel", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WriteSamplerFeedbackGrad, + "WriteSamplerFeedbackGrad", + OCC::WriteSamplerFeedbackGrad, + "writeSamplerFeedbackGrad", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Inline Ray Query + {OC::AllocateRayQuery, + "AllocateRayQuery", + OCC::AllocateRayQuery, + "allocateRayQuery", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_TraceRayInline, + "RayQuery_TraceRayInline", + OCC::RayQuery_TraceRayInline, + "rayQuery_TraceRayInline", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_Proceed, + "RayQuery_Proceed", + OCC::RayQuery_Proceed, + "rayQuery_Proceed", + Attribute::None, + 
1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_Abort, + "RayQuery_Abort", + OCC::RayQuery_Abort, + "rayQuery_Abort", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_CommitNonOpaqueTriangleHit, + "RayQuery_CommitNonOpaqueTriangleHit", + OCC::RayQuery_CommitNonOpaqueTriangleHit, + "rayQuery_CommitNonOpaqueTriangleHit", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_CommitProceduralPrimitiveHit, + "RayQuery_CommitProceduralPrimitiveHit", + OCC::RayQuery_CommitProceduralPrimitiveHit, + "rayQuery_CommitProceduralPrimitiveHit", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_CommittedStatus, + "RayQuery_CommittedStatus", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateType, + "RayQuery_CandidateType", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateObjectToWorld3x4, + "RayQuery_CandidateObjectToWorld3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateWorldToObject3x4, + "RayQuery_CandidateWorldToObject3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedObjectToWorld3x4, + "RayQuery_CommittedObjectToWorld3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedWorldToObject3x4, + "RayQuery_CommittedWorldToObject3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, + "RayQuery_CandidateProceduralPrimitiveNonOpaque", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + 
Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_CandidateTriangleFrontFace, + "RayQuery_CandidateTriangleFrontFace", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_CommittedTriangleFrontFace, + "RayQuery_CommittedTriangleFrontFace", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_CandidateTriangleBarycentrics, + "RayQuery_CandidateTriangleBarycentrics", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedTriangleBarycentrics, + "RayQuery_CommittedTriangleBarycentrics", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_RayFlags, + "RayQuery_RayFlags", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_WorldRayOrigin, + "RayQuery_WorldRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_WorldRayDirection, + "RayQuery_WorldRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_RayTMin, + "RayQuery_RayTMin", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateTriangleRayT, + "RayQuery_CandidateTriangleRayT", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedRayT, + "RayQuery_CommittedRayT", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + 
{OC::RayQuery_CandidateInstanceIndex, + "RayQuery_CandidateInstanceIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateInstanceID, + "RayQuery_CandidateInstanceID", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateGeometryIndex, + "RayQuery_CandidateGeometryIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidatePrimitiveIndex, + "RayQuery_CandidatePrimitiveIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateObjectRayOrigin, + "RayQuery_CandidateObjectRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateObjectRayDirection, + "RayQuery_CandidateObjectRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedInstanceIndex, + "RayQuery_CommittedInstanceIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedInstanceID, + "RayQuery_CommittedInstanceID", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedGeometryIndex, + "RayQuery_CommittedGeometryIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedPrimitiveIndex, + "RayQuery_CommittedPrimitiveIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: 
i + {OC::RayQuery_CommittedObjectRayOrigin, + "RayQuery_CommittedObjectRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedObjectRayDirection, + "RayQuery_CommittedObjectRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Raytracing object space uint System Values, raytracing tier 1.1 + {OC::GeometryIndex, + "GeometryIndex", + OCC::GeometryIndex, + "geometryIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Inline Ray Query + {OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, + "RayQuery_CandidateInstanceContributionToHitGroupIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, + "RayQuery_CommittedInstanceContributionToHitGroupIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Get handle from heap + {OC::AnnotateHandle, + "AnnotateHandle", + OCC::AnnotateHandle, + "annotateHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::CreateHandleFromBinding, + "CreateHandleFromBinding", + OCC::CreateHandleFromBinding, + "createHandleFromBinding", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::CreateHandleFromHeap, + "CreateHandleFromHeap", + OCC::CreateHandleFromHeap, + "createHandleFromHeap", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Unpacking intrinsics + {OC::Unpack4x8, + "Unpack4x8", + OCC::Unpack4x8, + "unpack4x8", + Attribute::ReadNone, + 1, + {{0x60}}, + {{0x0}}}, // Overloads: iw + + // Packing intrinsics + {OC::Pack4x8, + "Pack4x8", + OCC::Pack4x8, + "pack4x8", + Attribute::ReadNone, + 1, + {{0x60}}, + {{0x0}}}, // Overloads: iw + + // Helper Lanes 
+ {OC::IsHelperLane, + "IsHelperLane", + OCC::IsHelperLane, + "isHelperLane", + Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + + // Quad Wave Ops + {OC::QuadVote, + "QuadVote", + OCC::QuadVote, + "quadVote", + Attribute::None, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + + // Resources - gather + {OC::TextureGatherRaw, + "TextureGatherRaw", + OCC::TextureGatherRaw, + "textureGatherRaw", + Attribute::ReadOnly, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Resources - sample + {OC::SampleCmpLevel, + "SampleCmpLevel", + OCC::SampleCmpLevel, + "sampleCmpLevel", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Resources + {OC::TextureStoreSample, + "TextureStoreSample", + OCC::TextureStoreSample, + "textureStoreSample", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + {OC::Reserved0, + "Reserved0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved1, + "Reserved1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved2, + "Reserved2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved3, + "Reserved3", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved4, + "Reserved4", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved5, + "Reserved5", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved6, + "Reserved6", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved7, + "Reserved7", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved8, + "Reserved8", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved9, + "Reserved9", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + 
{}}, // Overloads: v + {OC::Reserved10, + "Reserved10", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved11, + "Reserved11", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Create/Annotate Node Handles + {OC::AllocateNodeOutputRecords, + "AllocateNodeOutputRecords", + OCC::AllocateNodeOutputRecords, + "allocateNodeOutputRecords", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Get Pointer to Node Record in Address Space 6 + {OC::GetNodeRecordPtr, + "GetNodeRecordPtr", + OCC::GetNodeRecordPtr, + "getNodeRecordPtr", + Attribute::ReadNone, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + + // Work Graph intrinsics + {OC::IncrementOutputCount, + "IncrementOutputCount", + OCC::IncrementOutputCount, + "incrementOutputCount", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::OutputComplete, + "OutputComplete", + OCC::OutputComplete, + "outputComplete", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::GetInputRecordCount, + "GetInputRecordCount", + OCC::GetInputRecordCount, + "getInputRecordCount", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::FinishedCrossGroupSharing, + "FinishedCrossGroupSharing", + OCC::FinishedCrossGroupSharing, + "finishedCrossGroupSharing", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Synchronization + {OC::BarrierByMemoryType, + "BarrierByMemoryType", + OCC::BarrierByMemoryType, + "barrierByMemoryType", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + {OC::BarrierByMemoryHandle, + "BarrierByMemoryHandle", + OCC::BarrierByMemoryHandle, + "barrierByMemoryHandle", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + {OC::BarrierByNodeRecordHandle, + "BarrierByNodeRecordHandle", + OCC::BarrierByNodeRecordHandle, + "barrierByNodeRecordHandle", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + + // Create/Annotate Node Handles + {OC::CreateNodeOutputHandle, + 
"CreateNodeOutputHandle", + OCC::createNodeOutputHandle, + "createNodeOutputHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::IndexNodeHandle, + "IndexNodeHandle", + OCC::IndexNodeHandle, + "indexNodeHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::AnnotateNodeHandle, + "AnnotateNodeHandle", + OCC::AnnotateNodeHandle, + "annotateNodeHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::CreateNodeInputRecordHandle, + "CreateNodeInputRecordHandle", + OCC::CreateNodeInputRecordHandle, + "createNodeInputRecordHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::AnnotateNodeRecordHandle, + "AnnotateNodeRecordHandle", + OCC::AnnotateNodeRecordHandle, + "annotateNodeRecordHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Work Graph intrinsics + {OC::NodeOutputIsValid, + "NodeOutputIsValid", + OCC::NodeOutputIsValid, + "nodeOutputIsValid", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::GetRemainingRecursionLevels, + "GetRemainingRecursionLevels", + OCC::GetRemainingRecursionLevels, + "getRemainingRecursionLevels", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + + // Comparison Samples + {OC::SampleCmpGrad, + "SampleCmpGrad", + OCC::SampleCmpGrad, + "sampleCmpGrad", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::SampleCmpBias, + "SampleCmpBias", + OCC::SampleCmpBias, + "sampleCmpBias", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Extended Command Information + {OC::StartVertexLocation, + "StartVertexLocation", + OCC::StartVertexLocation, + "startVertexLocation", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::StartInstanceLocation, + "StartInstanceLocation", + OCC::StartInstanceLocation, + "startInstanceLocation", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Inline Ray Query + {OC::AllocateRayQuery2, + "AllocateRayQuery2", + 
OCC::AllocateRayQuery2, + "allocateRayQuery2", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + {OC::ReservedA0, + "ReservedA0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedA1, + "ReservedA1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedA2, + "ReservedA2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB0, + "ReservedB0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB1, + "ReservedB1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB2, + "ReservedB2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Shader Execution Reordering + {OC::HitObject_MakeMiss, + "HitObject_MakeMiss", + OCC::HitObject_MakeMiss, + "hitObject_MakeMiss", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::HitObject_MakeNop, + "HitObject_MakeNop", + OCC::HitObject_MakeNop, + "hitObject_MakeNop", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + {OC::ReservedB5, + "ReservedB5", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB6, + "ReservedB6", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB7, + "ReservedB7", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB8, + "ReservedB8", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB9, + "ReservedB9", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB10, + "ReservedB10", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB11, + "ReservedB11", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // 
Overloads: v + {OC::ReservedB12, + "ReservedB12", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB13, + "ReservedB13", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB14, + "ReservedB14", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB15, + "ReservedB15", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB16, + "ReservedB16", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB17, + "ReservedB17", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB18, + "ReservedB18", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB19, + "ReservedB19", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB20, + "ReservedB20", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB21, + "ReservedB21", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB22, + "ReservedB22", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB23, + "ReservedB23", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB24, + "ReservedB24", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB25, + "ReservedB25", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB26, + "ReservedB26", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB27, + "ReservedB27", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB28, + "ReservedB28", + 
OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB29, + "ReservedB29", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB30, + "ReservedB30", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC0, + "ReservedC0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC1, + "ReservedC1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC2, + "ReservedC2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC3, + "ReservedC3", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC4, + "ReservedC4", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC5, + "ReservedC5", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC6, + "ReservedC6", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC7, + "ReservedC7", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC8, + "ReservedC8", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC9, + "ReservedC9", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v }; // OPCODE-OLOADS:END -const char *OP::m_OverloadTypeName[kNumTypeOverloads] = { - "void", "f16", "f32", "f64", "i1", "i8", - "i16", "i32", "i64", "udt", "obj", // These should not be used -}; +const char *OP::m_OverloadTypeName[TS_BasicCount] = { + "f16", "f32", "f64", "i1", "i8", "i16", "i32", "i64"}; const char *OP::m_NamePrefix = "dx.op."; const char *OP::m_TypePrefix = "dx.types."; @@ -3040,82 +2654,110 @@ unsigned OP::GetTypeSlot(Type *pType) { Type::TypeID T = 
pType->getTypeID(); switch (T) { case Type::VoidTyID: - return 0; + return TS_Invalid; case Type::HalfTyID: - return 1; + return TS_F16; case Type::FloatTyID: - return 2; + return TS_F32; case Type::DoubleTyID: - return 3; + return TS_F64; case Type::IntegerTyID: { IntegerType *pIT = dyn_cast(pType); unsigned Bits = pIT->getBitWidth(); switch (Bits) { case 1: - return 4; + return TS_I1; case 8: - return 5; + return TS_I8; case 16: - return 6; + return TS_I16; case 32: - return 7; + return TS_I32; case 64: - return 8; + return TS_I64; } llvm_unreachable("Invalid Bits size"); + return TS_Invalid; } case Type::PointerTyID: { pType = cast(pType)->getElementType(); if (pType->isStructTy()) - return kUserDefineTypeSlot; + return TS_UDT; DXASSERT(!pType->isPointerTy(), "pointer-to-pointer type unsupported"); return GetTypeSlot(pType); } case Type::StructTyID: - return kObjectTypeSlot; + // Named struct value (not pointer) indicates a built-in object type. + // Anonymous struct value is used to wrap multi-overload dimensions. 
+ if (cast(pType)->hasName()) + return TS_Object; + else + return TS_Extended; + case Type::VectorTyID: + return TS_Vector; default: break; } - return UINT_MAX; + return TS_Invalid; } const char *OP::GetOverloadTypeName(unsigned TypeSlot) { - DXASSERT(TypeSlot < kUserDefineTypeSlot, "otherwise caller passed OOB index"); + DXASSERT(TypeSlot < TS_BasicCount, "otherwise caller passed OOB index"); return m_OverloadTypeName[TypeSlot]; } -llvm::StringRef OP::GetTypeName(Type *Ty, std::string &str) { +StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { + DXASSERT(!Ty->isVoidTy(), "must not pass void type here"); unsigned TypeSlot = OP::GetTypeSlot(Ty); - if (TypeSlot < kUserDefineTypeSlot) { + if (TypeSlot < TS_BasicCount) { return GetOverloadTypeName(TypeSlot); - } else if (TypeSlot == kUserDefineTypeSlot) { + } else if (TypeSlot == TS_UDT) { if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); StructType *ST = cast(Ty); return ST->getStructName(); - } else if (TypeSlot == kObjectTypeSlot) { + } else if (TypeSlot == TS_Object) { StructType *ST = cast(Ty); return ST->getStructName(); + } else if (TypeSlot == TS_Vector) { + VectorType *VecTy = cast(Ty); + return (Twine("v") + Twine(VecTy->getNumElements()) + + Twine( + GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + } else if (TypeSlot == TS_Extended) { + DXASSERT(isa(Ty), + "otherwise, extended overload type not wrapped in struct type."); + StructType *ST = cast(Ty); + DXASSERT(ST->getNumElements() <= DXIL::kDxilMaxOloadDims, + "otherwise, extended overload has too many dimensions."); + // Iterate extended slots, recurse, separate with '.' 
+ raw_svector_ostream OS(Storage); + for (unsigned I = 0; I < ST->getNumElements(); ++I) { + if (I > 0) + OS << "."; + SmallVector TempStr; + OS << GetTypeName(ST->getElementType(I), TempStr); + } + return OS.str(); } else { - raw_string_ostream os(str); - Ty->print(os); - os.flush(); - return str; + raw_svector_ostream OS(Storage); + Ty->print(OS); + return OS.str(); } } -llvm::StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, - std::string &funcNameStorage) { +StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, + SmallVectorImpl &Storage) { if (Ty == Type::getVoidTy(Ty->getContext())) { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))).str(); + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))) + .toStringRef(Storage); } else { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + - GetTypeName(Ty, funcNameStorage)) - .str(); + llvm::SmallVector TempStr; + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + + GetTypeName(Ty, TempStr)) + .toStringRef(Storage); } - return funcNameStorage; } const char *OP::GetOpCodeName(OpCode opCode) { @@ -3143,13 +2785,41 @@ llvm::Attribute::AttrKind OP::GetMemAccessAttr(OpCode opCode) { } bool OP::IsOverloadLegal(OpCode opCode, Type *pType) { - if (!pType) + if (static_cast(opCode) >= + static_cast(OpCode::NumOpCodes)) return false; - if (opCode == OpCode::NumOpCodes) + if (!pType) return false; - unsigned TypeSlot = GetTypeSlot(pType); - return TypeSlot != UINT_MAX && - m_OpCodeProps[(unsigned)opCode].bAllowOverload[TypeSlot]; + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + + if (OpProps.NumOverloadDims == 0) + return pType->isVoidTy(); + + // Normalize 1+ overload dimensions into array. + Type *Types[DXIL::kDxilMaxOloadDims] = {pType}; + if (OpProps.NumOverloadDims > 1) { + StructType *ST = dyn_cast(pType); + // Make sure multi-overload is well-formed. 
+ if (!ST || ST->hasName() || ST->getNumElements() != OpProps.NumOverloadDims) + return false; + for (unsigned I = 0; I < ST->getNumElements(); ++I) + Types[I] = ST->getElementType(I); + } + + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) { + Type *Ty = Types[I]; + unsigned TypeSlot = GetTypeSlot(Ty); + if (!OpProps.AllowedOverloads[I][TypeSlot]) + return false; + if (TypeSlot == TS_Vector) { + unsigned EltTypeSlot = + GetTypeSlot(cast(Ty)->getElementType()); + if (!OpProps.AllowedVectorElements[I][EltTypeSlot]) + return false; + } + } + + return true; } bool OP::CheckOpCodeTable() { @@ -3173,41 +2843,6 @@ bool OP::IsDxilOpFunc(const llvm::Function *F) { return IsDxilOpFuncName(F->getName()); } -bool OP::IsDxilOpTypeName(StringRef name) { - return name.startswith(m_TypePrefix) || name.startswith(m_MatrixTypePrefix); -} - -bool OP::IsDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - return IsDxilOpTypeName(Name); -} - -bool OP::IsDupDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - if (!IsDxilOpTypeName(Name)) - return false; - size_t DotPos = Name.rfind('.'); - if (DotPos == 0 || DotPos == StringRef::npos || Name.back() == '.' 
|| - !isdigit(static_cast(Name[DotPos + 1]))) - return false; - return true; -} - -StructType *OP::GetOriginalDxilOpType(llvm::StructType *ST, llvm::Module &M) { - DXASSERT(IsDupDxilOpType(ST), "else should not call GetOriginalDxilOpType"); - StringRef Name = ST->getName(); - size_t DotPos = Name.rfind('.'); - StructType *OriginalST = M.getTypeByName(Name.substr(0, DotPos)); - DXASSERT(OriginalST, "else name collison without original type"); - DXASSERT(ST->isLayoutIdentical(OriginalST), - "else invalid layout for dxil types"); - return OriginalST; -} - bool OP::IsDxilOpFuncCallInst(const llvm::Instruction *I) { const CallInst *CI = dyn_cast(I); if (CI == nullptr) @@ -3297,6 +2932,12 @@ bool OP::IsDxilOpBarrier(OpCode C) { // OPCODE-BARRIER:END } +bool OP::IsDxilOpExtendedOverload(OpCode C) { + if (C >= OpCode::NumOpCodes) + return false; + return m_OpCodeProps[static_cast(C)].NumOverloadDims > 1; +} + static unsigned MaskMemoryTypeFlagsIfAllowed(unsigned memoryTypeFlags, unsigned allowedMask) { // If the memory type is AllMemory, masking inapplicable flags is allowed. 
@@ -3945,13 +3586,12 @@ void OP::FixOverloadNames() { if (F.isDeclaration() && OP::IsDxilOpFunc(&F) && !F.user_empty()) { CallInst *CI = cast(*F.user_begin()); DXIL::OpCode opCode = OP::GetDxilOpFuncCallInst(CI); + if (!MayHaveNonCanonicalOverload(opCode)) + continue; llvm::Type *Ty = OP::GetOverloadType(opCode, &F); if (!OP::IsOverloadLegal(opCode, Ty)) continue; - if (!isa(Ty) && !isa(Ty)) - continue; - - std::string funcName; + SmallVector funcName; if (OP::ConstructOverloadName(Ty, opCode, funcName) .compare(F.getName()) != 0) F.setName(funcName); @@ -3964,11 +3604,54 @@ void OP::UpdateCache(OpCodeClass opClass, Type *Ty, llvm::Function *F) { m_FunctionToOpClass[F] = opClass; } +bool OP::MayHaveNonCanonicalOverload(OpCode OC) { + if (OC >= OpCode::NumOpCodes) + return false; + const unsigned CheckMask = (1 << TS_UDT) | (1 << TS_Object); + auto &OpProps = m_OpCodeProps[static_cast(OC)]; + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) + if ((CheckMask & OpProps.AllowedOverloads[I].SlotMask) != 0) + return true; + return false; +} + +Function *OP::GetOpFunc(OpCode OC, ArrayRef OverloadTypes) { + if (OC >= OpCode::NumOpCodes) + return nullptr; + if (OverloadTypes.size() != + m_OpCodeProps[static_cast(OC)].NumOverloadDims) { + llvm_unreachable("incorrect overload dimensions"); + return nullptr; + } + if (OverloadTypes.size() == 0) { + return GetOpFunc(OC, Type::getVoidTy(m_Ctx)); + } else if (OverloadTypes.size() == 1) { + return GetOpFunc(OC, OverloadTypes[0]); + } + return GetOpFunc(OC, GetExtendedOverloadType(OverloadTypes)); +} + Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { - if (opCode == OpCode::NumOpCodes) + if (opCode >= OpCode::NumOpCodes) return nullptr; if (!pOverloadType) return nullptr; + + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + if (IsDxilOpExtendedOverload(opCode)) { + // Make sure pOverloadType is well formed for an extended overload. 
+ StructType *ST = dyn_cast(pOverloadType); + DXASSERT(ST != nullptr, + "otherwise, extended overload type is not a struct"); + if (ST == nullptr) + return nullptr; + bool EltCountValid = ST->getNumElements() == OpProps.NumOverloadDims; + DXASSERT(EltCountValid, + "otherwise, incorrect type count for extended overload."); + if (!EltCountValid) + return nullptr; + } + // Illegal overloads are generated and eliminated by DXIL op constant // evaluation for a number of cases where a double overload of an HL intrinsic // that otherwise does not support double is used for literal values, when @@ -3976,7 +3659,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Illegal overloads of DXIL intrinsics may survive through to final DXIL, // but these will be caught by the validator, and this is not a regression. - OpCodeClass opClass = m_OpCodeProps[(unsigned)opCode].opCodeClass; + OpCodeClass opClass = OpProps.opCodeClass; Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[pOverloadType]; if (F != nullptr) { @@ -3984,7 +3667,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - vector ArgTypes; // RetType is ArgTypes[0] + SmallVector ArgTypes; // RetType is ArgTypes[0] Type *pETy = pOverloadType; Type *pRes = GetHandleType(); Type *pNodeHandle = GetNodeHandleType(); @@ -4020,7 +3703,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { #define A(_x) ArgTypes.emplace_back(_x) #define RRT(_y) A(GetResRetType(_y)) #define CBRT(_y) A(GetCBufferRetType(_y)) -#define VEC4(_y) A(GetVectorType(4, _y)) +#define VEC4(_y) A(GetStructVectorType(4, _y)) + +// Extended Overload types are wrapped in an anonymous struct +#define EXT(_y) A(cast(pOverloadType)->getElementType(_y)) /* hctdb_instrhelp.get_oloads_funcs()*/ switch (opCode) { // return opCode @@ -6066,14 +5752,15 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { pFT = FunctionType::get( ArgTypes[0], ArrayRef(&ArgTypes[1], ArgTypes.size() - 1), 
false); - std::string funcName; - ConstructOverloadName(pOverloadType, opCode, funcName); + SmallVector FuncStorage; + StringRef FuncName = + ConstructOverloadName(pOverloadType, opCode, FuncStorage); // Try to find existing function with the same name in the module. // This needs to happen after the switch statement that constructs arguments // and return values to ensure that ResRetType is constructed in the // RefreshCache case. - if (Function *existF = m_pModule->getFunction(funcName)) { + if (Function *existF = m_pModule->getFunction(FuncName)) { if (existF->getFunctionType() != pFT) return nullptr; F = existF; @@ -6081,13 +5768,13 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - F = cast(m_pModule->getOrInsertFunction(funcName, pFT)); + F = cast(m_pModule->getOrInsertFunction(FuncName, pFT)); UpdateCache(opClass, pOverloadType, F); F->setCallingConv(CallingConv::C); F->addFnAttr(Attribute::NoUnwind); - if (m_OpCodeProps[(unsigned)opCode].FuncAttr != Attribute::None) - F->addFnAttr(m_OpCodeProps[(unsigned)opCode].FuncAttr); + if (OpProps.FuncAttr != Attribute::None) + F->addFnAttr(OpProps.FuncAttr); return F; } @@ -6494,62 +6181,91 @@ Type *OP::GetFourI32Type() const { return m_pFourI32Type; } Type *OP::GetFourI16Type() const { return m_pFourI16Type; } bool OP::IsResRetType(llvm::Type *Ty) { + if (!Ty->isStructTy()) + return false; for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) return true; } - return false; + // Check for vector overload which isn't cached in m_pResRetType. 
+ StructType *ST = cast(Ty); + if (!ST->hasName() || ST->getNumElements() < 2 || + !ST->getElementType(0)->isVectorTy()) + return false; + return Ty == GetResRetType(ST->getElementType(0)); } Type *OP::GetResRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); - if (m_pResRetType[TypeSlot] == nullptr) { - string TypeName("dx.types.ResRet."); - TypeName += GetOverloadTypeName(TypeSlot); - Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, - pOverloadType, Type::getInt32Ty(m_Ctx)}; - m_pResRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + if (TypeSlot < TS_BasicCount) { + if (m_pResRetType[TypeSlot] == nullptr) { + SmallVector Storage; + StringRef TypeName = + (Twine("dx.types.ResRet.") + Twine(GetOverloadTypeName(TypeSlot))) + .toStringRef(Storage); + Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, + pOverloadType, Type::getInt32Ty(m_Ctx)}; + m_pResRetType[TypeSlot] = + GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + } + return m_pResRetType[TypeSlot]; + } else if (TypeSlot == TS_Vector) { + SmallVector Storage; + VectorType *VecTy = cast(pOverloadType); + StringRef TypeName = + (Twine("dx.types.ResRet.v") + Twine(VecTy->getNumElements()) + + Twine(GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + Type *FieldTypes[2] = {pOverloadType, Type::getInt32Ty(m_Ctx)}; + return GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); } - return m_pResRetType[TypeSlot]; + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; } Type *OP::GetCBufferRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); + if (TypeSlot >= TS_BasicCount) { + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; + } + if (m_pCBufferRetType[TypeSlot] == nullptr) { DXASSERT(m_LowPrecisionMode != DXIL::LowPrecisionMode::Undefined, "m_LowPrecisionMode must be set before 
constructing type."); - string TypeName("dx.types.CBufRet."); - TypeName += GetOverloadTypeName(TypeSlot); + SmallVector Storage; + raw_svector_ostream OS(Storage); + OS << "dx.types.CBufRet."; + OS << GetOverloadTypeName(TypeSlot); Type *i64Ty = Type::getInt64Ty(pOverloadType->getContext()); Type *i16Ty = Type::getInt16Ty(pOverloadType->getContext()); if (pOverloadType->isDoubleTy() || pOverloadType == i64Ty) { Type *FieldTypes[2] = {pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else if (!UseMinPrecision() && (pOverloadType->isHalfTy() || pOverloadType == i16Ty)) { - TypeName += ".8"; // dx.types.CBufRet.fp16.8 for buffer of 8 halves + OS << ".8"; // dx.types.CBufRet.f16.8 for buffer of 8 halves Type *FieldTypes[8] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, }; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else { Type *FieldTypes[4] = {pOverloadType, pOverloadType, pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } } return m_pCBufferRetType[TypeSlot]; } -Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { +Type *OP::GetStructVectorType(unsigned numElements, Type *pOverloadType) { if (numElements == 4) { if (pOverloadType == Type::getInt32Ty(pOverloadType->getContext())) { return m_pFourI32Type; @@ -6561,6 +6277,10 @@ Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { return nullptr; } +StructType *OP::GetExtendedOverloadType(ArrayRef OverloadTypes) { + return StructType::get(m_Ctx, OverloadTypes); +} + 
//------------------------------------------------------------------------------ // // LLVM utility methods. diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 4622256dfe..cac074adc3 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2037,7 +2037,7 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty->getScalarType()); + dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty); } if (!dxilFunc) { @@ -2109,17 +2109,20 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { return true; unsigned EltNum = ST->getNumElements(); + Type *EltTy = ST->getElementType(0); switch (EltNum) { case 2: + // Check if it's a native vector resret. + if (EltTy->isVectorTy()) + return ST == hlslOP->GetResRetType(EltTy); + LLVM_FALLTHROUGH; case 4: - case 8: { // 2 for doubles, 8 for halfs. - Type *EltTy = ST->getElementType(0); + case 8: // 2 for doubles, 8 for halfs. return ST == hlslOP->GetCBufferRetType(EltTy); - } break; - case 5: { - Type *EltTy = ST->getElementType(0); + break; + case 5: return ST == hlslOP->GetResRetType(EltTy); - } break; + break; default: return false; } diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index e32ab1915a..05bc7d472d 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -37,6 +37,30 @@ "array_local_ldst", ] +# These are the valid overload type characters for DXIL instructions. +# - "v" is for void, and can only be used alone. +# - "u" is for user defined type (UDT), and is mutually exclusive with the other +# types. +# - "o" is for an HLSL object type (e.g. Texture, Sampler, etc.), and is +# mutually exclusive with the other types. +# - "<" is for vector overloads, and may be followed by a set of supported +# component types. 
+# - If "<" is not followed by any component types, any preceding scalar types +# are used. +# - Vector component types are captured into a separate list during +# processing. +# - "," is used to separate multiple overload dimensions. +# - When used, only $x0, $x1, etc. are supported for overloaded parameter +# types. +# dxil_all_user_oload_chars must be kept in sync with the indices in +# hlsl::OP::TypeSlot in DxilOperations.h. +dxil_all_user_oload_chars = "hfd18wiluo<" +dxil_scalar_oload_chars = "hfd18wil" + +# Maximum number of overload dimensions supported through the extended overload +# in DXIL instructions. +dxil_max_overload_dims = 2 + class db_dxil_enum_value(object): "A representation for a value in an enumeration type" @@ -81,6 +105,7 @@ def __init__(self, name, **kwargs): self.ops = [] # the operands that this instruction takes self.is_allowed = True # whether this instruction is allowed in a DXIL program self.oload_types = "" # overload types if applicable + # Always call process_oload_types() after setting oload_types. 
self.fn_attr = "" # attribute shorthands: rn=does not access memory,ro=only reads from memory, self.is_deriv = False # whether this is some kind of derivative self.is_gradient = False # whether this requires a gradient calculation @@ -98,6 +123,9 @@ def __init__(self, name, **kwargs): self.is_reserved = self.dxil_class == "Reserved" self.shader_model_translated = () # minimum shader model required with translation by linker self.props = {} # extra properties + self.num_oloads = 0 # number of overloads for this instruction + if self.is_dxil_op: + self.process_oload_types() def __str__(self): return self.name @@ -105,6 +133,127 @@ def __str__(self): def fully_qualified_name(self): return "{}::{}".format(self.fully_qualified_name_prefix, self.name) + def process_oload_types(self): + if type(self.oload_types) is not str: + raise ValueError( + f"overload for '{self.name}' should be a string - use empty if n/a" + ) + # Early out for LLVM instructions + if not self.is_dxil_op: + return + + self.num_oloads = 0 + + # Early out for void overloads. + if self.oload_types == "v": + return + + if self.oload_types == "": + raise ValueError( + f"overload for '{self.name}' should not be empty - use void if n/a" + ) + if "v" in self.oload_types: + raise ValueError( + f"void overload should be exclusive to other types for '({self.name})'" + ) + + # Process oload_types for extended and vector overloads. 
+ # Contrived example: "hf<, dxil_max_overload_dims: + raise ValueError( + "Too many overload dimensions for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + + def check_duplicate_overloads(oloads): + if len(oloads) != len(set(oloads)): + raise ValueError( + "Duplicate overload types specified for DXIL op " + f"{self.name}: '{oloads}' in '{self.oload_types}'" + ) + + def check_overload_chars(oloads, valid_chars): + invalid_chars = set(oloads).difference(set(valid_chars)) + if invalid_chars: + raise ValueError( + "Invalid overload type character(s) used for DXIL op " + f"{self.name}: '{invalid_chars}' in '{oloads}' from " + f"'{self.oload_types}'" + ) + + for n, oloads in enumerate(oload_types): + if len(oloads) == 0: + raise ValueError( + f"Invalid empty overload type for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + check_overload_chars(oloads, dxil_all_user_oload_chars) + + # split at vector for component overloads, if vector specified + # without following components, use the scalar overloads that + # precede the vector character. + split = oloads.split("<") + if len(split) == 1: + # No vector overload. + continue + elif len(split) != 2: + raise ValueError( + f"Invalid vector overload for DXIL op {self.name}: " + f"{oloads} in '{self.oload_types}'" + ) + + # Split into scalar and vector component overloads. + scalars, vector_oloads = split + check_duplicate_overloads(scalars) + if not vector_oloads: + vector_oloads = scalars + else: + check_duplicate_overloads(vector_oloads) + if not vector_oloads: + raise ValueError( + "No scalar overload types provided with vector overload " + f"for DXIL op {self.name}: '{self.oload_types}'" + ) + check_overload_chars(vector_oloads, dxil_scalar_oload_chars) + oload_types[n] = scalars + "<" + vector_oloads + # Reconstruct overload string with default vector overloads. 
+ self.oload_types = ",".join(oload_types) + self.check_extended_oload_ops() + + def check_extended_oload_ops(self): + "Ensure ops has sequential extended overload references with $x0, $x1, etc." + if self.num_oloads < 2: + return + next_oload_idx = 0 + for i in self.ops: + if i.llvm_type.startswith("$x"): + if i.llvm_type != "$x" + str(next_oload_idx): + raise ValueError( + "Extended overloads are not sequentially referenced in " + f"DXIL op {self.name}: {i.llvm_type} != $x{next_oload_idx}" + ) + next_oload_idx += 1 + if next_oload_idx != self.num_oloads: + raise ValueError( + "Extended overloads are not referenced for all overload " + f"dimensions in DXIL op {self.name}: {next_oload_idx} != " + f"{self.num_oloads}" + ) + class db_dxil_metadata(object): "A representation for a metadata record" @@ -477,9 +626,7 @@ def populate_categories_and_models(self): "closesthit", ) for i in "GeometryIndex".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Raytracing object space uint System Values, raytracing tier 1.1" ) self.name_idx[i].shader_model = 6, 5 @@ -574,9 +721,7 @@ def populate_categories_and_models(self): self.name_idx[i].shader_model = 6, 3 self.name_idx[i].shader_stages = ("library", "intersection") for i in "CreateHandleForLib".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Library create handle from resource struct (like HL intrinsic)" ) self.name_idx[i].shader_model = 6, 3 @@ -5652,18 +5797,6 @@ def UFI(name, **mappings): ) for i in self.instr: self.verify_dense(i.ops, lambda x: x.pos, lambda x: i.name) - for i in self.instr: - if i.is_dxil_op: - assert i.oload_types != "", ( - "overload for DXIL operation %s should not be empty - use void if n/a" - % (i.name) - ) - assert i.oload_types == "v" or i.oload_types.find("v") < 0, ( - "void overload should be exclusive to other types (%s)" % i.name - ) - assert ( - type(i.oload_types) is str - ), "overload for %s should be a string - use empty if 
n/a" % (i.name) # Verify that all operations in each class have the same signature. import itertools @@ -8391,6 +8524,7 @@ def __init__( self.template_id_idx = template_id_idx # Template ID numeric value self.component_id_idx = component_id_idx # Component ID numeric value + class db_hlsl(object): "A database of HLSL language data" diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 4580e6c12c..f0d8b0ebae 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -40,8 +40,10 @@ def get_hlsl_opcode_data(): g_hlsl_opcode_data = {} return g_hlsl_opcode_data + g_db_hlsl = None + def get_db_hlsl(): global g_db_hlsl if g_db_hlsl is None: @@ -51,6 +53,10 @@ def get_db_hlsl(): return g_db_hlsl +def get_max_oload_dims(): + return f"const unsigned kDxilMaxOloadDims = {dxil_max_overload_dims};" + + def format_comment(prefix, val): "Formats a value with a line-comment prefix." result = "" @@ -507,26 +513,15 @@ def print_opfunc_props(self): OP=self.OP ) ) - print( - "// OpCode OpCode name, OpCodeClass OpCodeClass name, void, h, f, d, i1, i8, i16, i32, i64, udt, obj, function attribute" - ) - # Example formatted string: - # { OC::TempRegLoad, "TempRegLoad", OCC::TempRegLoad, "tempRegLoad", false, true, true, false, true, false, true, true, false, Attribute::ReadOnly, }, - # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 - # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 last_category = None - # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong, u(dt) - f = lambda i, c: "true" if i.oload_types.find(c) >= 0 else "false" lower_exceptions = { "CBufferLoad": "cbufferLoad", "CBufferLoadLegacy": "cbufferLoadLegacy", "GSInstanceID": "gsInstanceID", } - lower_fn = ( - lambda t: lower_exceptions[t] - if t in lower_exceptions - 
else t[:1].lower() + t[1:] + lower_fn = lambda t: ( + lower_exceptions[t] if t in lower_exceptions else t[:1].lower() + t[1:] ) attr_dict = { "": "None", @@ -537,35 +532,47 @@ def print_opfunc_props(self): "nr": "NoReturn", "wv": "None", } - attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + "," + attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + oload_to_mask = lambda oload: sum( + [1 << dxil_all_user_oload_chars.find(c) for c in oload] + ) + oloads_fn = lambda oloads: ( + "{" + ",".join(["{0x%x}" % m for m in oloads]) + "}" + ) for i in self.instrs: if last_category != i.category: if last_category != None: print("") - print( - " // {category:118} void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute".format( - category=i.category - ) - ) + if not i.is_reserved: + print(f" // {i.category}") last_category = i.category + scalar_masks = [] + vector_masks = [] + if i.num_oloads > 0: + for n, o in enumerate(i.oload_types.split(",")): + if "<" in o: + v = o.split("<") + scalar_masks.append(oload_to_mask(v[0] + "<")) + vector_masks.append(oload_to_mask(v[1])) + else: + scalar_masks.append(oload_to_mask(o)) + vector_masks.append(0) print( - " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} {classNameQuot:28} {{{v:>6},{h:>6},{f:>6},{d:>6},{b:>6},{e:>6},{w:>6},{i:>6},{l:>6},{u:>6},{o:>6}}}, {attr:20} }},".format( + ( + " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} " + + "{classNameQuot:28} {attr:20}, {num_oloads}, " + + "{scalar_masks:16}, {vector_masks:16} }}, " + + "// Overloads: {oloads}" + ).format( name=i.name + ",", quotName='"' + i.name + '",', className=i.dxil_class + ",", classNameQuot='"' + lower_fn(i.dxil_class) + '",', - v=f(i, "v"), - h=f(i, "h"), - f=f(i, "f"), - d=f(i, "d"), - b=f(i, "1"), - e=f(i, "8"), - w=f(i, "w"), - i=f(i, "i"), - l=f(i, "l"), - u=f(i, "u"), - o=f(i, "o"), attr=attr_fn(i), + num_oloads=i.num_oloads, + scalar_masks=oloads_fn(scalar_masks), + vector_masks=oloads_fn(vector_masks), + 
oloads=i.oload_types, OC=self.OC, OCC=self.OCC, ) @@ -621,6 +628,9 @@ def print_opfunc_table(self): "nodeproperty": "A(nodeProperty);", "noderecordproperty": "A(nodeRecordProperty);", "hit_object": "A(pHit);", + # Extended overload slots, extend as needed: + "$x0": "EXT(0);", + "$x1": "EXT(1);", } last_category = None for i in self.instrs: @@ -651,14 +661,24 @@ def print_opfunc_oload_type(self): obj_ty = "obj" vec_ty = "$vec" gsptr_ty = "$gsptr" + extended_ty = "$x" last_category = None index_dict = collections.OrderedDict() ptr_index_dict = collections.OrderedDict() single_dict = collections.OrderedDict() + # extended_dict collects overloads with multiple overload types + # grouped by the set of overload parameter indices. + extended_dict = collections.OrderedDict() struct_list = [] + extended_list = [] for instr in self.instrs: + if instr.num_oloads > 1: + # Process extended overloads separately. + extended_list.append(instr) + continue + ret_ty = instr.ops[0].llvm_type # Skip case return type is overload type if ret_ty == elt_ty: @@ -730,8 +750,7 @@ def print_opfunc_oload_type(self): "i": "IntegerType::get(Ctx, 32)", "l": "IntegerType::get(Ctx, 64)", "v": "Type::getVoidTy(Ctx)", - "u": "Type::getInt32PtrTy(Ctx)", - "o": "Type::getInt32PtrTy(Ctx)", + # No other types should be referenced here. } assert ty in type_code_texts, "llvm type %s is unknown" % (ty) ty_code = type_code_texts[ty] @@ -791,6 +810,61 @@ def print_opfunc_oload_type(self): line = line + "}" print(line) + for instr in extended_list: + # Collect indices for overloaded return and types, make a tuple of + # indices the key, and add the opcode to a list of opcodes for that + # key. Indices start with 0 for return type, and 1 for the first + # function parameter, which is the DXIL OpCode. + indices = [] + for index, op in enumerate(instr.ops): + # Skip dxil opcode. 
+ if op.pos == 1: + continue + + op_type = op.llvm_type + if op_type.startswith(extended_ty): + try: + extended_index = int(op_type[2:]) + except: + raise ValueError( + "Error parsing extended operand type " + + f"'{op_type}' for DXIL op '{instr.name}'" + ) + if extended_index != len(indices): + raise ValueError( + f"'$x{extended_index}' is not in sequential " + + f"order for DXIL op '{instr.name}'" + ) + indices.append(op.pos) + + if len(indices) != instr.num_oloads: + raise ValueError( + f"DXIL op {instr.name}: extended overload count " + + "mismatches the number of overload types" + ) + extended_dict.setdefault(tuple(indices), []).append(instr.name) + + def get_type_at_index(index): + if index == 0: + return "FT->getReturnType()" + return f"FT->getParamType({index - 1})" + + for index_tuple, opcodes in extended_dict.items(): + line = "" + for opcode in opcodes: + line = line + f"case OpCode::{opcode}:\n" + if index_tuple[-1] > 0: + line += ( + f" if (FT->getNumParams() < {index_tuple[-1]})\n" + + " return nullptr;\n" + ) + line += ( + " return llvm::StructType::get(Ctx, {" + + ", ".join([get_type_at_index(index) for index in index_tuple]) + + "});\n" + ) + print(line) + class db_valfns_gen: "A generator of validation functions." @@ -1599,6 +1673,7 @@ def get_highest_released_shader_model(): ) return result + def get_highest_shader_model(): result = """static const unsigned kHighestMajor = %d; static const unsigned kHighestMinor = %d;""" % ( @@ -1607,6 +1682,7 @@ def get_highest_shader_model(): ) return result + def get_dxil_version_minor(): return "const unsigned kDxilMinor = %d;" % highest_minor From a13938dd6bcd08b12ef086c834c35859f050ff3f Mon Sep 17 00:00:00 2001 From: Jeff Noyle Date: Tue, 1 Apr 2025 12:55:48 -0700 Subject: [PATCH 03/19] PIX: Check for existing PIX UAV in roots sigs before adding it again (#7238) The DXR invocation counting pass calls a function to add an output UAV twice. 
As part of adding the UAV, any DXIL-defined rootsigs will be extended to include this new UAV. If the UAV already exists in the rootsig, we should not add it again. (Doing so results in root sig that will fail validation.) Note: the test is not a file-check style because dxil-defined subobjects don't get rehydrated into the DxilModule when the output of dxc.exe is piped into the input of opt.exe, meaning that the broken case can't be exercised. --- lib/DxilPIXPasses/PixPassHelpers.cpp | 12 +++++++ tools/clang/unittests/HLSL/PixTest.cpp | 40 ++++++++++++++++++++- tools/clang/unittests/HLSL/PixTestUtils.cpp | 2 +- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index dfb4b3aa83..69385ae048 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -199,6 +199,18 @@ constexpr uint32_t toolsUAVRegister = 0; template void ExtendRootSig(RootSigDesc &rootSigDesc) { auto *existingParams = rootSigDesc.pParameters; + for (uint32_t i = 0; i < rootSigDesc.NumParameters; ++i) { + if (rootSigDesc.pParameters[i].ParameterType == + DxilRootParameterType::UAV) { + if (rootSigDesc.pParameters[i].Descriptor.RegisterSpace == + toolsRegisterSpace && + rootSigDesc.pParameters[i].Descriptor.ShaderRegister == + toolsUAVRegister) { + // Already added + return; + } + } + } auto *newParams = new RootParameterDesc[rootSigDesc.NumParameters + 1]; if (existingParams != nullptr) { memcpy(newParams, existingParams, diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index bb81c1c953..b97aa70c05 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -146,6 +146,7 @@ class PixTest : public ::testing::Test { TEST_METHOD(RootSignatureUpgrade_Annotation) TEST_METHOD(DxilPIXDXRInvocationsLog_SanityTest) + TEST_METHOD(DxilPIXDXRInvocationsLog_EmbeddedRootSigs) 
TEST_METHOD(DebugInstrumentation_TextOutput) TEST_METHOD(DebugInstrumentation_BlockReport) @@ -660,7 +661,7 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { CComPtr pOptimizedModule; CComPtr pText; VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( - dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + blob, Options.data(), Options.size(), &pOptimizedModule, &pText)); std::string outputText; if (pText->GetBufferSize() != 0) { @@ -2945,6 +2946,43 @@ void MyMiss(inout MyPayload payload) RunDxilPIXDXRInvocationsLog(compiledLib); } +TEST_F(PixTest, DxilPIXDXRInvocationsLog_EmbeddedRootSigs) { + + const char *source = R"x( + +GlobalRootSignature grs = {"CBV(b0)"}; +struct MyPayload +{ + float4 color; +}; + +[shader("raygeneration")] +void MyRayGen() +{ +} + +[shader("closesthit")] +void MyClosestHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("anyhit")] +void MyAnyHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("miss")] +void MyMiss(inout MyPayload payload) +{ +} + +)x"; + + auto compiledLib = Compile(m_dllSupport, source, L"lib_6_3", + {L"-Qstrip_reflect"}, L"RootSig"); + RunDxilPIXDXRInvocationsLog(compiledLib); +} + TEST_F(PixTest, DebugInstrumentation_TextOutput) { const char *source = R"x( diff --git a/tools/clang/unittests/HLSL/PixTestUtils.cpp b/tools/clang/unittests/HLSL/PixTestUtils.cpp index 91b6c4479c..61647ff5fa 100644 --- a/tools/clang/unittests/HLSL/PixTestUtils.cpp +++ b/tools/clang/unittests/HLSL/PixTestUtils.cpp @@ -397,7 +397,7 @@ CComPtr Compile(dxc::DxcDllSupport &dllSupport, const char *hlsl, CheckOperationSucceeded(pResult, &pProgram); CComPtr pLib; - VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); + VERIFY_SUCCEEDED(dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); const hlsl::DxilContainerHeader *pContainer = hlsl::IsDxilContainerLike( pProgram->GetBufferPointer(), pProgram->GetBufferSize()); 
VERIFY_IS_NOT_NULL(pContainer); From 2f357a9d625eaaa982ce1fac513e5f77a7d81900 Mon Sep 17 00:00:00 2001 From: Antonio Maiorano Date: Tue, 1 Apr 2025 21:38:26 -0400 Subject: [PATCH 04/19] Fix assert due to unreachable discard (#7289) When emitting discard in an unreachable code context (e.g. after an infinite loop), DXC would assert (if asserts enabled), or trigger a UBSAN failure because the discard instruction would have no parent. When an infinite loop is emitted during CodeGen, the InsertPt is cleared, thus subsequent discard instructions would be created, but no parent set. We skip emitting discard in this case, which follows the same pattern as is done for EmitIfStmt, and EmitSwitchStmt. --- tools/clang/lib/CodeGen/CGStmt.cpp | 4 ++++ .../FinishCodeGen/unreachable-discard.hlsl | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl diff --git a/tools/clang/lib/CodeGen/CGStmt.cpp b/tools/clang/lib/CodeGen/CGStmt.cpp index 080d824022..340550dbdd 100644 --- a/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/tools/clang/lib/CodeGen/CGStmt.cpp @@ -525,6 +525,10 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { // HLSL Change Begins. void CodeGenFunction::EmitDiscardStmt(const DiscardStmt &S) { + // Skip unreachable discard. + if (!HaveInsertPoint()) + return; + CGM.getHLSLRuntime().EmitHLSLDiscard(*this); } // HLSL Change Ends. 
diff --git a/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl new file mode 100644 index 0000000000..77c0f51911 --- /dev/null +++ b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc /T ps_6_5 -fcgl %s | FileCheck %s + +// Compiling this HLSL would trigger an assertion: +// While deleting: void (i32, float)* %dx.hl.op..void (i32, float) +// Use still stuck around after Def is destroyed: call void @"dx.hl.op..void (i32, float)"(i32 120, float -1.000000e+00), !dbg <0x503000001cc8> +// Error: assert(use_empty() && "Uses remain when a value is destroyed!") +// File: /src/external/DirectXShaderCompiler/lib/IR/Value.cpp(83) +// +// Bug was fixed in CodeGenFunction::EmitDiscardStmt by skipping the emission of +// an unreachable discard. + +// CHECK: define void @main() +// CHECK: br label % +// CHECK-NOT: call void @"dx.hl.op..void (i32, float)" +// CHECK: ret void + +void main() { + while (true) { + } + discard; +} From 572aef579dc90cb8de5df254ed3e7225c2c8a30e Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 1 Apr 2025 22:18:50 -0500 Subject: [PATCH 05/19] Disable code owners in main (#7298) MS just changed policy to enforce code owners across the whole enterprise, which is _not_ what we want. So we need to disable this in main for the time being. 
--- .github/CODEOWNERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 01ad1577b7..6cbdeb6270 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,2 @@ -* @microsoft/hlsl-release +# Uncomment the next line in release branches after ask-mode begins +# * @microsoft/hlsl-release From 9eb71198c9425ee77178e081e5188659ee2cf02c Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Wed, 2 Apr 2025 05:04:38 -0600 Subject: [PATCH 06/19] [SPIRV] Implements vk::BufferPointer proposal (#7163) Implements [vk::BufferPointer proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0010-vk-buffer-ref.md). Closes #6489. --- include/dxc/HlslIntrinsicOp.h | 5 +- include/dxc/dxcapi.internal.h | 8 +- lib/HLSL/HLOperationLower.cpp | 9 ++ tools/clang/include/clang/AST/HlslTypes.h | 33 ++++- .../clang/include/clang/AST/OperationKinds.h | 5 + tools/clang/include/clang/Basic/Attr.td | 17 +++ .../clang/Basic/DiagnosticSemaKinds.td | 9 +- .../clang/include/clang/SPIRV/SpirvBuilder.h | 11 ++ .../clang/include/clang/SPIRV/SpirvContext.h | 12 ++ .../include/clang/SPIRV/SpirvInstruction.h | 52 +++++++ tools/clang/include/clang/SPIRV/SpirvType.h | 24 ++++ .../clang/include/clang/SPIRV/SpirvVisitor.h | 6 + tools/clang/lib/AST/ASTContextHLSL.cpp | 79 +++++++++++ tools/clang/lib/AST/Expr.cpp | 9 +- tools/clang/lib/AST/ExprConstant.cpp | 9 ++ tools/clang/lib/AST/HlslTypes.cpp | 47 +++++++ tools/clang/lib/Lex/PPMacroExpansion.cpp | 7 +- .../lib/SPIRV/AlignmentSizeCalculator.cpp | 25 ++-- tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 9 +- tools/clang/lib/SPIRV/EmitVisitor.cpp | 44 +++++- tools/clang/lib/SPIRV/EmitVisitor.h | 25 ++-- tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 33 ++++- tools/clang/lib/SPIRV/LowerTypeVisitor.h | 4 + tools/clang/lib/SPIRV/SpirvBuilder.cpp | 37 +++++ tools/clang/lib/SPIRV/SpirvContext.cpp | 26 ++++ tools/clang/lib/SPIRV/SpirvEmitter.cpp | 133 
+++++++++++++++++- tools/clang/lib/SPIRV/SpirvEmitter.h | 13 ++ tools/clang/lib/SPIRV/SpirvInstruction.cpp | 28 ++++ tools/clang/lib/Sema/SemaCast.cpp | 17 +++ tools/clang/lib/Sema/SemaExprCXX.cpp | 28 ++++ tools/clang/lib/Sema/SemaHLSL.cpp | 111 ++++++++++++++- .../vk.buffer-pointer.alias.cs.hlsl | 28 ++++ .../CodeGenSPIRV/vk.buffer-pointer.alias.hlsl | 72 ++++++++++ .../vk.buffer-pointer.atomic.hlsl | 39 +++++ .../vk.buffer-pointer.error1.hlsl | 19 +++ .../vk.buffer-pointer.error2.hlsl | 19 +++ .../vk.buffer-pointer.error3.hlsl | 19 +++ .../vk.buffer-pointer.error4.hlsl | 18 +++ .../vk.buffer-pointer.error5.hlsl | 26 ++++ .../vk.buffer-pointer.error6.hlsl | 23 +++ .../vk.buffer-pointer.linked-list.hlsl | 101 +++++++++++++ .../CodeGenSPIRV/vk.buffer-pointer.read.hlsl | 48 +++++++ .../CodeGenSPIRV/vk.buffer-pointer.write.hlsl | 52 +++++++ utils/hct/gen_intrin_main.txt | 10 +- utils/hct/hctdb.py | 12 +- utils/hct/hlsl_intrinsic_opcodes.json | 7 +- 46 files changed, 1326 insertions(+), 42 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl diff --git 
a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 90f3fafd79..68b88822e8 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -231,6 +231,9 @@ enum class IntrinsicOp { IOP_VkReadClock = 223, IOP_Vkext_execution_mode = 224, IOP_Vkext_execution_mode_id = 225, + IOP_Vkreinterpret_pointer_cast = 360, + IOP_Vkstatic_pointer_cast = 361, + MOP_GetBufferContents = 362, MOP_Append = 226, MOP_RestartStrip = 227, MOP_CalculateLevelOfDetail = 228, @@ -366,7 +369,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 360, + Num_Intrinsics = 363, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index bf8a040673..f183bb6cf0 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -7,6 +7,9 @@ // // // Provides non-public declarations for the DirectX Compiler component. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// #ifndef __DXC_API_INTERNAL__ @@ -35,6 +38,7 @@ typedef struct ID3D10Blob ID3D10Blob; static const BYTE INTRIN_TEMPLATE_FROM_TYPE = 0xff; static const BYTE INTRIN_TEMPLATE_VARARGS = 0xfe; static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION = 0xfd; +static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION_2 = 0xfc; // Use this enumeration to describe allowed templates (layouts) in intrinsics. 
enum LEGAL_INTRINSIC_TEMPLATES { @@ -128,7 +132,9 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_HIT_OBJECT = 51, - LICOMPTYPE_COUNT = 52 + LICOMPTYPE_VK_BUFFER_POINTER = 52, + + LICOMPTYPE_COUNT = 53 }; static const BYTE IA_SPECIAL_BASE = 0xf0; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 3ab1f9fdec..445dbcc879 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7,6 +7,9 @@ // // // Lower functions to lower HL operations to DXIL operations. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// #define _USE_MATH_DEFINES @@ -6818,6 +6821,12 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil // opcode + {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, }; } // namespace static_assert( diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 3b517576fe..ab29e4bde7 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /// /// \file // /// \brief Defines the HLSL type system interface. 
// @@ -31,6 +34,7 @@ namespace clang { class ASTContext; class AttributeList; +class CXXConstructorDecl; class CXXMethodDecl; class CXXRecordDecl; class ClassTemplateDecl; @@ -402,6 +406,10 @@ DeclareNodeOrRecordType(clang::ASTContext &Ctx, DXIL::NodeIOKind Type, bool IsCompleteType = false); #ifdef ENABLE_SPIRV_CODEGEN +clang::CXXRecordDecl * +DeclareVkBufferPointerType(clang::ASTContext &context, + clang::DeclContext *declContext); + clang::CXXRecordDecl *DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, @@ -427,7 +435,7 @@ clang::VarDecl *DeclareBuiltinGlobal(llvm::StringRef name, clang::QualType Ty, /// method. AST context in which to /// work. Class in which the function template /// is declared. Function for which a -/// template is created. Declarations for templates to the /// function. Count of /// template declarations. A new function template declaration @@ -533,6 +541,29 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, clang::QualType paramType); bool IsPatchConstantFunctionDecl(const clang::FunctionDecl *FD); +#ifdef ENABLE_SPIRV_CODEGEN +bool IsVKBufferPointerType(clang::QualType type); +clang::QualType GetVKBufferPointerBufferType(clang::QualType type); +unsigned GetVKBufferPointerAlignment(clang::QualType type); +#endif + +/// Adds a constructor declaration to the specified class +/// record. ASTContext that owns +/// declarations. Record declaration in which +/// to add constructor. Result type for +/// constructor. Types for constructor +/// parameters. Names for constructor +/// parameters. Name for +/// constructor. Whether the constructor is a +/// const function. The method declaration for the +/// constructor. 
+clang::CXXConstructorDecl *CreateConstructorDeclarationWithParams( + clang::ASTContext &context, clang::CXXRecordDecl *recordDecl, + clang::QualType resultType, llvm::ArrayRef paramTypes, + llvm::ArrayRef paramNames, + clang::DeclarationName declarationName, bool isConst, + bool isTemplateFunction = false); + /// Adds a function declaration to the specified class /// record. ASTContext that owns /// declarations. Record declaration in which diff --git a/tools/clang/include/clang/AST/OperationKinds.h b/tools/clang/include/clang/AST/OperationKinds.h index 75e665a5e9..3909c8b5e8 100644 --- a/tools/clang/include/clang/AST/OperationKinds.h +++ b/tools/clang/include/clang/AST/OperationKinds.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file enumerates the different kinds of operations that can be @@ -321,6 +324,8 @@ enum CastKind { CK_HLSLCC_FloatingToIntegral, CK_HLSLCC_FloatingToBoolean, CK_HLSLCC_FloatingCast, + CK_VK_BufferPointerToIntegral, + CK_VK_IntegralToBufferPointer, // HLSL Change - Made CK_Invalid an enum case because otherwise it is UB to // assign it to a value of CastKind. diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 7a009aa7e1..9c117fb3ce 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// class DocumentationCategory { @@ -1447,6 +1450,20 @@ def VKStorageClassExt : InheritableAttr { let Documentation = [Undocumented]; } +def VKBufferPointer : InheritableAttr { + let Spellings = [CXX11<"", "hlsl_vk_buffer_pointer", 2021>]; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + +def VKAliasedPointer : InheritableAttr { + let Spellings = [CXX11<"vk", "aliased_pointer">]; + let Subjects = SubjectList<[Var, ParmVar], ErrorDiag>; + let Args = []; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + // Global variables that are of struct type def StructGlobalVar : SubsetSubjecthasGlobalStorage() && S->getType()->isStructureType()}]>; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6ae59cac14..4f4dc28a4c 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -7838,7 +7841,7 @@ def warn_hlsl_intrinsic_in_wrong_shader_model : Warning< "intrinsic %0 potentially used by '%1' requires shader model %2 or greater">, DefaultError, InGroup; def warn_hlsl_intrinsic_overload_in_wrong_shader_model : Warning< - "overload of intrinsic %0 requires shader model %1 or greater">, + "overload of intrinsic %0 requires shader model %1 or greater">, DefaultError, InGroup; def err_hlsl_intrinsic_template_arg_unsupported: Error< "Explicit template arguments on intrinsic %0 are not supported">; @@ -8004,6 +8007,10 @@ def err_hlsl_hitobject_unsupported_stage : Error< // SPIRV Change Starts def err_hlsl_vulkan_specific_feature: Error<"%0 is a Vulkan specific feature">; +def err_hlsl_vk_pointer_cast_alignment: Error< + "Vulkan buffer pointer cannot be cast to greater alignment">; +def err_hlsl_vk_static_pointer_cast_type: Error< + "vk::static_pointer_cast() content type must be base class of argument's content type">; // SPIRV Change Ends let CategoryName = "OpenMP Issue" in { diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index f03735115b..ed2cb3b6fd 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVBUILDER_H #define LLVM_CLANG_SPIRV_SPIRVBUILDER_H @@ -273,6 +276,14 @@ class SpirvBuilder { SpirvInstruction *sample, SourceLocation); + /// \brief Creates an OpConvertPtrToU SPIR-V instruction with the given + /// parameters.
+ SpirvConvertPtrToU *createConvertPtrToU(SpirvInstruction *ptr, QualType type); + + /// \brief Creates an OpConvertUToPtr SPIR-V instruction with the given + /// parameters. + SpirvConvertUToPtr *createConvertUToPtr(SpirvInstruction *val, QualType type); + /// \brief Creates SPIR-V instructions for sampling the given image. /// /// If compareVal is given a non-zero value, *Dref* variants of OpImageSample* diff --git a/tools/clang/include/clang/SPIRV/SpirvContext.h b/tools/clang/include/clang/SPIRV/SpirvContext.h index e65097bedb..c18c139642 100644 --- a/tools/clang/include/clang/SPIRV/SpirvContext.h +++ b/tools/clang/include/clang/SPIRV/SpirvContext.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVCONTEXT_H #define LLVM_CLANG_SPIRV_SPIRVCONTEXT_H @@ -317,6 +320,13 @@ class SpirvContext { const HybridPointerType *getPointerType(QualType pointee, spv::StorageClass); + const ForwardPointerType *getForwardPointerType(QualType pointee); + + const SpirvPointerType *getForwardReference(QualType type); + + void registerForwardReference(QualType type, + const SpirvPointerType *pointerType); + /// Generates (or reuses an existing) OpString for the given string literal.
SpirvString *getSpirvString(llvm::StringRef str); @@ -478,6 +488,8 @@ class SpirvContext { llvm::SmallVector hybridStructTypes; llvm::DenseMap pointerTypes; llvm::SmallVector hybridPointerTypes; + llvm::MapVector forwardPointerTypes; + llvm::MapVector forwardReferences; llvm::DenseSet functionTypes; llvm::DenseMap spirvIntrinsicTypesById; llvm::SmallVector spirvIntrinsicTypes; diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 7ec1375bde..7a7ad3aa4d 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H #define LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H @@ -67,6 +71,10 @@ class SpirvInstruction { IK_ConstantComposite, IK_ConstantNull, + // Pointer <-> uint conversions. 
+ IK_ConvertPtrToU, + IK_ConvertUToPtr, + // OpUndef IK_Undef, @@ -1306,6 +1314,50 @@ class SpirvConstantNull : public SpirvConstant { bool operator==(const SpirvConstantNull &that) const; }; +class SpirvConvertPtrToU : public SpirvInstruction { +public: + SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertPtrToU) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertPtrToU; + } + + bool operator==(const SpirvConvertPtrToU &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPtr() const { return ptr; } + +private: + SpirvInstruction *ptr; +}; + +class SpirvConvertUToPtr : public SpirvInstruction { +public: + SpirvConvertUToPtr(SpirvInstruction *intValue, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertUToPtr) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertUToPtr; + } + + bool operator==(const SpirvConvertUToPtr &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getVal() const { return val; } + +private: + SpirvInstruction *val; +}; + class SpirvUndef : public SpirvInstruction { public: SpirvUndef(QualType type); diff --git a/tools/clang/include/clang/SPIRV/SpirvType.h b/tools/clang/include/clang/SPIRV/SpirvType.h index 221f01e5ff..00a00ef238 100644 --- a/tools/clang/include/clang/SPIRV/SpirvType.h +++ b/tools/clang/include/clang/SPIRV/SpirvType.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVTYPE_H #define LLVM_CLANG_SPIRV_SPIRVTYPE_H @@ -53,6 +56,7 @@ class SpirvType { TK_RuntimeArray, TK_Struct, TK_Pointer, + TK_ForwardPointer, TK_Function, TK_AccelerationStructureNV, TK_RayQueryKHR, @@ -387,6 +391,26 @@ class SpirvPointerType : public SpirvType { spv::StorageClass storageClass; }; +/// Represents a SPIR-V forwarding pointer type. +class ForwardPointerType : public SpirvType { +public: + ForwardPointerType(QualType pointee) + : SpirvType(TK_ForwardPointer), pointeeType(pointee) {} + + static bool classof(const SpirvType *t) { + return t->getKind() == TK_ForwardPointer; + } + + const QualType getPointeeType() const { return pointeeType; } + + bool operator==(const ForwardPointerType &that) const { + return pointeeType == that.pointeeType; + } + +private: + const QualType pointeeType; +}; + /// Represents a SPIR-V function type. None of the parameters nor the return /// type is allowed to be a hybrid type. class FunctionType : public SpirvType { diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index 303a4600a1..93682518a1 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVVISITOR_H #define LLVM_CLANG_SPIRV_SPIRVVISITOR_H @@ -89,6 +93,8 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvConstantFloat) DEFINE_VISIT_METHOD(SpirvConstantComposite) DEFINE_VISIT_METHOD(SpirvConstantNull) + DEFINE_VISIT_METHOD(SpirvConvertPtrToU) + DEFINE_VISIT_METHOD(SpirvConvertUToPtr) DEFINE_VISIT_METHOD(SpirvUndef) DEFINE_VISIT_METHOD(SpirvCompositeConstruct) DEFINE_VISIT_METHOD(SpirvCompositeExtract) diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 3748f8f8f8..c7a031a219 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the ASTContext interface for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// @@ -1072,6 +1075,47 @@ static void CreateConstructorDeclaration( (*constructorDecl)->setAccess(AccessSpecifier::AS_public); } +CXXConstructorDecl *hlsl::CreateConstructorDeclarationWithParams( + ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, + ArrayRef paramTypes, ArrayRef paramNames, + DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DXASSERT_NOMSG(recordDecl != nullptr); + DXASSERT_NOMSG(!resultType.isNull()); + DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); + + TypeSourceInfo *tinfo; + CXXConstructorDecl *constructorDecl; + CreateConstructorDeclaration(context, recordDecl, resultType, paramTypes, + declarationName, isConst, &constructorDecl, + &tinfo); + + // Create and associate parameters to constructor. 
+ SmallVector parmVarDecls; + if (!paramTypes.empty()) { + for (unsigned int i = 0; i < paramTypes.size(); ++i) { + IdentifierInfo *argIi = &context.Idents.get(paramNames[i]); + ParmVarDecl *parmVarDecl = ParmVarDecl::Create( + context, constructorDecl, NoLoc, NoLoc, argIi, paramTypes[i], + context.getTrivialTypeSourceInfo(paramTypes[i], NoLoc), + StorageClass::SC_None, nullptr); + parmVarDecl->setScopeInfo(0, i); + DXASSERT(parmVarDecl->getFunctionScopeIndex() == i, + "otherwise failed to set correct index"); + parmVarDecls.push_back(parmVarDecl); + } + constructorDecl->setParams(ArrayRef(parmVarDecls)); + AssociateParametersToFunctionPrototype(tinfo, &parmVarDecls.front(), + parmVarDecls.size()); + } + + // If this is going to be part of a template function decl, don't add it to + // the record because the template function decl will be added instead. + if (!isTemplateFunction) + recordDecl->addDecl(constructorDecl); + + return constructorDecl; +} + static void CreateObjectFunctionDeclaration( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef args, DeclarationName declarationName, bool isConst, @@ -1324,6 +1368,41 @@ CXXRecordDecl *hlsl::DeclareNodeOrRecordType( } #ifdef ENABLE_SPIRV_CODEGEN +CXXRecordDecl *hlsl::DeclareVkBufferPointerType(ASTContext &context, + DeclContext *declContext) { + BuiltinTypeDeclBuilder Builder(declContext, "BufferPointer", + TagDecl::TagKind::TTK_Struct); + TemplateTypeParmDecl *TyParamDecl = + Builder.addTypeTemplateParam("recordtype"); + Builder.addIntegerTemplateParam("alignment", context.UnsignedIntTy, 0); + + Builder.startDefinition(); + + QualType paramType = QualType(TyParamDecl->getTypeForDecl(), 0); + CXXRecordDecl *recordDecl = Builder.getRecordDecl(); + + CXXMethodDecl *methodDecl = CreateObjectFunctionDeclarationWithParams( + context, recordDecl, context.getLValueReferenceType(paramType), {}, {}, + DeclarationName(&context.Idents.get("Get")), true); + CanQualType canQualType = + 
recordDecl->getTypeForDecl()->getCanonicalTypeUnqualified(); + CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, + {context.getRValueReferenceType(canQualType)}, {"bufferPointer"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false); + CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, {context.UnsignedIntTy}, {"address"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false); + + StringRef OpcodeGroup = GetHLOpcodeGroupName(HLOpcodeGroup::HLIntrinsic); + unsigned Opcode = static_cast(IntrinsicOp::MOP_GetBufferContents); + methodDecl->addAttr( + HLSLIntrinsicAttr::CreateImplicit(context, OpcodeGroup, "", Opcode)); + methodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + + return Builder.completeDefinition(); +} + CXXRecordDecl *hlsl::DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, diff --git a/tools/clang/lib/AST/Expr.cpp b/tools/clang/lib/AST/Expr.cpp index 0e2ec8c6c2..c6dc21217e 100644 --- a/tools/clang/lib/AST/Expr.cpp +++ b/tools/clang/lib/AST/Expr.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr class and subclasses. 
@@ -1716,7 +1719,11 @@ const char *CastExpr::getCastKindName() const { return "HLSLCC_FloatingToBoolean"; case CK_HLSLCC_FloatingCast: return "HLSLCC_FloatingCast"; - // HLSL Change Ends + case CK_VK_BufferPointerToIntegral: + return "VK_BufferPointerToIntegral"; + case CK_VK_IntegralToBufferPointer: + return "VK_IntegralToBufferPointer"; + // HLSL Change Ends } llvm_unreachable("Unhandled cast kind!"); diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index 5e8d4700bd..69e0760bce 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr constant evaluator. @@ -7829,6 +7832,12 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { return false; return Success(Value, E); } + + // HLSL Change Starts + case CK_VK_BufferPointerToIntegral: { + return false; + // HLSL Change Ends + } } llvm_unreachable("unknown cast resulting in integral value"); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index eaf8273413..d853125954 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -5,6 +5,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
// // /// /// \file // @@ -734,6 +737,50 @@ bool IsHLSLRayQueryType(clang::QualType type) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN +static llvm::Optional> +MaybeGetVKBufferPointerParams(clang::QualType type) { + const RecordType *RT = dyn_cast(type.getCanonicalType()); + if (!RT) + return llvm::None; + + const ClassTemplateSpecializationDecl *templateDecl = + dyn_cast(RT->getAsCXXRecordDecl()); + if (!templateDecl || !templateDecl->getName().equals("BufferPointer")) + return llvm::None; + + auto *namespaceDecl = + dyn_cast_or_null(templateDecl->getDeclContext()); + if (!namespaceDecl || !namespaceDecl->getName().equals("vk")) + return llvm::None; + + const TemplateArgumentList &argList = templateDecl->getTemplateArgs(); + QualType bufferType = argList[0].getAsType(); + unsigned align = + argList.size() > 1 ? argList[1].getAsIntegral().getLimitedValue() : 0; + return std::make_pair(bufferType, align); +} + +bool IsVKBufferPointerType(clang::QualType type) { + return MaybeGetVKBufferPointerParams(type).hasValue(); +} + +QualType GetVKBufferPointerBufferType(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert(bpParams.hasValue() && + "cannot get pointer type for type that is not a vk::BufferPointer"); + return bpParams.getValue().first; +} + +unsigned GetVKBufferPointerAlignment(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert( + bpParams.hasValue() && + "cannot get pointer alignment for type that is not a vk::BufferPointer"); + return bpParams.getValue().second; +} +#endif + QualType GetHLSLResourceResultType(QualType type) { // Don't canonicalize the type as to not lose snorm in Buffer const RecordType *RT = type->getAs(); diff --git a/tools/clang/lib/Lex/PPMacroExpansion.cpp b/tools/clang/lib/Lex/PPMacroExpansion.cpp index 64ce8c9182..ebfb93df2e 100644 --- a/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -5,6 +5,9 @@ // This file is 
distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the top level handling of macro expansion for the @@ -1080,7 +1083,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("nullability", true) .Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory)) .Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread)) - .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow)) + .Case("dataflow_sanitizer", + LangOpts.Sanitize.has(SanitizerKind::DataFlow)) // Objective-C features .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? .Case("objc_arc", LangOpts.ObjCAutoRefCount) @@ -1180,6 +1184,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("has_trivial_constructor", LangOpts.CPlusPlus) .Case("has_trivial_destructor", LangOpts.CPlusPlus) .Case("has_virtual_destructor", LangOpts.CPlusPlus) + .Case("hlsl_vk_buffer_pointer", LangOpts.SPIRV) .Case("is_abstract", LangOpts.CPlusPlus) .Case("is_base_of", LangOpts.CPlusPlus) .Case("is_class", LangOpts.CPlusPlus) diff --git a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp index 492640c493..db140f4766 100644 --- a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp +++ b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "AlignmentSizeCalculator.h" @@ -277,14 +280,20 @@ std::pair AlignmentSizeCalculator::getAlignmentAndSize( if (recordType != nullptr) { const llvm::StringRef name = recordType->getDecl()->getName(); - if (isTypeInVkNamespace(recordType) && name == "SpirvType") { - const ClassTemplateSpecializationDecl *templateDecl = - cast(recordType->getDecl()); - const uint64_t size = - templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); - const uint64_t alignment = - templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); - return {alignment, size}; + if (isTypeInVkNamespace(recordType)) { + if (name == "BufferPointer") { + return {8, 8}; // same as uint64_t + } + + if (name == "SpirvType") { + const ClassTemplateSpecializationDecl *templateDecl = + cast(recordType->getDecl()); + const uint64_t size = + templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); + const uint64_t alignment = + templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); + return {alignment, size}; + } } } diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index c2b5acff53..6fd0c6d950 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "CapabilityVisitor.h" @@ -200,8 +203,10 @@ void CapabilityVisitor::addCapabilityForType(const SpirvType *type, } // Pointer type else if (const auto *ptrType = dyn_cast(type)) { - addCapabilityForType(ptrType->getPointeeType(), loc, sc); - if (sc == spv::StorageClass::PhysicalStorageBuffer) { + addCapabilityForType(ptrType->getPointeeType(), loc, + ptrType->getStorageClass()); + if (ptrType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer) { addExtension(Extension::KHR_physical_storage_buffer, "SPV_KHR_physical_storage_buffer", loc); addCapability(spv::Capability::PhysicalStorageBufferAddresses); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 6f6f5f88cd..9c0368f7a1 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // Do not change the inclusion order between "dxc/Support/*" files. 
@@ -488,6 +491,7 @@ std::vector EmitVisitor::takeBinary() { debugVariableBinary.end()); result.insert(result.end(), annotationsBinary.begin(), annotationsBinary.end()); + result.insert(result.end(), fwdDeclBinary.begin(), fwdDeclBinary.end()); result.insert(result.end(), typeConstantBinary.begin(), typeConstantBinary.end()); result.insert(result.end(), globalVarsBinary.begin(), globalVarsBinary.end()); @@ -1016,6 +1020,28 @@ bool EmitVisitor::visit(SpirvConstantNull *inst) { return true; } +bool EmitVisitor::visit(SpirvConvertPtrToU *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getPtr())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvConvertUToPtr *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getVal())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + bool EmitVisitor::visit(SpirvUndef *inst) { typeHandler.getOrCreateUndef(inst); emitDebugNameForInstruction(getOrAssignResultId(inst), @@ -2012,10 +2038,11 @@ void EmitTypeHandler::initTypeInstruction(spv::Op op) { curTypeInst.push_back(static_cast(op)); } -void EmitTypeHandler::finalizeTypeInstruction() { +void EmitTypeHandler::finalizeTypeInstruction(bool isFwdDecl) { curTypeInst[0] |= static_cast(curTypeInst.size()) << 16; - typeConstantBinary->insert(typeConstantBinary->end(), curTypeInst.begin(), - curTypeInst.end()); + auto binarySection = isFwdDecl ? 
fwdDeclBinary : typeConstantBinary; + binarySection->insert(binarySection->end(), curTypeInst.begin(), + curTypeInst.end()); } uint32_t EmitTypeHandler::getResultIdForType(const SpirvType *type, @@ -2594,6 +2621,17 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { curTypeInst.push_back(pointeeType); finalizeTypeInstruction(); } + // Forward pointer types + else if (const auto *fwdPtrType = dyn_cast(type)) { + const SpirvPointerType *ptrType = + context.getForwardReference(fwdPtrType->getPointeeType()); + const uint32_t refId = emitType(ptrType); + initTypeInstruction(spv::Op::OpTypeForwardPointer); + curTypeInst.push_back(refId); + curTypeInst.push_back(static_cast(ptrType->getStorageClass())); + finalizeTypeInstruction(true); + return refId; + } // Function types else if (const auto *fnType = dyn_cast(type)) { const uint32_t retTypeId = emitType(fnType->getReturnType()); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index 2f5d99b89d..1f9b0939e6 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_EMITVISITOR_H #define LLVM_CLANG_SPIRV_EMITVISITOR_H @@ -49,15 +53,15 @@ class EmitTypeHandler { EmitTypeHandler(ASTContext &astCtx, SpirvContext &spvContext, const SpirvCodeGenOptions &opts, FeatureManager &featureMgr, std::vector *debugVec, - std::vector *decVec, + std::vector *decVec, std::vector *fwdVec, std::vector *typesVec, const std::function &takeNextIdFn) : astContext(astCtx), context(spvContext), featureManager(featureMgr), debugVariableBinary(debugVec), annotationsBinary(decVec), - typeConstantBinary(typesVec), takeNextIdFunction(takeNextIdFn), - emittedConstantInts({}), emittedConstantFloats({}), - emittedConstantComposites({}), emittedConstantNulls({}), - emittedUndef({}), emittedConstantBools() { + fwdDeclBinary(fwdVec), typeConstantBinary(typesVec), + takeNextIdFunction(takeNextIdFn), emittedConstantInts({}), + emittedConstantFloats({}), emittedConstantComposites({}), + emittedConstantNulls({}), emittedUndef({}), emittedConstantBools() { assert(decVec); assert(typesVec); } @@ -120,7 +124,7 @@ class EmitTypeHandler { private: void initTypeInstruction(spv::Op op); - void finalizeTypeInstruction(); + void finalizeTypeInstruction(bool isFwdDecl = false); // Returns the result-id for the given type and decorations. 
If a type with // the same decorations have already been used, it returns the existing @@ -161,6 +165,7 @@ class EmitTypeHandler { std::vector curDecorationInst; std::vector *debugVariableBinary; std::vector *annotationsBinary; + std::vector *fwdDeclBinary; std::vector *typeConstantBinary; std::function takeNextIdFunction; @@ -207,7 +212,7 @@ class EmitVisitor : public Visitor { : Visitor(opts, spvCtx), astContext(astCtx), featureManager(featureMgr), id(0), typeHandler(astCtx, spvCtx, opts, featureMgr, &debugVariableBinary, - &annotationsBinary, &typeConstantBinary, + &annotationsBinary, &fwdDeclBinary, &typeConstantBinary, [this]() -> uint32_t { return takeNextId(); }), debugMainFileId(0), debugInfoExtInstId(0), debugLineStart(0), debugLineEnd(0), debugColumnStart(0), debugColumnEnd(0), @@ -254,6 +259,8 @@ class EmitVisitor : public Visitor { bool visit(SpirvConstantFloat *) override; bool visit(SpirvConstantComposite *) override; bool visit(SpirvConstantNull *) override; + bool visit(SpirvConvertPtrToU *) override; + bool visit(SpirvConvertUToPtr *) override; bool visit(SpirvUndef *) override; bool visit(SpirvCompositeConstruct *) override; bool visit(SpirvCompositeExtract *) override; @@ -438,7 +445,9 @@ class EmitVisitor : public Visitor { // All annotation instructions: OpDecorate, OpMemberDecorate, OpGroupDecorate, // OpGroupMemberDecorate, and OpDecorationGroup. 
std::vector annotationsBinary; - // All type and constant instructions + // All forward pointer type declaration instructions + std::vector fwdDeclBinary; + // All other type and constant instructions std::vector typeConstantBinary; // All global variable declarations (all OpVariable instructions whose Storage // Class is not Function) diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index a5bc4a4aa8..b31d19b5d8 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include "LowerTypeVisitor.h" @@ -549,7 +552,9 @@ const SpirvType *LowerTypeVisitor::lowerType(QualType type, // checking the general struct type. 
if (const auto *spvType = lowerResourceType(type, rule, isRowMajor, srcLoc)) { - spvContext.registerStructDeclForSpirvType(spvType, decl); + if (!isa(spvType)) { + spvContext.registerStructDeclForSpirvType(spvType, decl); + } return spvType; } @@ -809,6 +814,32 @@ const SpirvType *LowerTypeVisitor::lowerVkTypeInVkNamespace( QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); return lowerType(realType, rule, llvm::None, srcLoc); } + if (name == "BufferPointer") { + const size_t visitedTypeStackSize = visitedTypeStack.size(); + (void)visitedTypeStackSize; // suppress unused warning (used only in assert) + + for (QualType t : visitedTypeStack) { + if (t == type) { + return spvContext.getForwardPointerType(type); + } + } + + QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); + if (rule == SpirvLayoutRule::Void) { + rule = spvOptions.sBufferLayoutRule; + } + visitedTypeStack.push_back(type); + + const SpirvType *spirvType = lowerType(realType, rule, llvm::None, srcLoc); + const auto *pointerType = spvContext.getPointerType( + spirvType, spv::StorageClass::PhysicalStorageBuffer); + spvContext.registerForwardReference(type, pointerType); + + assert(visitedTypeStack.back() == type); + visitedTypeStack.pop_back(); + assert(visitedTypeStack.size() == visitedTypeStackSize); + return pointerType; + } emitError("unknown type %0 in vk namespace", srcLoc) << type; return nullptr; } diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.h b/tools/clang/lib/SPIRV/LowerTypeVisitor.h index 96235d1508..5b26b67e3a 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.h +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_SPIRV_LOWERTYPEVISITOR_H @@ -137,6 +140,7 @@ class LowerTypeVisitor : public Visitor { AlignmentSizeCalculator alignmentCalc; /// alignment calculator bool useArrayForMat1xN; /// SPIR-V array for HLSL Matrix 1xN SpirvBuilder &spvBuilder; + SmallVector visitedTypeStack; // for type recursion detection }; } // end namespace spirv diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 1275e2b252..6b3f43fc77 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include "clang/SPIRV/SpirvBuilder.h" @@ -202,6 +205,14 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, instruction->setLayoutRule(pointer->getLayoutRule()); instruction->setRValue(true); + if (pointer->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer) { + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + resultType, pointer->getLayoutRule(), llvm::None, &stride); + instruction->setAlignment(align); + } + if (pointer->containsAliasComponent() && isAKindOfStructuredOrByteBuffer(resultType)) { instruction->setStorageClass(spv::StorageClass::Uniform); @@ -300,6 +311,16 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, new (context) SpirvStore(loc, address, source, llvm::None, range); insertPoint->addInstruction(instruction); + if (address->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer && + address->getAstResultType() != QualType()) { // exclude raw 
buffer + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + address->getAstResultType(), address->getLayoutRule(), llvm::None, + &stride); + instruction->setAlignment(align); + } + if (address->isRasterizerOrdered()) { createEndInvocationInterlockEXT(loc, range); } @@ -491,6 +512,22 @@ SpirvImageTexelPointer *SpirvBuilder::createImageTexelPointer( return instruction; } +SpirvConvertPtrToU *SpirvBuilder::createConvertPtrToU(SpirvInstruction *ptr, + QualType type) { + auto *instruction = new (context) SpirvConvertPtrToU(ptr, type); + instruction->setRValue(true); + insertPoint->addInstruction(instruction); + return instruction; +} + +SpirvConvertUToPtr *SpirvBuilder::createConvertUToPtr(SpirvInstruction *val, + QualType type) { + auto *instruction = new (context) SpirvConvertUToPtr(val, type); + instruction->setRValue(false); + insertPoint->addInstruction(instruction); + return instruction; +} + spv::ImageOperandsMask SpirvBuilder::composeImageOperandsMask( SpirvInstruction *bias, SpirvInstruction *lod, const std::pair &grad, diff --git a/tools/clang/lib/SPIRV/SpirvContext.cpp b/tools/clang/lib/SPIRV/SpirvContext.cpp index 6af36eb691..47dfc67433 100644 --- a/tools/clang/lib/SPIRV/SpirvContext.cpp +++ b/tools/clang/lib/SPIRV/SpirvContext.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include @@ -328,6 +331,29 @@ const HybridPointerType *SpirvContext::getPointerType(QualType pointee, return result; } +const ForwardPointerType * +SpirvContext::getForwardPointerType(QualType pointee) { + assert(hlsl::IsVKBufferPointerType(pointee)); + + auto foundPointee = forwardPointerTypes.find(pointee); + if (foundPointee != forwardPointerTypes.end()) { + return foundPointee->second; + } + + return forwardPointerTypes[pointee] = new (this) ForwardPointerType(pointee); +} + +const SpirvPointerType *SpirvContext::getForwardReference(QualType type) { + return forwardReferences[type]; +} + +void SpirvContext::registerForwardReference( + QualType type, const SpirvPointerType *pointerType) { + assert(pointerType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer); + forwardReferences[type] = pointerType; +} + FunctionType * SpirvContext::getFunctionType(const SpirvType *ret, llvm::ArrayRef param) { diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 579af04ea6..7cc84fa2fc 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file implements a SPIR-V emitter class that takes in HLSL AST and emits @@ -1233,12 +1237,17 @@ SpirvInstruction *SpirvEmitter::doExpr(const Expr *expr, } else if (isa(expr)) { assert(curThis); result = curThis; - } else if (isa(expr)) { + } else if (const auto *constructExpr = dyn_cast(expr)) { // For RayQuery type, we should not explicitly initialize it using // CXXConstructExpr e.g., RayQuery<0> r = RayQuery<0>() is the same as we do // not have a variable initialization. Setting nullptr for the SPIR-V // instruction used for expr will let us skip the variable initialization. - if (!hlsl::IsHLSLRayQueryType(expr->getType())) + if (hlsl::IsVKBufferPointerType(expr->getType())) { + const Expr *arg = constructExpr->getArg(0); + SpirvInstruction *value = loadIfGLValue(arg, arg->getSourceRange()); + result = spvBuilder.createConvertUToPtr(value, expr->getType()); + result->setRValue(); + } else if (!hlsl::IsHLSLRayQueryType(expr->getType())) result = curThis; } else if (const auto *unaryExpr = dyn_cast(expr)) { result = doUnaryExprOrTypeTraitExpr(unaryExpr); @@ -1543,7 +1552,23 @@ void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { // Create all parameters. for (uint32_t i = 0; i < decl->getNumParams(); ++i) { const ParmVarDecl *paramDecl = decl->getParamDecl(i); - (void)declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); + QualType paramType = paramDecl->getType(); + auto *param = + declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(paramType)) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(paramType, &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + param, + static_cast(paramDecl->hasAttr() + ? 
spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + } +#endif } if (decl->hasBody()) { @@ -1644,6 +1669,15 @@ bool SpirvEmitter::validateVKAttributes(const NamedDecl *decl) { loc); success = false; } + +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(cast(decl)->getType())) { + emitError("vk::push_constant attribute cannot be used on declarations " + "with vk::BufferPointer type", + loc); + success = false; + } +#endif } // vk::shader_record_nv is supported only on cbuffer/ConstantBuffer @@ -1951,6 +1985,11 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { return; } + if (hlsl::IsVKBufferPointerType(decl->getType()) && !decl->hasInit()) { + emitError("vk::BufferPointer has no default constructor", loc); + return; + } + // We can have VarDecls inside cbuffer/tbuffer. For those VarDecls, we need // to emit their cbuffer/tbuffer as a whole and access each individual one // using access chains. @@ -2037,10 +2076,24 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { needsLegalization = true; } - if (var != nullptr && decl->hasAttrs()) { - declIdMapper.decorateWithIntrinsicAttrs(decl, var); - if (auto attr = decl->getAttr()) { - var->setStorageClass(static_cast(attr->getStclass())); + if (var != nullptr) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(decl->getType(), &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + var, + static_cast(decl->hasAttr() + ? 
spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + + if (decl->hasAttrs()) { + declIdMapper.decorateWithIntrinsicAttrs(decl, var); + if (auto attr = decl->getAttr()) { + var->setStorageClass( + static_cast(attr->getStclass())); + } } } @@ -3665,6 +3718,12 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr, } case CastKind::CK_ToVoid: return nullptr; + case CastKind::CK_VK_BufferPointerToIntegral: { + return spvBuilder.createConvertPtrToU(doExpr(subExpr, range), toType); + } + case CastKind::CK_VK_IntegralToBufferPointer: { + return spvBuilder.createConvertUToPtr(doExpr(subExpr, range), toType); + } default: emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); @@ -5442,6 +5501,8 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, case IntrinsicOp::MOP_WorldRayDirection: case IntrinsicOp::MOP_WorldRayOrigin: return processRayQueryIntrinsics(expr, opcode); + case IntrinsicOp::MOP_GetBufferContents: + return processIntrinsicGetBufferContents(expr); default: emitError("intrinsic '%0' method unimplemented", expr->getCallee()->getExprLoc()) @@ -7021,6 +7082,12 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal, if (const auto *recordType = valType->getAs()) { assert(recordType->isStructureType()); + if (isTypeInVkNamespace(recordType) && + recordType->getDecl()->getName().equals("BufferPointer")) { + // Uniquely among structs, vk::BufferPointer lowers to a pointer type. 
+ return srcVal; + } + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, spvBuilder); const StructType *spirvStructType = @@ -9403,6 +9470,14 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_EvaluateAttributeSnapped: { retVal = processEvaluateAttributeAt(callExpr, hlslOpcode, srcLoc, srcRange); break; + } + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, false); + break; + } + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, true); + break; } INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true); INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false); @@ -10782,6 +10857,50 @@ SpirvEmitter::processIntrinsicClamp(const CallExpr *callExpr) { loc, range); } +SpirvInstruction * +SpirvEmitter::processIntrinsicPointerCast(const CallExpr *callExpr, + bool isStatic) { + const Expr *argExpr = callExpr->getArg(0); + SpirvInstruction *ptr = doExpr(argExpr); + QualType srcType = argExpr->getType(); + QualType destType = callExpr->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + return srcTypeArg == destTypeArg + ? 
ptr + : spvBuilder.createUnaryOp(spv::Op::OpBitcast, destType, ptr, + callExpr->getExprLoc(), + callExpr->getSourceRange()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( + const CXXMemberCallExpr *callExpr) { + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + Expr *obj = callExpr->getImplicitObjectArgument(); + SpirvInstruction *bufferPointer = doExpr(obj); + if (!bufferPointer) + return nullptr; + unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); + lowerTypeVisitor.visitInstruction(bufferPointer); + + const SpirvPointerType *bufferPointerType = + dyn_cast(bufferPointer->getResultType()); + SpirvLoad *retVal = + spvBuilder.createLoad(bufferPointerType->getPointeeType(), bufferPointer, + callExpr->getLocStart()); + if (!align) { + QualType bufferType = hlsl::GetVKBufferPointerBufferType(obj->getType()); + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t stride; + std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( + bufferType, retVal->getLayoutRule(), llvm::None, &stride); + } + retVal->setAlignment(align); + retVal->setRValue(false); + return retVal; +} + SpirvInstruction * SpirvEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr, bool isDevice, bool groupSync, diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index eca038527f..0a5ff308c2 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file defines a SPIR-V emitter class that takes in HLSL AST and emits @@ -491,6 +495,15 @@ class SpirvEmitter : public ASTConsumer { /// Processes the 'lit' intrinsic function. SpirvInstruction *processIntrinsicLit(const CallExpr *); + /// Processes the 'vk::static_pointer_cast' and 'vk_reinterpret_pointer_cast' + /// intrinsic functions. + SpirvInstruction *processIntrinsicPointerCast(const CallExpr *, + bool isStatic); + + /// Processes the vk::BufferPointer intrinsic function 'Get'. + SpirvInstruction * + processIntrinsicGetBufferContents(const CXXMemberCallExpr *); + /// Processes the 'GroupMemoryBarrier', 'GroupMemoryBarrierWithGroupSync', /// 'DeviceMemoryBarrier', 'DeviceMemoryBarrierWithGroupSync', /// 'AllMemoryBarrier', and 'AllMemoryBarrierWithGroupSync' intrinsic diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 21aada9e82..6deb11d946 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the in-memory representation of SPIR-V instructions. 
@@ -57,6 +61,8 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantInteger) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantFloat) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantComposite) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantNull) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertPtrToU) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertUToPtr) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvUndef) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeConstruct) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeExtract) @@ -620,6 +626,28 @@ bool SpirvConstantNull::operator==(const SpirvConstantNull &that) const { astResultType == that.astResultType; } +SpirvConvertPtrToU::SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertPtrToU, spv::Op::OpConvertPtrToU, type, loc, + range), + ptr(ptr) {} + +bool SpirvConvertPtrToU::operator==(const SpirvConvertPtrToU &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && ptr == that.ptr; +} + +SpirvConvertUToPtr::SpirvConvertUToPtr(SpirvInstruction *val, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertUToPtr, spv::Op::OpConvertUToPtr, type, loc, + range), + val(val) {} + +bool SpirvConvertUToPtr::operator==(const SpirvConvertUToPtr &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && val == that.val; +} + SpirvUndef::SpirvUndef(QualType type) : SpirvInstruction(IK_Undef, spv::Op::OpUndef, type, /*SourceLocation*/ {}) {} diff --git a/tools/clang/lib/Sema/SemaCast.cpp b/tools/clang/lib/Sema/SemaCast.cpp index 10668dc388..f5a864e2b6 100644 --- a/tools/clang/lib/Sema/SemaCast.cpp +++ b/tools/clang/lib/Sema/SemaCast.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. 
+// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements semantic analysis for cast expressions, including @@ -1543,6 +1546,20 @@ TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, if (InitSeq.isConstructorInitialization()) Kind = CK_ConstructorConversion; +#ifdef ENABLE_SPIRV_CODEGEN + // Special cases for vk::BufferPointer. + else if (hlsl::IsVKBufferPointerType(SrcExpr.get()->getType()) && + DestType->isIntegerType() && CCK == Sema::CCK_CStyleCast) { + Kind = CK_VK_BufferPointerToIntegral; + SrcExpr = Result; + return TC_Success; + } else if (hlsl::IsVKBufferPointerType(DestType) && + SrcExpr.get()->getType()->isIntegerType()) { + Kind = CK_VK_IntegralToBufferPointer; + SrcExpr = Result; + return TC_Success; + } +#endif else Kind = CK_NoOp; diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index f46bb0ad9f..4723bc93e9 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// /// /// \file @@ -1052,6 +1055,31 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, // corresponding cast expression. 
if (Exprs.size() == 1 && !ListInitialization) { Expr *Arg = Exprs[0]; +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(Ty) && Arg->getType()->isIntegerType()) { + for (auto *ctor : Ty->getAsCXXRecordDecl()->ctors()) { + if (auto *functionType = ctor->getType()->getAs()) { + if (functionType->getNumParams() != 1 || + !functionType->getParamType(0)->isIntegerType()) + continue; + + CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); + if (!Arg->isRValue()) { + Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); + } + if (argType != Context.UnsignedLongLongTy) { + Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, + CK_IntegralCast) + .get(); + } + return CXXConstructExpr::Create( + Context, Ty, TyBeginLoc, ctor, false, {Arg}, false, false, false, + false, CXXConstructExpr::ConstructionKind::CK_Complete, + SourceRange(LParenLoc, RParenLoc)); + } + } + } +#endif return BuildCXXFunctionalCastExpr(TInfo, LParenLoc, Arg, RParenLoc); } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index d20daa0ac0..f001cb70d9 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the semantic support for HLSL. 
// // // /////////////////////////////////////////////////////////////////////////////// @@ -195,6 +198,7 @@ enum ArBasicKind { AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -550,6 +554,7 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT, // AR_OBJECT_VK_LITERAL, BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE use recordType BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID use recordType + BPROP_OBJECT, // AR_OBJECT_VK_BUFFER_POINTER use recordType #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1232,6 +1237,9 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, AR_BASIC_UNKNOWN}; +static const ArBasicKind g_VKBufferPointerCT[] = {AR_OBJECT_VK_BUFFER_POINTER, + AR_BASIC_UNKNOWN}; + // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_NullCT, // LICOMPTYPE_VOID @@ -1287,6 +1295,7 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT + g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -1345,6 +1354,7 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_VK_SPIRV_TYPE, AR_OBJECT_VK_SPIRV_OPAQUE_TYPE, AR_OBJECT_VK_INTEGRAL_CONSTANT, AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1451,6 +1461,7 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_VK_LITERAL, 1, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE 1, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + 2, // AR_OBJECT_VK_BUFFER_POINTER 
#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1599,6 +1610,7 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_LITERAL, {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1763,6 +1775,7 @@ static const char *g_ArBasicTypeNames[] = { "Literal", "ext_type", "ext_result_id", + "BufferPointer", #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -2981,6 +2994,7 @@ class HLSLExternalSource : public ExternalSemaSource { ClassTemplateDecl *m_vkIntegralConstantTemplateDecl; ClassTemplateDecl *m_vkLiteralTemplateDecl; + ClassTemplateDecl *m_vkBufferPointerTemplateDecl; // Declarations for Work Graph Output Record types ClassTemplateDecl *m_GroupNodeOutputRecordsTemplateDecl; @@ -3486,6 +3500,25 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.push_back(templateTypeParmDecl); continue; } + if (pArgs[i].uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (TInfo == nullptr) { + TInfo = m_sema->getASTContext().CreateTypeSourceInfo( + m_context->UnsignedIntTy, 0); + } + IdentifierInfo *idT = &context.Idents.get("T"); + IdentifierInfo *idA = &context.Idents.get("A"); + TemplateTypeParmDecl *templateTypeParmDecl = + TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, + 0, idT, TypenameTrue, + ParameterPackFalse); + NonTypeTemplateParmDecl *nonTypeTemplateParmDecl = + NonTypeTemplateParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, + 0, 1, idA, context.UnsignedIntTy, + ParameterPackFalse, TInfo); + templateTypeParmDecl->setDefaultArgument(TInfo); + templateTypeParmDecls.push_back(templateTypeParmDecl); + templateTypeParmDecls.push_back(nonTypeTemplateParmDecl); + } } return templateTypeParmDecls; } @@ -3554,6 +3587,19 @@ class HLSLExternalSource : public 
ExternalSemaSource { case LICOMPTYPE_HIT_OBJECT: paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); break; + case LICOMPTYPE_VK_BUFFER_POINTER: { + const ArBasicKind *match = + std::find(g_ArBasicKindsAsTypes, + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + AR_OBJECT_VK_BUFFER_POINTER); + DXASSERT(match != + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + "otherwise can't find constant in basic kinds"); + size_t index = match - g_ArBasicKindsAsTypes; + paramTypes.push_back( + m_sema->getASTContext().getTypeDeclType(m_objectTypeDecls[index])); + break; + } default: DXASSERT(false, "Argument type of intrinsic function is not " "supported"); @@ -3932,6 +3978,12 @@ class HLSLExternalSource : public ExternalSemaSource { recordDecl = DeclareTemplateTypeWithHandleInDeclContext( *m_context, m_vkNSDecl, typeName, 1, nullptr); recordDecl->setImplicit(true); + } else if (kind == AR_OBJECT_VK_BUFFER_POINTER) { + if (!m_vkNSDecl) + continue; + recordDecl = DeclareVkBufferPointerType(*m_context, m_vkNSDecl); + recordDecl->setImplicit(true); + m_vkBufferPointerTemplateDecl = recordDecl->getDescribedClassTemplate(); } #endif else if (templateArgCount == 0) { @@ -4044,7 +4096,8 @@ class HLSLExternalSource : public ExternalSemaSource { HLSLExternalSource() : m_matrixTemplateDecl(nullptr), m_vectorTemplateDecl(nullptr), m_vkIntegralConstantTemplateDecl(nullptr), - m_vkLiteralTemplateDecl(nullptr), m_hlslNSDecl(nullptr), + m_vkLiteralTemplateDecl(nullptr), + m_vkBufferPointerTemplateDecl(nullptr), m_hlslNSDecl(nullptr), m_vkNSDecl(nullptr), m_dxNSDecl(nullptr), m_context(nullptr), m_sema(nullptr), m_hlslStringTypedef(nullptr) { memset(m_matrixTypes, 0, sizeof(m_matrixTypes)); @@ -4802,7 +4855,8 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_NODE_OUTPUT_ARRAY: case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: - case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: { + case 
AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: + case AR_OBJECT_VK_BUFFER_POINTER: { const ArBasicKind *match = std::find( g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); @@ -5318,6 +5372,8 @@ class HLSLExternalSource : public ExternalSemaSource { << type << GetMatrixOrVectorElementType(type); } return valid; + } else if (hlsl::IsVKBufferPointerType(qt)) { + return true; } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); objectKind = ClassifyRecordType(recordType); @@ -6790,6 +6846,7 @@ bool HLSLExternalSource::MatchArguments( if (pIntrinsic->pArgs[0].qwUsage && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_TYPE && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION && + pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION_2 && pIntrinsic->pArgs[0].uComponentTypeId != INTRIN_COMPTYPE_FROM_NODEOUTPUT) { CAB(pIntrinsic->pArgs[0].uTemplateId < MaxIntrinsicArgs, 0); @@ -6830,7 +6887,8 @@ bool HLSLExternalSource::MatchArguments( // Check template. if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE || - pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION) { + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION || + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { continue; // Already verified that this is available. 
} if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { @@ -6999,6 +7057,14 @@ bool HLSLExternalSource::MatchArguments( } else { pNewType = functionTemplateTypeArg; } + } else if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (i == 0 && + (builtinOp == hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast || + builtinOp == hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast)) { + pNewType = Args[0]->getType(); + } else { + badArgIdx = std::min(badArgIdx, i); + } } else if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { if (objectElement.isNull()) { @@ -9685,6 +9751,11 @@ bool HLSLExternalSource::CanConvert(SourceLocation loc, Expr *sourceExpr, return false; } + // Cast vk::BufferPointer to pointer address. + if (SourceInfo.EltKind == AR_OBJECT_VK_BUFFER_POINTER) { + return TargetInfo.EltKind == AR_BASIC_UINT64; + } + // Cast cbuffer to its result value. if ((SourceInfo.EltKind == AR_OBJECT_CONSTANT_BUFFER || SourceInfo.EltKind == AR_OBJECT_TEXTURE_BUFFER) && @@ -11533,6 +11604,30 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { return false; } +static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, + bool isStatic) { + const Expr *argExpr = CE->getArg(0); + QualType srcType = argExpr->getType(); + QualType destType = CE->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + + if (isStatic && srcTypeArg != destTypeArg && + !S.IsDerivedFrom(srcTypeArg, destTypeArg)) { + S.Diags.Report(CE->getExprLoc(), + diag::err_hlsl_vk_static_pointer_cast_type); + return true; + } + + if (hlsl::GetVKBufferPointerAlignment(destType) > + hlsl::GetVKBufferPointerAlignment(srcType)) { + S.Diags.Report(CE->getExprLoc(), diag::err_hlsl_vk_pointer_cast_alignment); + return true; + } + + return false; +} + // Check HLSL call constraints, not fatal to creating the AST. 
void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto) { @@ -11551,6 +11646,12 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, case hlsl::IntrinsicOp::IOP_Barrier: CheckBarrierCall(*this, FDecl, TheCall); break; + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, false); + break; + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, true); + break; default: break; } @@ -13801,6 +13902,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; // SPIRV Change Starts + case AttributeList::AT_VKAliasedPointer: { + declAttr = ::new (S.Context) VKAliasedPointerAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + } break; case AttributeList::AT_VKDecorateIdExt: { if (A.getNumArgs() == 0 || !A.getArg(0).is()) { Handled = false; diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl new file mode 100644 index 0000000000..f0f5c54a16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -E main -T cs_6_7 %s | FileCheck %s + +// Bug was causing alignment miss + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer rwbuf; + +void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0].Get()); +} + +// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} Aligned 8 +// CHECK: [[L1:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} [[L0]] Aligned 8 +// CHECK: [[L2:%[_0-9A-Za-z]*]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[L1]] %int_0 +// CHECK: OpStore [[L2]] %int_1 Aligned 4 + + diff --git 
a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl new file mode 100644 index 0000000000..fc5b9edad0 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl @@ -0,0 +1,72 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +cbuffer cbuf { + [[vk::aliased_pointer]] Globals_p bp; +} + +// CHECK: OpDecorate [[BP0:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP1:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[V4C:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK: [[GS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GS]] +// CHECK: [[TT:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGS]] +// CHECK: [[PTT:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[TT]] +// CHECK: [[PFV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[V4FLOAT]] +// CHECK: [[PPGS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGS]] +// CHECK: [[PBV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] + +void f([[vk::aliased_pointer]] Globals_p bp) { +} + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = 
float4(1.0,0.0,0.0,0.0); + [[vk::aliased_pointer]] Globals_p bp0 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + [[vk::aliased_pointer]] Globals_p bp1 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + bp0.Get().g_vTestFloat4 = vTest; + f(bp0); + return bp1.Get().g_vTestFloat4; // Returns float4(1.0,0.0,0.0,0.0) +} + +// CHECK: [[GP:%[_0-9A-Za-z]*]] = OpVariable [[PTT]] PushConstant +// CHECK: [[VTEST:%[0-9A-Za-z]*]] = OpVariable [[PFV4FLOAT]] Function +// CHECK: OpStore [[VTEST]] [[V4C]] +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad %_ptr_PhysicalStorageBuffer_Globals_s [[X1]] +// CHECK: OpStore [[BP0]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[X3]] +// CHECK: OpStore [[BP1]] [[X4]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[VTEST]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] Aligned 16 +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X6]] [[I1]] +// CHECK: OpStore [[X7]] [[X5]] Aligned 16 +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] Aligned 16 +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X8]] [[I1]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X9]] Aligned 16 +// CHECK: OpReturnValue [[X10]] + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl new file mode 100644 index 0000000000..992d8b39fd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -spirv -fcgl -T ps_6_0 %s | FileCheck %s + +struct S { + uint u; +}; + +typedef vk::BufferPointer BP; + +struct PC { + BP bp; +}; + +[[vk::push_constant]] PC pc; + +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: 
[[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[S:%[_0-9A-Za-z]*]] = OpTypeStruct [[UINT]] +// CHECK: [[PS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[S]] +// CHECK: [[PU:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[U1:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 1 +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} PushConstant + +void main() +{ +// CHECK: [[IN:%[_0-9A-Za-z]*]] = OpVariable +// CHECK: [[OUT:%[_0-9A-Za-z]*]] = OpVariable + uint u0, u1; + +// CHECK: [[X1:%[_0-9]+]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[PC]] [[I0]] +// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] Aligned 4 +// CHECK: [[X3:%[_0-9]+]] = OpAccessChain [[PU]] [[X2]] [[I0]] +// CHECK: [[X4:%[_0-9]+]] = OpLoad [[UINT]] [[IN]] +// CHECK: [[X5:%[_0-9]+]] = OpAtomicExchange [[UINT]] [[X3]] [[U1]] [[U0]] [[X4]] +// CHECK: OpStore [[OUT]] [[X5]] + InterlockedExchange(pc.bp.Get().u, u0, u1); +} + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl new file mode 100644 index 0000000000..86cf48c41e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + float a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + float tmp = buffer.Get().a; + buffer.Get().a = tmp; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl new file mode 100644 index 0000000000..09585a7664 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Globals_s { + float4 a; 
+}; + +typedef vk::BufferPointer Globals_p; +typedef vk::BufferPointer Globals_pp; + +[[vk::push_constant]] +Globals_pp bda; + +[numthreads(1, 1, 1)] +void main() { + float4 r = bda.Get().Get().a; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl new file mode 100644 index 0000000000..e803b5b754 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + uint data = buffer.Get(); + buffer.Get() = data; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl new file mode 100644 index 0000000000..1029aa7f2e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl @@ -0,0 +1,18 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + buffer.Get() = 1; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl new file mode 100644 index 0000000000..62bdb7f3cb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl @@ -0,0 +1,26 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + 
+typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +//[[vk::push_constant]] +//BufferContent buffer; + +RWStructuredBuffer rwbuf; + +// Wrong type in the parameter. +void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0]); +} + +// CHECK: no matching function for call to 'foo' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl new file mode 100644 index 0000000000..a89b286edf --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl @@ -0,0 +1,23 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer buf; + +void foo(const BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + static BufferContent bcs = buf[0]; + static BufferBuffer bbs = (BufferContent)bcs; +} + +// CHECK: cannot initialize a variable of type 'BufferPointer' with an lvalue of type 'BufferPointer' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl new file mode 100644 index 0000000000..71fee1a795 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpCapability PhysicalStorageBufferAddresses +// CHECK: OpExtension "SPV_KHR_physical_storage_buffer" +// CHECK: OpMemoryModel PhysicalStorageBuffer64 GLSL450 +// CHECK: OpEntryPoint Fragment [[MAIN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +// Forward declaration +typedef struct block_s block_t; +typedef vk::BufferPointer block_p; + +struct block_s +{ + float4 x; + block_p next; +}; + +struct TestPushConstant_t +{ + block_p root; +}; + +[[vk::push_constant]] 
TestPushConstant_t g_PushConstants; + +// CHECK: OpDecorate [[GP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[COPY1:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpDecorate [[COPY2:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpMemberDecorate [[BLOCK:%[_0-9A-Za-z]*]] 1 Offset 16 +// CHECK: OpTypeForwardPointer [[PBLOCK:%[_0-9A-Za-z]*]] PhysicalStorageBuffer +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[ULONG:%[_0-9A-Za-z]*]] = OpTypeInt 64 0 +// CHECK: [[UL0:%[_0-9A-Za-z]*]] = OpConstant [[ULONG]] 0 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F0]] [[F0]] [[F0]] [[F0]] +// CHECK: [[BLOCK]] = OpTypeStruct [[V4FLOAT]] [[PBLOCK]] +// CHECK: [[PBLOCK]] = OpTypePointer PhysicalStorageBuffer [[BLOCK]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PBLOCK]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPBLOCK0:%[_0-9A-Za-z]*]] = OpTypePointer Function %_ptr_PhysicalStorageBuffer_block_s +// CHECK: [[PPBLOCK1:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PBLOCK]] +// CHECK: [[PPBLOCK2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[PBLOCK]] +// CHECK: [[BOOL:%[_0-9A-Za-z]*]] = OpTypeBool +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +[numthreads(1,1,1)] +float4 MainPs(void) : SV_Target0 +{ + if (__has_feature(hlsl_vk_buffer_pointer)) { + [[vk::aliased_pointer]] block_p g_p = + vk::static_pointer_cast(g_PushConstants.root); + g_p = 
g_p.Get().next; + uint64_t addr = (uint64_t)g_p; + block_p copy1 = block_p(addr); + block_p copy2 = block_p(copy1); + if (addr == 0) // Null pointer test + return float4(0.0,0.0,0.0,0.0); + return g_p.Get().x; + } + return float4(0.0,0.0,0.0,0.0); +} + +// CHECK: [[MAIN]] = OpFunction +// CHECK-NEXT: OpLabel +// CHECK-NEXT: [[RESULT:%[_0-9A-Za-z]*]] = OpFunctionCall [[V4FLOAT]] [[FUN:%[_0-9A-Za-z]*]] +// CHECK: OpStore [[OUT]] [[RESULT]] +// CHECK: OpFunctionEnd +// CHECK: [[FUN]] = OpFunction [[V4FLOAT]] +// CHECK: [[GP]] = OpVariable [[PPBLOCK0]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK1]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X1]] +// CHECK: OpStore [[GP]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK2]] [[X3]] [[S1]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 8 +// CHECK: OpStore [[GP]] [[X5]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpConvertPtrToU [[ULONG]] [[X6]] +// CHECK: OpStore [[ADDR:%[_0-9A-Za-z]*]] [[X7]] +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PBLOCK]] [[X8]] +// CHECK: OpStore [[COPY1]] [[X9]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[COPY1]] +// CHECK: OpStore [[COPY2]] [[X10]] +// CHECK: [[X11:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X12:%[_0-9A-Za-z]*]] = OpIEqual %bool [[X11]] [[UL0]] +// CHECK: OpBranchConditional [[X12]] [[IF_TRUE:%[_0-9A-Za-z]*]] [[IF_MERGE:%[_0-9A-Za-z]*]] +// CHECK: [[IF_TRUE]] = OpLabel +// CHECK: OpReturnValue [[CV4FLOAT]] +// CHECK: [[IF_MERGE]] = OpLabel +// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X14:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X13]] [[S0]] +// CHECK: [[X15:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X14]] Aligned 16 +// CHECK: 
OpReturnValue [[X15]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl new file mode 100644 index 0000000000..c7d6f0ed2b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl @@ -0,0 +1,48 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: 
[[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X3]] Aligned 16 +// CHECK: OpStore [[OUT]] [[X4]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl new file mode 100644 index 0000000000..b2efd02cbd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl @@ -0,0 +1,52 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK-DAG: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = 
OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = float4(1.0,0.0,0.0,0.0); + g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4 = vTest; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: OpStore [[X3]] [[CV4FLOAT]] Aligned 16 +// CHECK: OpStore [[OUT]] [[CV4FLOAT]] +// CHECK: OpFunctionEnd diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 0ca5b0716b..55c3643d95 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1,6 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// // See hctdb.py for the implementation of intrinsic file processing. 
// // Intrinsic declarations are grouped into namespaces that @@ -393,7 +396,13 @@ void [[]] RawBufferStore(in u64 addr, in $funcT value); void [[]] RawBufferStore(in u64 addr, in $funcT value, in uint alignment); void [[]] ext_execution_mode(in uint mode, ...); void [[]] ext_execution_mode_id(in uint mode, ...); +$funcT2 [[]] static_pointer_cast(in VkBufferPointer ptr); +$funcT2 [[]] reinterpret_pointer_cast(in VkBufferPointer ptr); + +} namespace +namespace BufferPointerMethods { +$classT [[ro]] GetBufferContents(); } namespace // SPIRV Change Ends @@ -1147,4 +1156,3 @@ $classT [[]] SubpassLoad(in int sample) : subpassinputms_load; } namespace // SPIRV Change Ends - diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 05bc7d472d..5eb35fb52a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1,5 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. +# Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +# All rights reserved. ############################################################################### # DXIL information. 
# ############################################################################### @@ -8584,6 +8586,7 @@ def __init__(self, intrinsic_defs, opcode_data): "GroupNodeOutputRecords": "LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS", "ThreadNodeOutputRecords": "LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS", "DxHitObject": "LICOMPTYPE_HIT_OBJECT", + "VkBufferPointer": "LICOMPTYPE_VK_BUFFER_POINTER", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} @@ -8645,7 +8648,8 @@ def load_intrinsics(self, intrinsic_defs): (?:RW)?(?:Texture\w*|ByteAddressBuffer) | acceleration_struct | ray_desc | RayQuery | DxHitObject | Node\w* | RWNode\w* | EmptyNode\w* | - AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* + AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* | + VkBufferPointer $)""", flags=re.VERBOSE, ) @@ -8697,6 +8701,10 @@ def process_arg(desc, idx, done_args, intrinsic_name): template_id = "-3" component_id = "0" type_name = "void" + elif type_name == "$funcT2": + template_id = "-4" + component_id = "0" + type_name = "void" elif type_name == "...": assert idx != 0, "'...' 
can only be used in the parameter list" template_id = "-2" @@ -8825,6 +8833,8 @@ def do_object(m): template_id = "INTRIN_TEMPLATE_VARARGS" elif template_id == "-3": template_id = "INTRIN_TEMPLATE_FROM_FUNCTION" + elif template_id == "-4": + template_id = "INTRIN_TEMPLATE_FROM_FUNCTION_2" if component_id == "-1": component_id = "INTRIN_COMPTYPE_FROM_TYPE_ELT0" if component_id == "-2": diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json index 4c85069488..c4527277cd 100644 --- a/utils/hct/hlsl_intrinsic_opcodes.json +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -1,6 +1,6 @@ { "IntrinsicOpCodes": { - "Num_Intrinsics": 360, + "Num_Intrinsics": 363, "IOP_AcceptHitAndEndSearch": 0, "IOP_AddUint64": 1, "IOP_AllMemoryBarrier": 2, @@ -360,6 +360,9 @@ "MOP_InterlockedUMax": 356, "MOP_InterlockedUMin": 357, "MOP_DxHitObject_MakeNop": 358, - "IOP_DxMaybeReorderThread": 359 + "IOP_DxMaybeReorderThread": 359, + "IOP_Vkreinterpret_pointer_cast": 360, + "IOP_Vkstatic_pointer_cast": 361, + "MOP_GetBufferContents": 362 } } From 2b1c2e640dae09adf1cb2dd52bc5ce860d73b02b Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Wed, 2 Apr 2025 10:09:22 -0700 Subject: [PATCH 07/19] Fix typo in exec tests comment (#7299) Keep seeing this comment typo and wanted to rectify. 
--- .../unittests/HLSLExec/ExecutionTest.cpp | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 91b42f6b79..6db27d7a41 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -5632,7 +5632,7 @@ void ExecutionTest::RunBasicShaderModelTest(CComPtr pDevice, std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test is creating the resource to run + // this callback is called when the test is creating the resource to run // the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { UNREFERENCED_PARAMETER(Name); @@ -6999,7 +6999,7 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7067,7 +7067,7 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7157,7 +7157,7 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ 
-7234,7 +7234,7 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7314,7 +7314,7 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7424,7 +7424,7 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7494,7 +7494,7 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -7554,7 +7554,7 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -7619,7 +7619,7 @@ 
TEST_F(ExecutionTest, BinaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -7707,7 +7707,7 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -7777,7 +7777,7 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { int numExpected = Validation_Expected2->size() == 0 ? 1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -7869,7 +7869,7 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -7948,7 +7948,7 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == 
_stricmp(Name, "SUnaryIntOp")); @@ -8016,7 +8016,7 @@ TEST_F(ExecutionTest, UnaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -8091,7 +8091,7 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -8187,7 +8187,7 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -8264,7 +8264,7 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { int numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -8363,7 +8363,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -8948,7 +8948,7 @@ TEST_F(ExecutionTest, DotTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "DotOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SDotOp")); @@ -9240,7 +9240,7 @@ TEST_F(ExecutionTest, Msad4Test) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "Msad4", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SMsad4")); @@ -9342,7 +9342,7 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -9455,7 +9455,7 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { std::shared_ptr test = 
RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -9883,7 +9883,7 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( ++maskIndex) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "WaveIntrinsicsOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); @@ -12609,7 +12609,7 @@ TEST_F(ExecutionTest, HelperLaneTest) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "HelperLaneTestNoWave", - // this callbacked is called when the test is creating the resource to + // this callback is called when the test is creating the resource to // run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); From 3b1a29bf89520c0159669487feaaac5a98ab8ed5 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 2 Apr 2025 16:19:43 -0700 Subject: [PATCH 08/19] [OMM] Add DXR Entry point test, non-library target test, conforming tests to spec. (#7281) This PR adds 2 tests that were mentioned in the spec that haven't yet been added. 1. A test that makes sure that restricted flags are diagnosed in DXR entry shaders. 2. A test that makes sure that no diagnostics are emitted when a restricted flag is used for a subobject in a non-library shader target. 
Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7282 --- .../SemaHLSL/rayquery-omm-DXR-entry-point.hlsl | 17 +++++++++++++++++ .../test/SemaHLSL/rayquery-omm-type-diag.hlsl | 4 ++-- .../raytracingpipelineconfig1-no-errors.hlsl | 12 ++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl create mode 100644 tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl new file mode 100644 index 0000000000..722187cf43 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_3 -validator-version 1.8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +RaytracingAccelerationStructure RTAS; +// DXR entry to test that restricted flags are diagnosed. 
+[shader("raygeneration")] +void main(void) { + RayDesc rayDesc; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} + RayQuery rayQuery; + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl index 981788a688..5e484d193e 100644 --- a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl +++ b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl @@ -1,5 +1,5 @@ -// RUN: %dxc -T vs_6_9 -E RayQueryTests -verify %s -// RUN: %dxc -T vs_6_5 -E RayQueryTests2 -verify %s +// RUN: %dxc -T vs_6_9 -verify %s +// RUN: %dxc -T vs_6_5 -verify %s // validate 2nd template argument flags // expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl new file mode 100644 index 0000000000..272a46a87e --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T ps_6_0 -verify %s + +// expected-no-diagnostics +// No diagnostic is expected because this is a non-library target, +// and SubObjects are ignored on non-library targets. 
+ +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +[shader("pixel")] +int main(int i : INDEX) : SV_Target { + return 1; +} From 65564102a78a99b191228cc88ef4ccee2f987783 Mon Sep 17 00:00:00 2001 From: Cassandra Beckley Date: Wed, 2 Apr 2025 18:38:02 -0700 Subject: [PATCH 09/19] [SPIR-V] Implement QuadAny and QuadAll (#7266) If `"SPV_KHR_quad_control"` can be used, uses `OpGroupNonUniformQuadAnyKHR` and `OpGroupNonUniformQuadAllKHR`. If not, falls back to constructing the value using `OpGroupNonUniformQuadSwap`. Fixes #7247 --- docs/SPIR-V.rst | 8 +++ .../include/clang/SPIRV/FeatureManager.h | 1 + .../clang/include/clang/SPIRV/SpirvBuilder.h | 2 +- .../include/clang/SPIRV/SpirvInstruction.h | 8 +-- tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 3 ++ tools/clang/lib/SPIRV/EmitVisitor.cpp | 7 +-- tools/clang/lib/SPIRV/FeatureManager.cpp | 3 ++ tools/clang/lib/SPIRV/SpirvBuilder.cpp | 2 +- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 51 +++++++++++++++++++ tools/clang/lib/SPIRV/SpirvEmitter.h | 4 ++ tools/clang/lib/SPIRV/SpirvInstruction.cpp | 9 +++- .../test/CodeGenSPIRV/sm6.quad-any-all.hlsl | 41 +++++++++++++++ 12 files changed, 130 insertions(+), 9 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 899b587492..b5e9c05079 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -320,6 +320,7 @@ Supported extensions * SPV_KHR_maximal_reconvergence * SPV_KHR_float_controls * SPV_NV_shader_subgroup_partitioned +* SPV_KHR_quad_control Vulkan specific attributes -------------------------- @@ -4008,6 +4009,8 @@ Quad ``QuadReadAcrossX()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossY()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossDiagonal()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadLaneAt()`` ``OpGroupNonUniformQuadBroadcast`` +Quad ``QuadAny()`` ``OpGroupNonUniformQuadAnyKHR`` +Quad ``QuadAll()`` ``OpGroupNonUniformQuadAllKHR`` N/A 
``WaveMatch()`` ``OpGroupNonUniformPartitionNV`` Multiprefix ``WaveMultiPrefixSum()`` ``OpGroupNonUniform*Add`` ``PartitionedExclusiveScanNV`` Multiprefix ``WaveMultiPrefixProduct()`` ``OpGroupNonUniform*Mul`` ``PartitionedExclusiveScanNV`` @@ -4016,6 +4019,11 @@ Multiprefix ``WaveMultiPrefixBitOr()`` ``OpGroupNonUniformLogicalOr`` ` Multiprefix ``WaveMultiPrefixBitXor()`` ``OpGroupNonUniformLogicalXor`` ``PartitionedExclusiveScanNV`` ============= ============================ =================================== ============================== +``QuadAny`` and ``QuadAll`` will use the ``OpGroupNonUniformQuadAnyKHR`` and +``OpGroupNonUniformQuadAllKHR`` instructions if the ``SPV_KHR_quad_control`` +extension is enabled. If it is not, they will fall back to constructing the +value using multiple calls to ``OpGroupNonUniformQuadSwap``. + The Implicit ``vk`` Namespace ============================= diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 8a9755ae79..3c1871df37 100644 --- a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -64,6 +64,7 @@ enum class Extension { KHR_maximal_reconvergence, KHR_float_controls, NV_shader_subgroup_partitioned, + KHR_quad_control, Unknown, }; diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index ed2cb3b6fd..5e03d1ef96 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -242,7 +242,7 @@ class SpirvBuilder { /// \brief Creates an operation with the given OpGroupNonUniform* SPIR-V /// opcode. 
SpirvGroupNonUniformOp *createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation, llvm::Optional groupOp = llvm::None); diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 7a7ad3aa4d..f49a295610 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -1566,7 +1566,8 @@ class SpirvFunctionCall : public SpirvInstruction { /// \brief OpGroupNonUniform* instructions class SpirvGroupNonUniformOp : public SpirvInstruction { public: - SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, spv::Scope scope, + SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, + llvm::Optional scope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional group); @@ -1580,7 +1581,8 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { bool invokeVisitor(Visitor *v) override; - spv::Scope getExecutionScope() const { return execScope; } + bool hasExecutionScope() const { return execScope.hasValue(); } + spv::Scope getExecutionScope() const { return execScope.getValue(); } llvm::ArrayRef getOperands() const { return operands; } @@ -1598,7 +1600,7 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { } private: - spv::Scope execScope; + llvm::Optional execScope; llvm::SmallVector operands; llvm::Optional groupOp; }; diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 6fd0c6d950..24dfdc2e9a 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -887,6 +887,9 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { addCapability(spv::Capability::InterpolationFunction); + addExtensionAndCapabilitiesIfEnabled(Extension::KHR_quad_control, + {spv::Capability::QuadControlKHR}); + return 
true; } diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 9c0368f7a1..eb00f59632 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -1134,9 +1134,10 @@ bool EmitVisitor::visit(SpirvGroupNonUniformOp *inst) { initInstruction(inst); curInst.push_back(inst->getResultTypeId()); curInst.push_back(getOrAssignResultId(inst)); - curInst.push_back(typeHandler.getOrCreateConstantInt( - llvm::APInt(32, static_cast(inst->getExecutionScope())), - context.getUIntType(32), /* isSpecConst */ false)); + if (inst->hasExecutionScope()) + curInst.push_back(typeHandler.getOrCreateConstantInt( + llvm::APInt(32, static_cast(inst->getExecutionScope())), + context.getUIntType(32), /* isSpecConst */ false)); if (inst->hasGroupOp()) curInst.push_back(static_cast(inst->getGroupOp())); for (auto *operand : inst->getOperands()) diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index a8ee1de000..7fb449fee9 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -226,6 +226,7 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_float_controls", Extension::KHR_float_controls) .Case("SPV_NV_shader_subgroup_partitioned", Extension::NV_shader_subgroup_partitioned) + .Case("SPV_KHR_quad_control", Extension::KHR_quad_control) .Default(Extension::Unknown); } @@ -297,6 +298,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_float_controls"; case Extension::NV_shader_subgroup_partitioned: return "SPV_NV_shader_subgroup_partitioned"; + case Extension::KHR_quad_control: + return "SPV_KHR_quad_control"; default: break; } diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 6b3f43fc77..689fc0715f 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -453,7 +453,7 @@ 
SpirvSpecConstantBinaryOp *SpirvBuilder::createSpecConstantBinaryOp( } SpirvGroupNonUniformOp *SpirvBuilder::createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional groupOp) { assert(insertPoint && "null insert point"); diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 7cc84fa2fc..eed4f6369f 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -9271,6 +9271,10 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_QuadReadLaneAt: retVal = processWaveQuadWideShuffle(callExpr, hlslOpcode); break; + case hlsl::IntrinsicOp::IOP_QuadAny: + case hlsl::IntrinsicOp::IOP_QuadAll: + retVal = processWaveQuadAnyAll(callExpr, hlslOpcode); + break; case hlsl::IntrinsicOp::IOP_abort: case hlsl::IntrinsicOp::IOP_GetRenderTargetSampleCount: case hlsl::IntrinsicOp::IOP_GetRenderTargetSamplePosition: { @@ -10233,6 +10237,53 @@ SpirvEmitter::processWaveQuadWideShuffle(const CallExpr *callExpr, opcode, retType, spv::Scope::Subgroup, {value, target}, srcLoc); } +SpirvInstruction *SpirvEmitter::processWaveQuadAnyAll(const CallExpr *callExpr, + hlsl::IntrinsicOp op) { + // Signatures: + // bool QuadAny(bool localValue) + // bool QuadAll(bool localValue) + assert(callExpr->getNumArgs() == 1); + assert(op == hlsl::IntrinsicOp::IOP_QuadAny || + op == hlsl::IntrinsicOp::IOP_QuadAll); + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "Wave Operation", + callExpr->getExprLoc()); + + auto *predicate = doExpr(callExpr->getArg(0)); + const auto srcLoc = callExpr->getExprLoc(); + + if (!featureManager.isExtensionEnabled(Extension::KHR_quad_control)) { + // We can't use QuadAny/QuadAll, so implement them using QuadSwap. We + // will read the value at each quad invocation, then combine them. 
+ + spv::Op reducer = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpLogicalOr + : spv::Op::OpLogicalAnd; + + SpirvInstruction *result = predicate; + + for (size_t i = 0; i < 3; i++) { + SpirvInstruction *invocationValue = spvBuilder.createGroupNonUniformOp( + spv::Op::OpGroupNonUniformQuadSwap, astContext.BoolTy, + spv::Scope::Subgroup, + {predicate, spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, i))}, + srcLoc); + result = spvBuilder.createBinaryOp(reducer, astContext.BoolTy, result, + invocationValue, srcLoc); + } + + return result; + } + + spv::Op opcode = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpGroupNonUniformQuadAnyKHR + : spv::Op::OpGroupNonUniformQuadAllKHR; + + return spvBuilder.createGroupNonUniformOp(opcode, astContext.BoolTy, + llvm::Optional(), + {predicate}, srcLoc); +} + SpirvInstruction * SpirvEmitter::processWaveActiveAllEqual(const CallExpr *callExpr) { assert(callExpr->getNumArgs() == 1); diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 0a5ff308c2..79d2c43c35 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -670,6 +670,10 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction *processWaveQuadWideShuffle(const CallExpr *, hlsl::IntrinsicOp op); + /// Processes SM6.7 quad any/all. + SpirvInstruction *processWaveQuadAnyAll(const CallExpr *, + hlsl::IntrinsicOp op); + /// Generates the Spir-V instructions needed to implement the given call to /// WaveActiveAllEqual. Returns a pointer to the instruction that produces the /// final result. 
diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 6deb11d946..f41de03adc 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -705,7 +705,7 @@ SpirvFunctionCall::SpirvFunctionCall(QualType resultType, SourceLocation loc, function(fn), args(argsVec.begin(), argsVec.end()) {} SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope scope, + spv::Op op, QualType resultType, llvm::Optional scope, llvm::ArrayRef operandsVec, SourceLocation loc, llvm::Optional group) : SpirvInstruction(IK_GroupNonUniformOp, op, resultType, loc), @@ -737,6 +737,8 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( case spv::Op::OpGroupNonUniformLogicalAnd: case spv::Op::OpGroupNonUniformLogicalOr: case spv::Op::OpGroupNonUniformLogicalXor: + case spv::Op::OpGroupNonUniformQuadAnyKHR: + case spv::Op::OpGroupNonUniformQuadAllKHR: assert(operandsVec.size() == 1); break; @@ -768,6 +770,11 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( assert(false && "Unexpected Group non-uniform opcode"); break; } + + if (op != spv::Op::OpGroupNonUniformQuadAnyKHR && + op != spv::Op::OpGroupNonUniformQuadAllKHR) { + assert(scope.hasValue()); + } } SpirvImageOp::SpirvImageOp( diff --git a/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl new file mode 100644 index 0000000000..fb9f6e0d76 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fspv-extension=SPV_KHR_16bit_storage -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,NOQUAD +// RUN: not %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.0 -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefixes=ERROR + +// CHECK: ; Version: 1.3 + +// QUAD: 
OpCapability QuadControlKHR +// QUAD: OpExtension "SPV_KHR_quad_control" + +RWStructuredBuffer values; + +[numthreads(32, 1, 1)] +void main(uint3 id: SV_DispatchThreadID) { + uint outIdx = (id.y * 8) + id.x; + +// CHECK: [[val1:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}} +// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAnyKHR %bool [[val1]] + +// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_0 +// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalOr %bool [[val1]] [[inv0]] +// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_1 +// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalOr %bool [[or0]] [[inv1]] +// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_2 +// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalOr %bool [[or1]] [[inv2]] + +// ERROR: 27:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use + values[outIdx].x = QuadAny(outIdx % 4 == 0) ? 1.0 : 2.0; + +// CHECK: [[val2:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}} +// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAllKHR %bool [[val2]] + +// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_0 +// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalAnd %bool [[val2]] [[inv0]] +// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_1 +// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalAnd %bool [[or0]] [[inv1]] +// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_2 +// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalAnd %bool [[or1]] [[inv2]] + +// ERROR: 40:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use + values[outIdx].y = QuadAll(outIdx % 2 == 0) ? 
3.0 : 4.0; +} From 90102440f822dde23d1ee1e6b2970db2aaf1f849 Mon Sep 17 00:00:00 2001 From: Urs Hanselmann <6864721+urshanselmann@users.noreply.github.com> Date: Thu, 3 Apr 2025 15:55:41 +0200 Subject: [PATCH 10/19] Add UUID compiler extension check on Clang (#7286) Fixes #7248 Fix Clang Compilation on Linux without Microsoft extensions enabled. ## Rationale Clang support depends on the `-fms-extensions` compiler flag. [[1]](https://clang.llvm.org/docs/UsersManual.html#microsoft-extensions) If enabled, the `_MSC_EXTENSIONS` macro is defined. [[2]](https://github.com/llvm/llvm-project/blob/19a319667b567a26a20f9829a0ae7e6a5c259cba/clang/lib/Basic/Targets/OSTargets.cpp#L248) --- include/dxc/WinAdapter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/dxc/WinAdapter.h b/include/dxc/WinAdapter.h index b8c6646871..d02ad1ac38 100644 --- a/include/dxc/WinAdapter.h +++ b/include/dxc/WinAdapter.h @@ -51,7 +51,8 @@ #define _countof(a) (sizeof(a) / sizeof(*(a))) // If it is GCC, there is no UUID support and we must emulate it. -#ifndef __clang__ +// Clang support depends on the -fms-extensions compiler flag. +#if !defined(__clang__) || !defined(_MSC_EXTENSIONS) #define __EMULATE_UUID 1 #endif // __clang__ From 6a73640b91f823c4b9d9cc2c89eb2d3d93b0377f Mon Sep 17 00:00:00 2001 From: Chris B Date: Thu, 3 Apr 2025 08:56:07 -0500 Subject: [PATCH 11/19] Update DXC's CONTRIBUTING file (#7265) This change seeks to address some recent questions about how the LLLVM Coding Standards are applied in DXC. 
--------- Co-authored-by: Ashley Coleman --- CONTRIBUTING.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 233211f150..840b4f0f17 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,10 +40,32 @@ Before submitting a feature or substantial code contribution please discuss it w ### Coding guidelines -The coding, style, and general engineering guidelines follow those described in the docs/CodingStandards.rst. For additional guidelines in code specific to HLSL, see the docs/HLSLChanges.rst file. +The coding, style, and general engineering guidelines follow those described in the [LLVM Coding Standards](docs/CodingStandards.rst). For additional guidelines in code specific to HLSL, see the [HLSL Changes](docs/HLSLChanges.rst) docs. DXC has adopted a clang-format requirement for all incoming changes to C and C++ files. PRs to DXC should have the *changed code* clang formatted to the LLVM style, and leave the remaining portions of the file unchanged. This can be done using the `git-clang-format` tool or IDE driven workflows. A GitHub action will run on all PRs to validate that the change is properly formatted. +#### Applying LLVM Standards + +All new code contributed to DXC should follow the LLVM coding standards. + +Note that the LLVM Coding Standards have a golden rule: + +> **If you are extending, enhancing, or bug fixing already implemented code, use the style that is already being used so that the source is uniform and easy to follow.** + +The golden rule should continue to be applied to places where DXC is self-consistent. A good example is DXC's common use of `PascalCase` instead of `camelCase` for APIs in some parts of the HLSL implementation. In any place where DXC is not self-consistent new code should follow the LLVM Coding Standard. 
+ +A good secondary rule to follow is: + +> **When in doubt, follow LLVM.** + +Adopting LLVM's coding standards provides a consistent set of rules and guidelines to hold all contributions to. This allows patch authors to clearly understand the expectations placed on contributions, and allows reviewers to have a bar to measure contributions against. Aligning with LLVM by default ensures the path of least resistance for everyone. + +Since many of the LLVM Coding Standards are not enforced automatically we rely on code reviews to provide feedback and ensure contributions align with the expected coding standards. Since we rely on reviewers for enforcement and humans make mistakes, please keep in mind: + +> **Code review is a conversation.** + +It is completely reasonable for a patch author to question feedback and provide additional context about why something was done the way it was. Reviewers often see narrow slices in diffs rather than the full context of a file or part of the compiler, so they may not always provide perfect feedback. This is especially true with the application of the "golden rule" since it depends on understanding a wider context. 
+ ### Documenting Pull Requests Pull request descriptions should have the following format: From c9170e5fc5d39d472af1d5e5c2cf368a4501bc1a Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Thu, 3 Apr 2025 12:42:15 -0400 Subject: [PATCH 12/19] Update SPIRV-Tools (#7303) Fixes #7181 --- external/SPIRV-Tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index 393d5c7df1..4bd1536ed7 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit 393d5c7df150532045c50affffea2df22e8231b0 +Subproject commit 4bd1536ed79003a5194a4bd8c9aa2fa17a84c15b From 85f34327588ded72e949ed438d85653576f144e4 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Thu, 3 Apr 2025 14:44:09 -0600 Subject: [PATCH 13/19] Fixes non-SPIR-V build, broken by PR #7163 ([SPIRV] Implements vk::BufferPointer proposal) (#7306) #ifdef ENABLE_SPIRV_CODEGEN was omitted in several places. --- include/dxc/dxcapi.internal.h | 5 ++++- tools/clang/lib/Sema/SemaHLSL.cpp | 21 +++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index f183bb6cf0..d37054194b 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -132,9 +132,12 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_HIT_OBJECT = 51, +#ifdef ENABLE_SPIRV_CODEGEN LICOMPTYPE_VK_BUFFER_POINTER = 52, - LICOMPTYPE_COUNT = 53 +#else + LICOMPTYPE_COUNT = 52 +#endif }; static const BYTE IA_SPECIAL_BASE = 0xf0; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index f001cb70d9..f9e011f8d4 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -1237,8 +1237,10 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, AR_BASIC_UNKNOWN}; +#ifdef ENABLE_SPIRV_CODEGEN static const ArBasicKind 
g_VKBufferPointerCT[] = {AR_OBJECT_VK_BUFFER_POINTER, AR_BASIC_UNKNOWN}; +#endif // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. const ArBasicKind *g_LegalIntrinsicCompTypes[] = { @@ -1295,7 +1297,9 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT - g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER +#ifdef ENABLE_SPIRV_CODEGEN + g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER +#endif }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -3587,6 +3591,7 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_HIT_OBJECT: paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); break; +#ifdef ENABLE_SPIRV_CODEGEN case LICOMPTYPE_VK_BUFFER_POINTER: { const ArBasicKind *match = std::find(g_ArBasicKindsAsTypes, @@ -3600,6 +3605,7 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->getASTContext().getTypeDeclType(m_objectTypeDecls[index])); break; } +#endif default: DXASSERT(false, "Argument type of intrinsic function is not " "supported"); @@ -4856,7 +4862,10 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: - case AR_OBJECT_VK_BUFFER_POINTER: { +#ifdef ENABLE_SPIRV_CODEGEN + case AR_OBJECT_VK_BUFFER_POINTER: +#endif + { const ArBasicKind *match = std::find( g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); @@ -5372,8 +5381,10 @@ class HLSLExternalSource : public ExternalSemaSource { << type << GetMatrixOrVectorElementType(type); } return valid; +#ifdef ENABLE_SPIRV_CODEGEN } else if (hlsl::IsVKBufferPointerType(qt)) { return true; +#endif } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); objectKind = 
ClassifyRecordType(recordType); @@ -9751,10 +9762,12 @@ bool HLSLExternalSource::CanConvert(SourceLocation loc, Expr *sourceExpr, return false; } +#ifdef ENABLE_SPIRV_CODEGEN // Cast vk::BufferPointer to pointer address. if (SourceInfo.EltKind == AR_OBJECT_VK_BUFFER_POINTER) { return TargetInfo.EltKind == AR_BASIC_UINT64; } +#endif // Cast cbuffer to its result value. if ((SourceInfo.EltKind == AR_OBJECT_CONSTANT_BUFFER || @@ -11604,6 +11617,7 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, bool isStatic) { const Expr *argExpr = CE->getArg(0); @@ -11627,6 +11641,7 @@ static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, return false; } +#endif // Check HLSL call constraints, not fatal to creating the AST. void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, @@ -11646,12 +11661,14 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, case hlsl::IntrinsicOp::IOP_Barrier: CheckBarrierCall(*this, FDecl, TheCall); break; +#ifdef ENABLE_SPIRV_CODEGEN case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: CheckVKBufferPointerCast(*this, FDecl, TheCall, false); break; case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: CheckVKBufferPointerCast(*this, FDecl, TheCall, true); break; +#endif default: break; } From e50f599ff302a0ecf08146f6986c738dc4149abb Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Fri, 4 Apr 2025 09:44:57 -0700 Subject: [PATCH 14/19] [NFC] Standardize DxilValidation variable capitalization (#7307) Capitalize all the variables and rename a few in DxilValidation.cpp in keeping with https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly As this file was easily mistaken for applying to the golden rule: https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly it is 
at serious risk of receiving changes that will get hung up by requirements to follow the LLVM coding guidelines. This brings the cases where variable capitalization is not in line with the coding standards to avoid such pitfalls in the future. --- lib/DxilValidation/DxilValidation.cpp | 3288 ++++++++++++------------- 1 file changed, 1644 insertions(+), 1644 deletions(-) diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index cac074adc3..97bde6ca24 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -65,8 +65,8 @@ using std::vector; namespace hlsl { // PrintDiagnosticContext methods. -PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &printer) - : m_Printer(printer), m_errorsFound(false), m_warningsFound(false) {} +PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &Printer) + : m_Printer(Printer), m_errorsFound(false), m_warningsFound(false) {} bool PrintDiagnosticContext::HasErrors() const { return m_errorsFound; } bool PrintDiagnosticContext::HasWarnings() const { return m_warningsFound; } @@ -97,68 +97,68 @@ struct PSExecutionInfo { }; static unsigned ValidateSignatureRowCol(Instruction *I, - DxilSignatureElement &SE, Value *rowVal, - Value *colVal, EntryStatus &Status, + DxilSignatureElement &SE, Value *RowVal, + Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (ConstantInt *constRow = dyn_cast(rowVal)) { - unsigned row = constRow->getLimitedValue(); - if (row >= SE.GetRows()) { - std::string range = std::string("0~") + std::to_string(SE.GetRows()); + if (ConstantInt *ConstRow = dyn_cast(RowVal)) { + unsigned Row = ConstRow->getLimitedValue(); + if (Row >= SE.GetRows()) { + std::string Range = std::string("0~") + std::to_string(SE.GetRows()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Row", range, std::to_string(row)}); + {"Row", Range, std::to_string(Row)}); } } - if (!isa(colVal)) { - // col must 
be const + if (!isa(ColVal)) { + // Col must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"Col", "LoadInput/StoreOutput"}); return 0; } - unsigned col = cast(colVal)->getLimitedValue(); + unsigned Col = cast(ColVal)->getLimitedValue(); - if (col > SE.GetCols()) { - std::string range = std::string("0~") + std::to_string(SE.GetCols()); + if (Col > SE.GetCols()) { + std::string Range = std::string("0~") + std::to_string(SE.GetCols()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Col", range, std::to_string(col)}); + {"Col", Range, std::to_string(Col)}); } else { if (SE.IsOutput()) - Status.outputCols[SE.GetID()] |= 1 << col; + Status.outputCols[SE.GetID()] |= 1 << Col; if (SE.IsPatchConstOrPrim()) - Status.patchConstOrPrimCols[SE.GetID()] |= 1 << col; + Status.patchConstOrPrimCols[SE.GetID()] |= 1 << Col; } - return col; + return Col; } static DxilSignatureElement * -ValidateSignatureAccess(Instruction *I, DxilSignature &sig, Value *sigID, - Value *rowVal, Value *colVal, EntryStatus &Status, +ValidateSignatureAccess(Instruction *I, DxilSignature &Sig, Value *SigId, + Value *RowVal, Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (!isa(sigID)) { + if (!isa(SigId)) { // inputID must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"SignatureID", "LoadInput/StoreOutput"}); return nullptr; } - unsigned SEIdx = cast(sigID)->getLimitedValue(); - if (sig.GetElements().size() <= SEIdx) { + unsigned SEIdx = cast(SigId)->getLimitedValue(); + if (Sig.GetElements().size() <= SEIdx) { ValCtx.EmitInstrError(I, ValidationRule::InstrOpConstRange); return nullptr; } - DxilSignatureElement &SE = sig.GetElement(SEIdx); - bool isOutput = sig.IsOutput(); + DxilSignatureElement &SE = Sig.GetElement(SEIdx); + bool IsOutput = Sig.IsOutput(); - unsigned col = ValidateSignatureRowCol(I, SE, rowVal, colVal, Status, ValCtx); + unsigned Col = ValidateSignatureRowCol(I, SE, RowVal, ColVal, Status, 
ValCtx); - if (isOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { - unsigned mask = Status.OutputPositionMask[SE.GetOutputStream()]; - mask |= 1 << col; + if (IsOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { + unsigned Mask = Status.OutputPositionMask[SE.GetOutputStream()]; + Mask |= 1 << Col; if (SE.GetOutputStream() < DXIL::kNumOutputStreams) - Status.OutputPositionMask[SE.GetOutputStream()] = mask; + Status.OutputPositionMask[SE.GetOutputStream()] = Mask; } return &SE; } @@ -183,9 +183,9 @@ static DxilResourceProperties GetResourceFromHandle(Value *Handle, return RP; } -static DXIL::SamplerKind GetSamplerKind(Value *samplerHandle, +static DXIL::SamplerKind GetSamplerKind(Value *SamplerHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(samplerHandle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(SamplerHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::Sampler) { // must be sampler. @@ -200,14 +200,14 @@ static DXIL::SamplerKind GetSamplerKind(Value *samplerHandle, } static DXIL::ResourceKind -GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, +GetResourceKindAndCompTy(Value *Handle, DXIL::ComponentType &CompTy, DXIL::ResourceClass &ResClass, ValidationContext &ValCtx) { CompTy = DXIL::ComponentType::Invalid; ResClass = DXIL::ResourceClass::Invalid; // TODO: validate ROV is used only in PS. 
- DxilResourceProperties RP = GetResourceFromHandle(handle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(Handle, ValCtx); ResClass = RP.getResourceClass(); switch (ResClass) { @@ -230,19 +230,19 @@ GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, return RP.getResourceKind(); } -DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, - std::deque &offsets) { +DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &TypeSys, + std::deque &Offsets) { unsigned CurIdx = 1; - unsigned LastIdx = offsets.size() - 1; + unsigned LastIdx = Offsets.size() - 1; DxilStructAnnotation *StructAnnot = nullptr; - for (; CurIdx < offsets.size(); ++CurIdx) { + for (; CurIdx < Offsets.size(); ++CurIdx) { if (const StructType *EltST = dyn_cast(Ty)) { - if (DxilStructAnnotation *EltAnnot = typeSys.GetStructAnnotation(EltST)) { + if (DxilStructAnnotation *EltAnnot = TypeSys.GetStructAnnotation(EltST)) { StructAnnot = EltAnnot; - Ty = EltST->getElementType(offsets[CurIdx]); + Ty = EltST->getElementType(Offsets[CurIdx]); if (CurIdx == LastIdx) { - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } else { return nullptr; @@ -252,16 +252,16 @@ DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, StructAnnot = nullptr; } else { if (StructAnnot) - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } return nullptr; } -DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { - auto it = ResPropMap.find(resVal); - if (it != ResPropMap.end()) { - return it->second; +DxilResourceProperties ValidationContext::GetResourceFromVal(Value *ResVal) { + auto It = ResPropMap.find(ResVal); + if (It != ResPropMap.end()) { + return It->second; } else { DxilResourceProperties RP; return RP; @@ -269,34 +269,34 @@ DxilResourceProperties 
ValidationContext::GetResourceFromVal(Value *resVal) { } struct ResRetUsage { - bool x; - bool y; - bool z; - bool w; - bool status; - ResRetUsage() : x(false), y(false), z(false), w(false), status(false) {} + bool X; + bool Y; + bool Z; + bool W; + bool Status; + ResRetUsage() : X(false), Y(false), Z(false), W(false), Status(false) {} }; -static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, +static void CollectGetDimResRetUsage(ResRetUsage &Usage, Instruction *ResRet, ValidationContext &ValCtx) { for (User *U : ResRet->users()) { if (ExtractValueInst *EVI = dyn_cast(U)) { - for (unsigned idx : EVI->getIndices()) { - switch (idx) { + for (unsigned Idx : EVI->getIndices()) { + switch (Idx) { case 0: - usage.x = true; + Usage.X = true; break; case 1: - usage.y = true; + Usage.Y = true; break; case 2: - usage.z = true; + Usage.Z = true; break; case 3: - usage.w = true; + Usage.W = true; break; case DXIL::kResRetStatusIndex: - usage.status = true; + Usage.Status = true; break; default: // Emit index out of bound. 
@@ -306,7 +306,7 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } } else if (PHINode *PHI = dyn_cast(U)) { - CollectGetDimResRetUsage(usage, PHI, ValCtx); + CollectGetDimResRetUsage(Usage, PHI, ValCtx); } else { Instruction *User = cast(U); ValCtx.EmitInstrError(User, ValidationRule::InstrDxilStructUser); @@ -314,18 +314,18 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } -static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef coords, +static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { - const unsigned kMaxNumCoords = 4; - unsigned numCoords = DxilResource::GetNumCoords(resKind); - for (unsigned i = 0; i < kMaxNumCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + const unsigned KMaxNumCoords = 4; + unsigned NumCoords = DxilResource::GetNumCoords(ResKind); + for (unsigned I = 0; I < KMaxNumCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -334,18 +334,18 @@ static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, } static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, - DXIL::ResourceKind resKind, - ArrayRef coords, + DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { const unsigned kMaxNumDimCoords = 3; - unsigned numCoords = DxilResource::GetNumDimensionsForCalcLOD(resKind); - for (unsigned i = 0; i < kMaxNumDimCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + unsigned NumCoords = DxilResource::GetNumDimensionsForCalcLOD(ResKind); + for (unsigned I = 0; I < kMaxNumDimCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, 
ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -353,21 +353,21 @@ static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, } } -static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef offsets, +static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Offsets, ValidationContext &ValCtx) { const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - unsigned numOffsets = DxilResource::GetNumOffsets(resKind); - bool hasOffset = !isa(offsets[0]); + unsigned NumOffsets = DxilResource::GetNumOffsets(ResKind); + bool HasOffset = !isa(Offsets[0]); - auto validateOffset = [&](Value *offset) { + auto ValidateOffset = [&](Value *Offset) { // 6.7 Advanced Textures allow programmable offsets if (pSM->IsSM67Plus()) return; - if (ConstantInt *cOffset = dyn_cast(offset)) { - int offset = cOffset->getValue().getSExtValue(); - if (offset > 7 || offset < -8) { + if (ConstantInt *cOffset = dyn_cast(Offset)) { + int Offset = cOffset->getValue().getSExtValue(); + if (Offset > 7 || Offset < -8) { ValCtx.EmitInstrError(CI, ValidationRule::InstrTextureOffset); } } else { @@ -375,20 +375,20 @@ static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, } }; - if (hasOffset) { - validateOffset(offsets[0]); + if (HasOffset) { + ValidateOffset(Offsets[0]); } - for (unsigned i = 1; i < offsets.size(); i++) { - if (i < numOffsets) { - if (hasOffset) { - if (isa(offsets[i])) + for (unsigned I = 1; I < Offsets.size(); I++) { + if (I < NumOffsets) { + if (HasOffset) { + if (isa(Offsets[I])) ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); else - validateOffset(offsets[i]); + ValidateOffset(Offsets[I]); } } else { - if (!isa(offsets[i])) { + if (!isa(Offsets[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } @@ -405,53 +405,53 
@@ static void ValidateDerivativeOp(CallInst *CI, ValidationContext &ValCtx) { {"Derivatives in CS/MS/AS", "Shader Model 6.6+"}); } -static void ValidateSampleInst(CallInst *CI, Value *srvHandle, - Value *samplerHandle, ArrayRef coords, - ArrayRef offsets, bool IsSampleC, +static void ValidateSampleInst(CallInst *CI, Value *SrvHandle, + Value *SamplerHandle, ArrayRef Coords, + ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); - bool isSampleCompTy = compTy == DXIL::ComponentType::F32; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::F16; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF16; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF16; + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); + bool IsSampleCompTy = CompTy == DXIL::ComponentType::F32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::F16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF16; const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); if 
(pSM->IsSM67Plus() && !IsSampleC) { - isSampleCompTy |= compTy == DXIL::ComponentType::I16; - isSampleCompTy |= compTy == DXIL::ComponentType::U16; - isSampleCompTy |= compTy == DXIL::ComponentType::I32; - isSampleCompTy |= compTy == DXIL::ComponentType::U32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U32; } - if (!isSampleCompTy) { + if (!IsSampleCompTy) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleCompType); } - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); } - ValidationRule rule = ValidationRule::InstrResourceKindForSample; + ValidationRule Rule = ValidationRule::InstrResourceKindForSample; if (IsSampleC) { - rule = ValidationRule::InstrResourceKindForSampleC; + Rule = ValidationRule::InstrResourceKindForSampleC; } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -461,64 +461,64 @@ static void ValidateSampleInst(CallInst *CI, Value *srvHandle, break; case DXIL::ResourceKind::Texture3D: if (IsSampleC) { - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); } break; default: - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- ValidateResourceOffset(CI, resKind, offsets, ValCtx); + ValidateResourceOffset(CI, ResKind, Offsets, ValCtx); } -static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, - ArrayRef coords, ArrayRef offsets, +static void ValidateGather(CallInst *CI, Value *SrvHandle, Value *SamplerHandle, + ArrayRef Coords, ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture2D: case DXIL::ResourceKind::Texture2DArray: { - bool hasOffset = !isa(offsets[0]); - if (hasOffset) { - if (isa(offsets[1])) { + bool HasOffset = !isa(Offsets[0]); + if (HasOffset) { + if (isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); } } } break; case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: { - if (!isa(offsets[0])) { + if (!isa(Offsets[0])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } - if (!isa(offsets[1])) { + if (!isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } break; @@ -529,21 +529,21 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, } } -static unsigned StoreValueToMask(ArrayRef vals) { - unsigned mask = 0; - for (unsigned i = 0; i < 4; i++) { - if (!isa(vals[i])) { - mask |= 1 << i; +static unsigned StoreValueToMask(ArrayRef Vals) { + unsigned Mask = 0; + for (unsigned I = 0; I < 4; I++) { + if (!isa(Vals[I])) { + Mask |= 1 << I; } } - return mask; + return Mask; } -static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(cbHandle, ValCtx); +static int GetCBufSize(Value *CbHandle, ValidationContext &ValCtx) { + DxilResourceProperties RP = GetResourceFromHandle(CbHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::CBuffer) { - ValCtx.EmitInstrError(cast(cbHandle), + ValCtx.EmitInstrError(cast(CbHandle), ValidationRule::InstrCBufferClassForCBufferHandle); return -1; } @@ -554,7 +554,7 @@ static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { // Make sure none of the handle arguments are undef / zero-initializer, // Also, do not accept any resource handles with invalid dxil resource // properties -void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, +void 
ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { for (Value *op : CI->operands()) { @@ -563,13 +563,13 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, const Type *pNodeRecordHandleTy = ValCtx.DxilMod.GetOP()->GetNodeRecordHandleType(); - const Type *argTy = op->getType(); - if (argTy == pNodeHandleTy || argTy == pNodeRecordHandleTy || - argTy == pHandleTy) { + const Type *ArgTy = op->getType(); + if (ArgTy == pNodeHandleTy || ArgTy == pNodeRecordHandleTy || + ArgTy == pHandleTy) { if (isa(op) || isa(op)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); - } else if (argTy == pHandleTy) { + } else if (ArgTy == pHandleTy) { // GetResourceFromHandle will emit an error on an invalid handle GetResourceFromHandle(op, ValCtx); } @@ -577,10 +577,10 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, } } -void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, +void ValidateHandleArgs(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // TODO: add case DXIL::OpCode::IndexNodeRecordHandle: case DXIL::OpCode::AnnotateHandle: @@ -591,12 +591,12 @@ void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, break; default: - ValidateHandleArgsForInstruction(CI, opcode, ValCtx); + ValidateHandleArgsForInstruction(CI, Opcode, ValCtx); break; } } -static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { +static unsigned GetNumVertices(DXIL::InputPrimitive InputPrimitive) { const unsigned InputPrimitiveVertexTab[] = { 0, // Undefined = 0, 1, // Point = 1, @@ -641,26 +641,26 @@ static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { 0, // LastEntry, }; - unsigned primitiveIdx = static_cast(inputPrimitive); - return InputPrimitiveVertexTab[primitiveIdx]; + unsigned PrimitiveIdx = static_cast(InputPrimitive); + return InputPrimitiveVertexTab[PrimitiveIdx]; } -static void 
ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { Function *F = CI->getParent()->getParent(); DxilModule &DM = ValCtx.DxilMod; - bool bIsPatchConstantFunc = false; + bool IsPatchConstantFunc = false; if (!DM.HasDxilEntryProps(F)) { - auto it = ValCtx.PatchConstantFuncMap.find(F); - if (it == ValCtx.PatchConstantFuncMap.end()) { + auto It = ValCtx.PatchConstantFuncMap.find(F); + if (It == ValCtx.PatchConstantFuncMap.end()) { // Missing entry props. ValCtx.EmitInstrError(CI, ValidationRule::InstrSignatureOperationNotInEntry); return; } // Use hull entry instead of patch constant function. - F = it->second.front(); - bIsPatchConstantFunc = true; + F = It->second.front(); + IsPatchConstantFunc = true; } if (!ValCtx.HasEntryStatus(F)) { return; @@ -668,67 +668,67 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); - DxilFunctionProps &props = EntryProps.props; + DxilFunctionProps &Props = EntryProps.props; DxilEntrySignature &S = EntryProps.sig; - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::LoadInput: { - Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); - ValidateSignatureAccess(CI, inputSig, inputID, row, col, Status, ValCtx); - - // Check vertexID in ps/vs. and none array input. 
- Value *vertexID = + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + ValidateSignatureAccess(CI, InputSig, InputId, Row, Col, Status, ValCtx); + + // Check VertexId in ps/vs. and none array input. + Value *VertexId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputVertexIDOpIdx); - bool usedVertexID = vertexID && !isa(vertexID); - if (props.IsVS() || props.IsPS()) { - if (usedVertexID) { - // use vertexID in VS/PS input. + bool UsedVertexId = VertexId && !isa(VertexId); + if (Props.IsVS() || Props.IsPS()) { + if (UsedVertexId) { + // Use VertexId in VS/PS input. ValCtx.EmitInstrError(CI, ValidationRule::SmOperand); return; } } else { - if (ConstantInt *cVertexID = dyn_cast(vertexID)) { - int immVertexID = cVertexID->getValue().getLimitedValue(); - if (cVertexID->getValue().isNegative()) { - immVertexID = cVertexID->getValue().getSExtValue(); + if (ConstantInt *cVertexId = dyn_cast(VertexId)) { + int ImmVertexId = cVertexId->getValue().getLimitedValue(); + if (cVertexId->getValue().isNegative()) { + ImmVertexId = cVertexId->getValue().getSExtValue(); } - const int low = 0; - int high = 0; - if (props.IsGS()) { - DXIL::InputPrimitive inputPrimitive = - props.ShaderProps.GS.inputPrimitive; - high = GetNumVertices(inputPrimitive); - } else if (props.IsDS()) { - high = props.ShaderProps.DS.inputControlPoints; - } else if (props.IsHS()) { - high = props.ShaderProps.HS.inputControlPoints; + const int Low = 0; + int High = 0; + if (Props.IsGS()) { + DXIL::InputPrimitive InputPrimitive = + Props.ShaderProps.GS.inputPrimitive; + High = GetNumVertices(InputPrimitive); + } else if (Props.IsDS()) { + High = Props.ShaderProps.DS.inputControlPoints; + } else if (Props.IsHS()) { + High = Props.ShaderProps.HS.inputControlPoints; } else { 
ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadInput", "VS/HS/DS/GS/PS"}); } - if (immVertexID < low || immVertexID >= high) { - std::string range = std::to_string(low) + "~" + std::to_string(high); + if (ImmVertexId < Low || ImmVertexId >= High) { + std::string Range = std::to_string(Low) + "~" + std::to_string(High); ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"VertexID", range, std::to_string(immVertexID)}); + {"VertexID", Range, std::to_string(ImmVertexId)}); } } } } break; case DXIL::OpCode::DomainLocation: { - Value *colValue = + Value *ColValue = CI->getArgOperand(DXIL::OperandIndex::kDomainLocationColOpIdx); - if (!isa(colValue)) { - // col must be const + if (!isa(ColValue)) { + // Col must be const ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Col", "DomainLocation"}); } else { - unsigned col = cast(colValue)->getLimitedValue(); - if (col >= Status.domainLocSize) { + unsigned Col = cast(ColValue)->getLimitedValue(); + if (Col >= Status.domainLocSize) { ValCtx.EmitInstrError(CI, ValidationRule::SmDomainLocationIdxOOB); } } @@ -736,60 +736,60 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::StoreOutput: case DXIL::OpCode::StoreVertexOutput: case DXIL::OpCode::StorePrimitiveOutput: { - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = opcode == DXIL::OpCode::StorePrimitiveOutput + DxilSignature &OutputSig = Opcode == DXIL::OpCode::StorePrimitiveOutput ? 
S.PatchConstOrPrimSignature : S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::OutputControlPointID: { // Only used in hull shader. - Function *func = CI->getParent()->getParent(); + Function *Func = CI->getParent()->getParent(); // Make sure this is inside hs shader entry function. - if (!(props.IsHS() && F == func)) { + if (!(Props.IsHS() && F == Func)) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"OutputControlPointID", "hull function"}); } } break; case DXIL::OpCode::LoadOutputControlPoint: { // Only used in patch constant function. 
- Function *func = CI->getParent()->getParent(); - if (ValCtx.entryFuncCallSet.count(func) > 0) { + Function *Func = CI->getParent()->getParent(); + if (ValCtx.entryFuncCallSet.count(Func) > 0) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadOutputControlPoint", "PatchConstant function"}); } - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + DxilSignature &OutputSig = S.OutputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::StorePatchConstant: { // Only used in patch constant function. 
- Function *func = CI->getParent()->getParent(); - if (!bIsPatchConstantFunc) { + Function *Func = CI->getParent()->getParent(); + if (!IsPatchConstantFunc) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"StorePatchConstant", "PatchConstant function"}); } else { - auto &hullShaders = ValCtx.PatchConstantFuncMap[func]; - for (Function *F : hullShaders) { + auto &HullShaders = ValCtx.PatchConstantFuncMap[Func]; + for (Function *F : HullShaders) { EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); DxilEntrySignature &S = EntryProps.sig; - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.PatchConstOrPrimSignature; - Value *row = + DxilSignature &OutputSig = S.PatchConstOrPrimSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } } @@ -807,12 +807,12 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::EvalSampleIndex: case DXIL::OpCode::EvalSnapped: { // Eval* share same operand index with load input. 
- Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, inputID, row, col, Status, ValCtx); + CI, InputSig, InputId, Row, Col, Status, ValCtx); if (pSE) { switch (pSE->GetInterpolationMode()->GetKind()) { case DXIL::InterpolationMode::Linear: @@ -836,11 +836,11 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::AttributeAtVertex: { Value *Attribute = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, Attribute, row, col, Status, ValCtx); + CI, InputSig, Attribute, Row, Col, Status, ValCtx); if (pSE && pSE->GetInterpolationMode()->GetKind() != hlsl::InterpolationMode::Kind::Constant) { ValCtx.EmitInstrFormatError( @@ -851,35 +851,35 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::CutStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::EmitStream: { - if (props.IsGS()) { - auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < 
_countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + if (Props.IsGS()) { + auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - Value *streamID = + Value *StreamId = CI->getArgOperand(DXIL::OperandIndex::kStreamEmitCutIDOpIdx); - if (ConstantInt *cStreamID = dyn_cast(streamID)) { - int immStreamID = cStreamID->getValue().getLimitedValue(); - if (cStreamID->getValue().isNegative() || immStreamID >= 4) { + if (ConstantInt *cStreamId = dyn_cast(StreamId)) { + int ImmStreamId = cStreamId->getValue().getLimitedValue(); + if (cStreamId->getValue().isNegative() || ImmStreamId >= 4) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", "0~4", std::to_string(immStreamID)}); + {"StreamID", "0~4", std::to_string(ImmStreamId)}); } else { - unsigned immMask = 1 << immStreamID; - if ((streamMask & immMask) == 0) { - std::string range; - for (unsigned i = 0; i < 4; i++) { - if (streamMask & (1 << i)) { - range += std::to_string(i) + " "; + unsigned ImmMask = 1 << ImmStreamId; + if ((StreamMask & ImmMask) == 0) { + std::string Range; + for (unsigned I = 0; I < 4; I++) { + if (StreamMask & (1 << I)) { + Range += std::to_string(I) + " "; } } ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", range, std::to_string(immStreamID)}); + {"StreamID", Range, std::to_string(ImmStreamId)}); } } @@ -893,25 +893,25 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::EmitIndices: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"EmitIndices", "Mesh shader"}); } } break; case DXIL::OpCode::SetMeshOutputCounts: { - if (!props.IsMS()) { + if (!Props.IsMS()) { 
ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"SetMeshOutputCounts", "Mesh shader"}); } } break; case DXIL::OpCode::GetMeshPayload: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"GetMeshPayload", "Mesh shader"}); } } break; case DXIL::OpCode::DispatchMesh: { - if (!props.IsAS()) { + if (!Props.IsAS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"DispatchMesh", "Amplification shader"}); } @@ -925,9 +925,9 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } -static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // Imm input value validation. case DXIL::OpCode::Asin: { DxilInst_Asin I(CI); @@ -973,77 +973,77 @@ static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, // Validate the type-defined mask compared to the store value mask which // indicates which parts were defined returns true if caller should continue // validation -static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode opcode, - ConstantInt *mask, unsigned stValMask, - bool isTyped, ValidationContext &ValCtx) { - if (!mask) { +static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode Opcode, + ConstantInt *Mask, unsigned StValMask, + bool IsTyped, ValidationContext &ValCtx) { + if (!Mask) { // Mask for buffer store should be immediate. 
ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, - {"Mask", hlsl::OP::GetOpCodeName(opcode)}); + {"Mask", hlsl::OP::GetOpCodeName(Opcode)}); return false; } - unsigned uMask = mask->getLimitedValue(); - if (isTyped && uMask != 0xf) { + unsigned UMask = Mask->getLimitedValue(); + if (IsTyped && UMask != 0xf) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskForTypedUAVStore); } // write mask must be contiguous (.x .xy .xyz or .xyzw) - if (!((uMask == 0xf) || (uMask == 0x7) || (uMask == 0x3) || (uMask == 0x1))) { + if (!((UMask == 0xf) || (UMask == 0x7) || (UMask == 0x3) || (UMask == 0x1))) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskGapForUAV); } - // If a bit is set in the uMask (expected values) that isn't set in stValMask + // If a bit is set in the UMask (expected values) that isn't set in StValMask // (user provided values) then the user failed to define some of the output // values. - if (uMask & ~stValMask) + if (UMask & ~StValMask) ValCtx.EmitInstrError(I, ValidationRule::InstrUndefinedValueForUAVStore); - else if (uMask != stValMask) + else if (UMask != StValMask) ValCtx.EmitInstrFormatError( I, ValidationRule::InstrWriteMaskMatchValueForUAVStore, - {std::to_string(uMask), std::to_string(stValMask)}); + {std::to_string(UMask), std::to_string(StValMask)}); return true; } -static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::GetDimensions: { - DxilInst_GetDimensions getDim(CI); - Value *handle = getDim.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); + DxilInst_GetDimensions GetDim(CI); + Value *Handle = GetDim.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + 
GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); // Check the result component use. - ResRetUsage usage; - CollectGetDimResRetUsage(usage, CI, ValCtx); + ResRetUsage Usage; + CollectGetDimResRetUsage(Usage, CI, ValCtx); // Mip level only for texture. - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: - if (usage.y) { + if (Usage.Y) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"y", "Texture1D"}); } - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1D"}); } break; case DXIL::ResourceKind::Texture1DArray: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1DArray"}); } break; case DXIL::ResourceKind::Texture2D: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2D"}); @@ -1052,7 +1052,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture2DArray: break; case DXIL::ResourceKind::Texture2DMS: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2DMS"}); @@ -1063,7 +1063,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture3D: break; case DXIL::ResourceKind::TextureCube: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "TextureCube"}); @@ -1075,12 +1075,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: { - Value *mip = getDim.get_mipLevel(); - if (!isa(mip)) { + Value *Mip = GetDim.get_mipLevel(); + if (!isa(Mip)) { ValCtx.EmitInstrError(CI, 
ValidationRule::InstrMipLevelForGetDimension); } - if (resKind != DXIL::ResourceKind::Invalid) { - if (usage.y || usage.z || usage.w) { + if (ResKind != DXIL::ResourceKind::Invalid) { + if (Usage.Y || Usage.Z || Usage.W) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); @@ -1092,38 +1092,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; } - if (usage.status) { + if (Usage.Status) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); } } break; case DXIL::OpCode::CalculateLOD: { - DxilInst_CalculateLOD lod(CI); - Value *samplerHandle = lod.get_sampler(); - DXIL::SamplerKind samplerKind = GetSamplerKind(samplerHandle, ValCtx); - if (samplerKind != DXIL::SamplerKind::Default) { + DxilInst_CalculateLOD LOD(CI); + Value *SamplerHandle = LOD.get_sampler(); + DXIL::SamplerKind SamplerKind = GetSamplerKind(SamplerHandle, ValCtx); + if (SamplerKind != DXIL::SamplerKind::Default) { // After SM68, Comparison is supported. if (!ValCtx.DxilMod.GetShaderModel()->IsSM68Plus() || - samplerKind != DXIL::SamplerKind::Comparison) + SamplerKind != DXIL::SamplerKind::Comparison) ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForLOD); } - Value *handle = lod.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + Value *Handle = LOD.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource. 
ValidateCalcLODResourceDimensionCoord( - CI, resKind, {lod.get_coord0(), lod.get_coord1(), lod.get_coord2()}, + CI, ResKind, {LOD.get_coord0(), LOD.get_coord1(), LOD.get_coord2()}, ValCtx); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1140,67 +1140,67 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::TextureGather: { - DxilInst_TextureGather gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGather Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::TextureGatherCmp: { - DxilInst_TextureGatherCmp gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGatherCmp Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::Sample: { - DxilInst_Sample sample(CI); + DxilInst_Sample Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), 
Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmp: { - DxilInst_SampleCmp sample(CI); + DxilInst_SampleCmp Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpLevel: { // sampler must be comparison mode. - DxilInst_SampleCmpLevel sample(CI); + DxilInst_SampleCmpLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleCmpLevelZero: { // sampler must be comparison mode. 
- DxilInst_SampleCmpLevelZero sample(CI); + DxilInst_SampleCmpLevelZero Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleBias: { - DxilInst_SampleBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1210,19 +1210,19 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpBias: { - DxilInst_SampleCmpBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < 
DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleCmpBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1232,38 +1232,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleGrad: { - DxilInst_SampleGrad sample(CI); + DxilInst_SampleGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::SampleCmpGrad: { - DxilInst_SampleCmpGrad sample(CI); + DxilInst_SampleCmpGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), 
Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleLevel: { - DxilInst_SampleLevel sample(CI); + DxilInst_SampleLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::CheckAccessFullyMapped: { @@ -1273,53 +1273,53 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); - bool isLegal = EVI->getNumIndices() == 1 && + bool IsLegal = EVI->getNumIndices() == 1 && EVI->getIndices()[0] == DXIL::kResRetStatusIndex && ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); - if (!isLegal) { + if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } } } break; case DXIL::OpCode::BufferStore: { - DxilInst_BufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); + DxilInst_BufferStore BufSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = - 
StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), - bufSt.get_value2(), bufSt.get_value3()}); + ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + unsigned StValMask = + StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), + BufSt.get_value2(), BufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, - resKind == DXIL::ResourceKind::TypedBuffer || - resKind == DXIL::ResourceKind::TBuffer, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, + ResKind == DXIL::ResourceKind::TypedBuffer || + ResKind == DXIL::ResourceKind::TBuffer, ValCtx)) return; - Value *offset = bufSt.get_coord1(); + Value *Offset = BufSt.get_coord1(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1332,26 +1332,26 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureStore: { - DxilInst_TextureStore texSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texSt.get_srv(), compTy, resClass, ValCtx); + DxilInst_TextureStore TexSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexSt.get_srv(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(texSt.get_mask()); 
- unsigned stValMask = - StoreValueToMask({texSt.get_value0(), texSt.get_value1(), - texSt.get_value2(), texSt.get_value3()}); + ConstantInt *Mask = dyn_cast(TexSt.get_mask()); + unsigned StValMask = + StoreValueToMask({TexSt.get_value0(), TexSt.get_value1(), + TexSt.get_value2(), TexSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, true /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, true /*IsTyped*/, ValCtx)) return; - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1367,30 +1367,30 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::BufferLoad: { - DxilInst_BufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); - - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { + DxilInst_BufferLoad BufLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } - Value *offset = bufLd.get_wot(); + Value *Offset = BufLd.get_wot(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1403,33 +1403,33 @@ static void ValidateResourceDxilOp(CallInst *CI, 
DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureLoad: { - DxilInst_TextureLoad texLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texLd.get_srv(), compTy, resClass, ValCtx); - - Value *mipLevel = texLd.get_mipLevelOrSampleCount(); - - if (resClass == DXIL::ResourceClass::UAV) { - bool noOffset = isa(texLd.get_offset0()); - noOffset &= isa(texLd.get_offset1()); - noOffset &= isa(texLd.get_offset2()); - if (!noOffset) { + DxilInst_TextureLoad TexLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexLd.get_srv(), CompTy, ResClass, ValCtx); + + Value *MipLevel = TexLd.get_mipLevelOrSampleCount(); + + if (ResClass == DXIL::ResourceClass::UAV) { + bool NoOffset = isa(TexLd.get_offset0()); + NoOffset &= isa(TexLd.get_offset1()); + NoOffset &= isa(TexLd.get_offset2()); + if (!NoOffset) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOffsetOnUAVLoad); } - if (!isa(mipLevel)) { - if (resKind != DXIL::ResourceKind::Texture2DMS && - resKind != DXIL::ResourceKind::Texture2DMSArray) + if (!isa(MipLevel)) { + if (ResKind != DXIL::ResourceKind::Texture2DMS && + ResKind != DXIL::ResourceKind::Texture2DMSArray) ValCtx.EmitInstrError(CI, ValidationRule::InstrMipOnUAVLoad); } } else { - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1438,7 +1438,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, break; case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: { - if (isa(mipLevel)) { + if (isa(MipLevel)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleIndexForLoad2DMS); } } break; @@ -1449,28 
+1449,28 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateResourceOffset( - CI, resKind, - {texLd.get_offset0(), texLd.get_offset1(), texLd.get_offset2()}, + CI, ResKind, + {TexLd.get_offset0(), TexLd.get_offset1(), TexLd.get_offset2()}, ValCtx); } break; case DXIL::OpCode::CBufferLoad: { DxilInst_CBufferLoad CBLoad(CI); - Value *regIndex = CBLoad.get_byteOffset(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue(); - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_byteOffset(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue(); + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } } break; case DXIL::OpCode::CBufferLoadLegacy: { DxilInst_CBufferLoadLegacy CBLoad(CI); - Value *regIndex = CBLoad.get_regIndex(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue() * 16; // 16 bytes align - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_regIndex(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue() * 16; // 16 bytes align + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } @@ -1483,35 +1483,35 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); } } - DxilInst_RawBufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); + DxilInst_RawBufferLoad BufLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass 
ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } - Value *offset = bufLd.get_elementOffset(); - Value *align = bufLd.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { + Value *Offset = BufLd.get_elementOffset(); + Value *Align = BufLd.get_alignment(); + unsigned AlignSize = 0; + if (!isa(Align)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } else { - alignSize = bufLd.get_alignment_val(); + AlignSize = BufLd.get_alignment_val(); } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1530,43 +1530,43 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); } } - DxilInst_RawBufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); + DxilInst_RawBufferStore BufSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = - 
StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), - bufSt.get_value2(), bufSt.get_value3()}); + ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + unsigned StValMask = + StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), + BufSt.get_value2(), BufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, false /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, false /*IsTyped*/, ValCtx)) return; - Value *offset = bufSt.get_elementOffset(); - Value *align = bufSt.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { + Value *Offset = BufSt.get_elementOffset(); + Value *Align = BufSt.get_alignment(); + unsigned AlignSize = 0; + if (!isa(Align)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } else { - alignSize = bufSt.get_alignment_val(); + AlignSize = BufSt.get_alignment_val(); } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1578,9 +1578,9 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::TraceRay: { - DxilInst_TraceRay traceRay(CI); - Value *hdl = traceRay.get_AccelerationStructure(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(hdl); + DxilInst_TraceRay TraceRay(CI); + Value *Hdl = TraceRay.get_AccelerationStructure(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); return; @@ -1595,12 +1595,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } static void ValidateBarrierFlagArg(ValidationContext &ValCtx, CallInst 
*CI, - Value *Arg, unsigned validMask, - StringRef flagName, StringRef opName) { + Value *Arg, unsigned ValidMask, + StringRef FlagName, StringRef OpName) { if (ConstantInt *CArg = dyn_cast(Arg)) { - if ((CArg->getLimitedValue() & (uint32_t)(~validMask)) != 0) { + if ((CArg->getLimitedValue() & (uint32_t)(~ValidMask)) != 0) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrBarrierFlagInvalid, - {flagName, opName}); + {FlagName, OpName}); } } else { ValCtx.EmitInstrError(CI, @@ -1622,35 +1622,35 @@ std::string GetLaunchTypeStr(DXIL::NodeLaunchType LT) { } static void ValidateDxilOperationCallInProfile(CallInst *CI, - DXIL::OpCode opcode, + DXIL::OpCode Opcode, const ShaderModel *pSM, ValidationContext &ValCtx) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = pSM ? pSM->GetKind() : DXIL::ShaderKind::Invalid; llvm::Function *F = CI->getParent()->getParent(); - DXIL::NodeLaunchType nodeLaunchType = DXIL::NodeLaunchType::Invalid; - if (DXIL::ShaderKind::Library == shaderKind) { + DXIL::NodeLaunchType NodeLaunchType = DXIL::NodeLaunchType::Invalid; + if (DXIL::ShaderKind::Library == ShaderKind) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DxilEntryProps &entryProps = ValCtx.DxilMod.GetDxilEntryProps(F); - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind == DXIL::ShaderKind::Node) - nodeLaunchType = entryProps.props.Node.LaunchType; + DxilEntryProps &EntryProps = ValCtx.DxilMod.GetDxilEntryProps(F); + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; + if (ShaderKind == DXIL::ShaderKind::Node) + NodeLaunchType = EntryProps.props.Node.LaunchType; } else if (ValCtx.DxilMod.IsPatchConstantShader(F)) - shaderKind = DXIL::ShaderKind::Hull; + ShaderKind = DXIL::ShaderKind::Hull; } // These shader models are treted like compute - bool isCSLike = shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == 
DXIL::ShaderKind::Node; + bool IsCSLike = ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Node; // Is called from a library function - bool isLibFunc = shaderKind == DXIL::ShaderKind::Library; + bool IsLibFunc = ShaderKind == DXIL::ShaderKind::Library; - ValidateHandleArgs(CI, opcode, ValCtx); + ValidateHandleArgs(CI, Opcode, ValCtx); - switch (opcode) { + switch (Opcode) { // Imm input value validation. case DXIL::OpCode::Asin: case DXIL::OpCode::Acos: @@ -1659,7 +1659,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::DerivFineY: case DXIL::OpCode::DerivCoarseX: case DXIL::OpCode::DerivCoarseY: - ValidateImmOperandForMathDxilOp(CI, opcode, ValCtx); + ValidateImmOperandForMathDxilOp(CI, Opcode, ValCtx); break; // Resource validation. case DXIL::OpCode::GetDimensions: @@ -1684,7 +1684,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::CBufferLoadLegacy: case DXIL::OpCode::RawBufferLoad: case DXIL::OpCode::RawBufferStore: - ValidateResourceDxilOp(CI, opcode, ValCtx); + ValidateResourceDxilOp(CI, Opcode, ValCtx); break; // Input output. case DXIL::OpCode::LoadInput: @@ -1705,13 +1705,13 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::EmitStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::CutStream: - ValidateSignatureDxilOp(CI, opcode, ValCtx); + ValidateSignatureDxilOp(CI, Opcode, ValCtx); break; // Special. 
case DXIL::OpCode::AllocateRayQuery: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); - if (!llvm::isa(constRayFlag)) { + llvm::Value *ConstRayFlag = CI->getOperand(1); + if (!llvm::isa(ConstRayFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQueryFlagsAreConst); } @@ -1719,9 +1719,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::AllocateRayQuery2: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); + llvm::Value *ConstRayFlag = CI->getOperand(1); llvm::Value *RayQueryFlag = CI->getOperand(2); - if (!llvm::isa(constRayFlag) || + if (!llvm::isa(ConstRayFlag) || !llvm::isa(RayQueryFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQuery2FlagsAreConst); @@ -1730,7 +1730,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, // When the ForceOMM2State ConstRayFlag is given as an argument to // a RayQuery object, AllowOpacityMicromaps is expected // as a RayQueryFlag argument - llvm::ConstantInt *Arg1 = llvm::cast(constRayFlag); + llvm::ConstantInt *Arg1 = llvm::cast(ConstRayFlag); llvm::ConstantInt *Arg2 = llvm::cast(RayQueryFlag); if ((Arg1->getValue().getSExtValue() & (unsigned)DXIL::RayFlag::ForceOMM2State) && @@ -1744,9 +1744,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::BufferUpdateCounter: { - DxilInst_BufferUpdateCounter updateCounter(CI); - Value *handle = updateCounter.get_uav(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(handle); + DxilInst_BufferUpdateCounter UpdateCounter(CI); + Value *Handle = UpdateCounter.get_uav(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Handle); if (!RP.isUAV()) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBufferUpdateCounterOnUAV); @@ -1761,20 +1761,20 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, CI, ValidationRule::InstrBufferUpdateCounterOnResHasCounter); } - Value 
*inc = updateCounter.get_inc(); - if (ConstantInt *cInc = dyn_cast(inc)) { - bool isInc = cInc->getLimitedValue() == 1; + Value *Inc = UpdateCounter.get_inc(); + if (ConstantInt *cInc = dyn_cast(Inc)) { + bool IsInc = cInc->getLimitedValue() == 1; if (!ValCtx.isLibProfile) { - auto it = ValCtx.HandleResIndexMap.find(handle); - if (it != ValCtx.HandleResIndexMap.end()) { - unsigned resIndex = it->second; - if (ValCtx.UavCounterIncMap.count(resIndex)) { - if (isInc != ValCtx.UavCounterIncMap[resIndex]) { + auto It = ValCtx.HandleResIndexMap.find(Handle); + if (It != ValCtx.HandleResIndexMap.end()) { + unsigned ResIndex = It->second; + if (ValCtx.UavCounterIncMap.count(ResIndex)) { + if (IsInc != ValCtx.UavCounterIncMap[ResIndex]) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOnlyOneAllocConsume); } } else { - ValCtx.UavCounterIncMap[resIndex] = isInc; + ValCtx.UavCounterIncMap[ResIndex] = IsInc; } } @@ -1789,35 +1789,35 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } break; case DXIL::OpCode::Barrier: { - DxilInst_Barrier barrier(CI); - Value *mode = barrier.get_barrierMode(); - ConstantInt *cMode = dyn_cast(mode); - if (!cMode) { + DxilInst_Barrier Barrier(CI); + Value *Mode = Barrier.get_barrierMode(); + ConstantInt *CMode = dyn_cast(Mode); + if (!CMode) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Mode", "Barrier"}); return; } - const unsigned uglobal = + const unsigned Uglobal = static_cast(DXIL::BarrierMode::UAVFenceGlobal); - const unsigned g = static_cast(DXIL::BarrierMode::TGSMFence); - const unsigned ut = + const unsigned G = static_cast(DXIL::BarrierMode::TGSMFence); + const unsigned Ut = static_cast(DXIL::BarrierMode::UAVFenceThreadGroup); - unsigned barrierMode = cMode->getLimitedValue(); + unsigned BarrierMode = CMode->getLimitedValue(); - if (isCSLike || isLibFunc) { - bool bHasUGlobal = barrierMode & uglobal; - bool bHasGroup = barrierMode & g; - bool bHasUGroup = barrierMode & ut; - if (bHasUGlobal && 
bHasUGroup) { + if (IsCSLike || IsLibFunc) { + bool HasUGlobal = BarrierMode & Uglobal; + bool HasGroup = BarrierMode & G; + bool HasUGroup = BarrierMode & Ut; + if (HasUGlobal && HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeUselessUGroup); } - if (!bHasUGlobal && !bHasGroup && !bHasUGroup) { + if (!HasUGlobal && !HasGroup && !HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeNoMemory); } } else { - if (uglobal != barrierMode) { + if (Uglobal != BarrierMode) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } @@ -1831,28 +1831,28 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, ValidateBarrierFlagArg(ValCtx, CI, DI.get_SemanticFlags(), (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, "semantic", "BarrierByMemoryType"); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; case DXIL::OpCode::BarrierByNodeRecordHandle: case DXIL::OpCode::BarrierByMemoryHandle: { - std::string opName = opcode == DXIL::OpCode::BarrierByNodeRecordHandle + std::string OpName = Opcode == DXIL::OpCode::BarrierByNodeRecordHandle ? 
"barrierByNodeRecordHandle" : "barrierByMemoryHandle"; DxilInst_BarrierByMemoryHandle DIMH(CI); ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_SemanticFlags(), (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", opName); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + "semantic", OpName); + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; @@ -1864,7 +1864,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { - Type *pOverloadType = OP::GetOverloadType(opcode, CI->getCalledFunction()); + Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); if ((pOverloadType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, @@ -1890,73 +1890,73 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::ThreadId: // SV_DispatchThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(nodeLaunchType)}); + {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::GroupId: // SV_GroupId - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == 
DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"GroupId", "SV_GroupId", GetLaunchTypeStr(nodeLaunchType)}); + {"GroupId", "SV_GroupId", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::ThreadIdInGroup: // SV_GroupThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"ThreadIdInGroup", "SV_GroupThreadID", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::FlattenedThreadIdInGroup: // SV_GroupIndex - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"FlattenedThreadIdInGroup", "SV_GroupIndex", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; default: - // TODO: make sure every opcode is checked. + // TODO: make sure every Opcode is checked. // Skip opcodes don't need special check. break; } } static bool IsDxilFunction(llvm::Function *F) { - unsigned argSize = F->arg_size(); - if (argSize < 1) { + unsigned ArgSize = F->arg_size(); + if (ArgSize < 1) { // Cannot be a DXIL operation. 
return false; } @@ -1991,9 +1991,9 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - OP *hlslOP = ValCtx.DxilMod.GetOP(); - bool isDxilOp = OP::IsDxilOpFunc(F); - Type *voidTy = Type::getVoidTy(F->getContext()); + OP *HlslOP = ValCtx.DxilMod.GetOP(); + bool IsDxilOp = OP::IsDxilOpFunc(F); + Type *VoidTy = Type::getVoidTy(F->getContext()); for (User *user : F->users()) { CallInst *CI = dyn_cast(user); @@ -2004,32 +2004,32 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } // Skip call to external user defined function - if (!isDxilOp) + if (!IsDxilOp) continue; - Value *argOpcode = CI->getArgOperand(0); - ConstantInt *constOpcode = dyn_cast(argOpcode); - if (!constOpcode) { - // opcode not immediate; function body will validate this error. + Value *ArgOpcode = CI->getArgOperand(0); + ConstantInt *ConstOpcode = dyn_cast(ArgOpcode); + if (!ConstOpcode) { + // Opcode not immediate; function body will validate this error. continue; } - unsigned opcode = constOpcode->getLimitedValue(); - if (opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { - // invalid opcode; function body will validate this error. + unsigned Opcode = ConstOpcode->getLimitedValue(); + if (Opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { + // invalid Opcode; function body will validate this error. 
continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; // In some cases, no overloads are provided (void is exclusive to others) - Function *dxilFunc; - if (hlslOP->IsOverloadLegal(dxilOpcode, voidTy)) { - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, voidTy); + Function *DxilFunc; + if (HlslOP->IsOverloadLegal(DxilOpcode, VoidTy)) { + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, VoidTy); } else { - Type *Ty = OP::GetOverloadType(dxilOpcode, CI->getCalledFunction()); + Type *Ty = OP::GetOverloadType(DxilOpcode, CI->getCalledFunction()); try { - if (!hlslOP->IsOverloadLegal(dxilOpcode, Ty)) { + if (!HlslOP->IsOverloadLegal(DxilOpcode, Ty)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } @@ -2037,75 +2037,75 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty); + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, Ty); } - if (!dxilFunc) { - // Cannot find dxilFunction based on opcode and type. + if (!DxilFunc) { + // Cannot find DxilFunction based on Opcode and type. 
ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - if (dxilFunc->getFunctionType() != F->getFunctionType()) { + if (DxilFunc->getFunctionType() != F->getFunctionType()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrCallOload, - {dxilFunc->getName()}); + {DxilFunc->getName()}); continue; } unsigned major = pSM->GetMajor(); unsigned minor = pSM->GetMinor(); if (ValCtx.isLibProfile) { - Function *callingFunction = CI->getParent()->getParent(); + Function *CallingFunction = CI->getParent()->getParent(); DXIL::ShaderKind SK = DXIL::ShaderKind::Library; - if (ValCtx.DxilMod.HasDxilFunctionProps(callingFunction)) - SK = ValCtx.DxilMod.GetDxilFunctionProps(callingFunction).shaderKind; - else if (ValCtx.DxilMod.IsPatchConstantShader(callingFunction)) + if (ValCtx.DxilMod.HasDxilFunctionProps(CallingFunction)) + SK = ValCtx.DxilMod.GetDxilFunctionProps(CallingFunction).shaderKind; + else if (ValCtx.DxilMod.IsPatchConstantShader(CallingFunction)) SK = DXIL::ShaderKind::Hull; - if (!ValidateOpcodeInProfile(dxilOpcode, SK, major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, SK, major, minor)) { // Opcode not available in profile. // produces: "lib_6_3(ps)", or "lib_6_3(anyhit)" for shader types // Or: "lib_6_3(lib)" for library function - std::string shaderModel = pSM->GetName(); - shaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; + std::string ShaderModel = pSM->GetName(); + ShaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), shaderModel}); + {HlslOP->GetOpCodeName(DxilOpcode), ShaderModel}); continue; } } else { - if (!ValidateOpcodeInProfile(dxilOpcode, pSM->GetKind(), major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, pSM->GetKind(), major, minor)) { // Opcode not available in profile. 
ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), pSM->GetName()}); + {HlslOP->GetOpCodeName(DxilOpcode), pSM->GetName()}); continue; } } // Check more detail. - ValidateDxilOperationCallInProfile(CI, dxilOpcode, pSM, ValCtx); + ValidateDxilOperationCallInProfile(CI, DxilOpcode, pSM, ValCtx); } } /////////////////////////////////////////////////////////////////////////////// // Instruction validation functions. // -static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { - if (ST == hlslOP->GetBinaryWithCarryType()) +static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *HlslOP) { + if (ST == HlslOP->GetBinaryWithCarryType()) return true; - if (ST == hlslOP->GetBinaryWithTwoOutputsType()) + if (ST == HlslOP->GetBinaryWithTwoOutputsType()) return true; - if (ST == hlslOP->GetFourI32Type()) + if (ST == HlslOP->GetFourI32Type()) return true; - if (ST == hlslOP->GetFourI16Type()) + if (ST == HlslOP->GetFourI16Type()) return true; - if (ST == hlslOP->GetDimensionsType()) + if (ST == HlslOP->GetDimensionsType()) return true; - if (ST == hlslOP->GetHandleType()) + if (ST == HlslOP->GetHandleType()) return true; - if (ST == hlslOP->GetSamplePosType()) + if (ST == HlslOP->GetSamplePosType()) return true; - if (ST == hlslOP->GetSplitDoubleType()) + if (ST == HlslOP->GetSplitDoubleType()) return true; unsigned EltNum = ST->getNumElements(); @@ -2114,14 +2114,14 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { case 2: // Check if it's a native vector resret. if (EltTy->isVectorTy()) - return ST == hlslOP->GetResRetType(EltTy); + return ST == HlslOP->GetResRetType(EltTy); LLVM_FALLTHROUGH; case 4: case 8: // 2 for doubles, 8 for halfs. 
- return ST == hlslOP->GetCBufferRetType(EltTy); + return ST == HlslOP->GetCBufferRetType(EltTy); break; case 5: - return ST == hlslOP->GetResRetType(EltTy); + return ST == HlslOP->GetResRetType(EltTy); break; default: return false; @@ -2132,11 +2132,11 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { // inner type (UDT struct member) may be: [N dim array of]( UDT struct | scalar // ) scalar type may be: ( float(16|32|64) | int(16|32|64) ) static bool ValidateType(Type *Ty, ValidationContext &ValCtx, - bool bInner = false) { + bool IsInner = false) { DXASSERT_NOMSG(Ty != nullptr); if (Ty->isPointerTy()) { Type *EltTy = Ty->getPointerElementType(); - if (bInner || EltTy->isPointerTy()) { + if (IsInner || EltTy->isPointerTy()) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoPtrToPtr); return false; } @@ -2144,7 +2144,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } if (Ty->isArrayTy()) { Type *EltTy = Ty->getArrayElementType(); - if (!bInner && isa(EltTy)) { + if (!IsInner && isa(EltTy)) { // Outermost array should be converted to single-dim, // but arrays inside struct are allowed to be multi-dim ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoMultiDim); @@ -2155,7 +2155,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, Ty = EltTy; } if (Ty->isStructTy()) { - bool result = true; + bool Result = true; StructType *ST = cast(Ty); StringRef Name = ST->getName(); @@ -2163,28 +2163,28 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, // Allow handle type. 
if (ValCtx.HandleTy == Ty) return true; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); - if (IsDxilBuiltinStructType(ST, hlslOP)) { + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); + if (IsDxilBuiltinStructType(ST, HlslOP)) { ValCtx.EmitTypeError(Ty, ValidationRule::InstrDxilStructUser); - result = false; + Result = false; } ValCtx.EmitTypeError(Ty, ValidationRule::DeclDxilNsReserved); - result = false; + Result = false; } for (auto e : ST->elements()) { - if (!ValidateType(e, ValCtx, /*bInner*/ true)) { - result = false; + if (!ValidateType(e, ValCtx, /*IsInner*/ true)) { + Result = false; } } - return result; + return Result; } if (Ty->isFloatTy() || Ty->isHalfTy() || Ty->isDoubleTy()) { return true; } if (Ty->isIntegerTy()) { - unsigned width = Ty->getIntegerBitWidth(); - if (width != 1 && width != 8 && width != 16 && width != 32 && width != 64) { + unsigned Width = Ty->getIntegerBitWidth(); + if (Width != 1 && Width != 8 && Width != 16 && Width != 32 && Width != 64) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesIntWidth); return false; } @@ -2207,13 +2207,13 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, - unsigned index, uint64_t *pValue) { - *pValue = 0; - if (pMD->getNumOperands() < index) { + unsigned Index, uint64_t *PValue) { + *PValue = 0; + if (pMD->getNumOperands() < Index) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - ConstantAsMetadata *C = dyn_cast(pMD->getOperand(index)); + ConstantAsMetadata *C = dyn_cast(pMD->getOperand(Index)); if (C == nullptr) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; @@ -2223,7 +2223,7 @@ static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - *pValue = CI->getValue().getZExtValue(); + *PValue = CI->getValue().getZExtValue(); return true; } @@ -2237,14 +2237,14 @@ static bool 
IsPrecise(Instruction &I, ValidationContext &ValCtx) { return false; } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { return false; } - if (val == 1) { + if (Val == 1) { return true; } - if (val != 0) { + if (Val != 0) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } return false; @@ -2263,12 +2263,12 @@ static bool IsValueMinPrec(DxilModule &DxilMod, Value *V) { } static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *setMeshOutputCounts, - CallInst *getMeshPayload) { + CallInst *SetMeshOutputCounts, + CallInst *GetMeshPayload) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Mesh) + if (ShaderKind != DXIL::ShaderKind::Mesh) return; } else { return; @@ -2277,10 +2277,10 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, DominatorTreeAnalysis DTA; DominatorTree DT = DTA.run(*F); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - bool foundSetMeshOutputCountsInCurrentBB = false; - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + bool FoundSetMeshOutputCountsInCurrentBb = false; + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; // Calls to external functions. 
CallInst *CI = dyn_cast(&I); @@ -2296,22 +2296,22 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, continue; } - if (CI == setMeshOutputCounts) { - foundSetMeshOutputCountsInCurrentBB = true; + if (CI == SetMeshOutputCounts) { + FoundSetMeshOutputCountsInCurrentBb = true; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); - unsigned opcode = OpcodeConst->getLimitedValue(); - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; - - if (dxilOpcode == DXIL::OpCode::StoreVertexOutput || - dxilOpcode == DXIL::OpCode::StorePrimitiveOutput || - dxilOpcode == DXIL::OpCode::EmitIndices) { - if (setMeshOutputCounts == nullptr) { + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); + unsigned Opcode = OpcodeConst->getLimitedValue(); + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; + + if (DxilOpcode == DXIL::OpCode::StoreVertexOutput || + DxilOpcode == DXIL::OpCode::StorePrimitiveOutput || + DxilOpcode == DXIL::OpCode::EmitIndices) { + if (SetMeshOutputCounts == nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMissingSetMeshOutputCounts); - } else if (!foundSetMeshOutputCountsInCurrentBB && - !DT.dominates(setMeshOutputCounts->getParent(), + } else if (!FoundSetMeshOutputCountsInCurrentBb && + !DT.dominates(SetMeshOutputCounts->getParent(), I.getParent())) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNonDominatingSetMeshOutputCounts); @@ -2322,61 +2322,61 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, } } - if (getMeshPayload) { - PointerType *payloadPTy = cast(getMeshPayload->getType()); - StructType *payloadTy = - cast(payloadPTy->getPointerElementType()); + if (GetMeshPayload) { + PointerType *PayloadPTy = cast(GetMeshPayload->getType()); + StructType *PayloadTy = + cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned 
PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.MS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.MS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.MS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes)}); } - if (prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSize, - {F->getName(), std::to_string(prop.ShaderProps.MS.payloadSizeInBytes), + {F->getName(), std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } } static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *dispatchMesh) { + CallInst *DispatchMesh) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Amplification) + if (ShaderKind != DXIL::ShaderKind::Amplification) return; - if (dispatchMesh) { - DxilInst_DispatchMesh dispatchMeshCall(dispatchMesh); - Value *operandVal = dispatchMeshCall.get_payload(); - Type *payloadTy = operandVal->getType(); + if (DispatchMesh) { + DxilInst_DispatchMesh DispatchMeshCall(DispatchMesh); + Value *OperandVal = DispatchMeshCall.get_payload(); + Type *PayloadTy = OperandVal->getType(); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = 
ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.AS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.AS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitInstrFormatError( - dispatchMesh, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes)}); } - if (prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, {F->getName(), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } @@ -2385,7 +2385,7 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, return; } - if (dispatchMesh == nullptr) { + if (DispatchMesh == nullptr) { ValCtx.EmitFnError(F, ValidationRule::InstrNotOnceDispatchMesh); return; } @@ -2393,30 +2393,30 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, PostDominatorTree PDT; PDT.runOnFunction(*F); - if (!PDT.dominates(dispatchMesh->getParent(), &F->getEntryBlock())) { - ValCtx.EmitInstrError(dispatchMesh, + if (!PDT.dominates(DispatchMesh->getParent(), &F->getEntryBlock())) { + ValCtx.EmitInstrError(DispatchMesh, ValidationRule::InstrNonDominatingDispatchMesh); } - Function *dispatchMeshFunc = dispatchMesh->getCalledFunction(); - FunctionType *dispatchMeshFuncTy = dispatchMeshFunc->getFunctionType(); - PointerType *payloadPTy = - cast(dispatchMeshFuncTy->getParamType(4)); - StructType *payloadTy = 
cast(payloadPTy->getPointerElementType()); + Function *DispatchMeshFunc = DispatchMesh->getCalledFunction(); + FunctionType *DispatchMeshFuncTy = DispatchMeshFunc->getFunctionType(); + PointerType *PayloadPTy = + cast(DispatchMeshFuncTy->getParamType(4)); + StructType *PayloadTy = cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - if (payloadSize > DXIL::kMaxMSASPayloadBytes) { + if (PayloadSize > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, - {F->getName(), std::to_string(payloadSize), + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + {F->getName(), std::to_string(PayloadSize), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } -static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { +static void ValidateControlFlowHint(BasicBlock &BB, ValidationContext &ValCtx) { // Validate controlflow hint. 
- TerminatorInst *TI = bb.getTerminator(); + TerminatorInst *TI = BB.getTerminator(); if (!TI) return; @@ -2427,33 +2427,33 @@ static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { if (pNode->getNumOperands() < 3) return; - bool bHasBranch = false; - bool bHasFlatten = false; - bool bForceCase = false; + bool HasBranch = false; + bool HasFlatten = false; + bool ForceCase = false; - for (unsigned i = 2; i < pNode->getNumOperands(); i++) { - uint64_t value = 0; - if (GetNodeOperandAsInt(ValCtx, pNode, i, &value)) { - DXIL::ControlFlowHint hint = static_cast(value); - switch (hint) { + for (unsigned I = 2; I < pNode->getNumOperands(); I++) { + uint64_t Value = 0; + if (GetNodeOperandAsInt(ValCtx, pNode, I, &Value)) { + DXIL::ControlFlowHint Hint = static_cast(Value); + switch (Hint) { case DXIL::ControlFlowHint::Flatten: - bHasFlatten = true; + HasFlatten = true; break; case DXIL::ControlFlowHint::Branch: - bHasBranch = true; + HasBranch = true; break; case DXIL::ControlFlowHint::ForceCase: - bForceCase = true; + ForceCase = true; break; default: ValCtx.EmitMetaError(pNode, ValidationRule::MetaInvalidControlFlowHint); } } } - if (bHasBranch && bHasFlatten) { + if (HasBranch && HasFlatten) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaBranchFlatten); } - if (bForceCase && !isa(TI)) { + if (ForceCase && !isa(TI)) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaForceCaseOnSwitch); } } @@ -2466,30 +2466,30 @@ static void ValidateTBAAMetadata(MDNode *Node, ValidationContext &ValCtx) { } } break; case 2: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if (!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } } break; case 3: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if 
(!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } - ConstantAsMetadata *pointsToConstMem = + ConstantAsMetadata *PointsToConstMem = dyn_cast(Node->getOperand(2)); - if (!pointsToConstMem) { + if (!PointsToConstMem) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ConstantInt *isConst = - dyn_cast(pointsToConstMem->getValue()); - if (!isConst) { + ConstantInt *IsConst = + dyn_cast(PointsToConstMem->getValue()); + if (!IsConst) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); - } else if (isConst->getValue().getLimitedValue() > 1) { + } else if (IsConst->getValue().getLimitedValue() > 1) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } } @@ -2570,11 +2570,11 @@ static void ValidateNonUniformMetadata(Instruction &I, MDNode *pMD, if (pMD->getNumOperands() != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - if (val != 1) { + if (Val != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } } @@ -2609,31 +2609,31 @@ static void ValidateInstructionMetadata(Instruction *I, } static void ValidateFunctionAttribute(Function *F, ValidationContext &ValCtx) { - AttributeSet attrSet = F->getAttributes().getFnAttributes(); + AttributeSet AttrSet = F->getAttributes().getFnAttributes(); // fp32-denorm-mode - if (attrSet.hasAttribute(AttributeSet::FunctionIndex, + if (AttrSet.hasAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString)) { - Attribute attr = attrSet.getAttribute(AttributeSet::FunctionIndex, + Attribute Attr = AttrSet.getAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString); - StringRef value = attr.getValueAsString(); - if 
(!value.equals(DXIL::kFP32DenormValueAnyString) && - !value.equals(DXIL::kFP32DenormValueFtzString) && - !value.equals(DXIL::kFP32DenormValuePreserveString)) { - ValCtx.EmitFnAttributeError(F, attr.getKindAsString(), - attr.getValueAsString()); + StringRef StrValue = Attr.getValueAsString(); + if (!StrValue.equals(DXIL::kFP32DenormValueAnyString) && + !StrValue.equals(DXIL::kFP32DenormValueFtzString) && + !StrValue.equals(DXIL::kFP32DenormValuePreserveString)) { + ValCtx.EmitFnAttributeError(F, Attr.getKindAsString(), + Attr.getValueAsString()); } } // TODO: If validating libraries, we should remove all unknown function // attributes. For each attribute, check if it is a known attribute - for (unsigned I = 0, E = attrSet.getNumSlots(); I != E; ++I) { - for (auto AttrIter = attrSet.begin(I), AttrEnd = attrSet.end(I); + for (unsigned I = 0, E = AttrSet.getNumSlots(); I != E; ++I) { + for (auto AttrIter = AttrSet.begin(I), AttrEnd = AttrSet.end(I); AttrIter != AttrEnd; ++AttrIter) { if (!AttrIter->isStringAttribute()) { continue; } - StringRef kind = AttrIter->getKindAsString(); - if (!kind.equals(DXIL::kFP32DenormKindString) && - !kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { + StringRef Kind = AttrIter->getKindAsString(); + if (!Kind.equals(DXIL::kFP32DenormKindString) && + !Kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { ValCtx.EmitFnAttributeError(F, AttrIter->getKindAsString(), AttrIter->getValueAsString()); } @@ -2683,10 +2683,10 @@ static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I, ValidationContext &ValCtx) { if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) return true; - unsigned OpCode = I.getOpcode(); - if (OpCode == Instruction::InsertElement || - OpCode == Instruction::ExtractElement || - OpCode == Instruction::ShuffleVector) + unsigned Opcode = I.getOpcode(); + if (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement || + Opcode == Instruction::ShuffleVector) return false; return true; @@ 
-2697,16 +2697,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision; bool SupportsLifetimeIntrinsics = ValCtx.DxilMod.GetShaderModel()->IsSM66Plus(); - SmallVector gradientOps; - SmallVector barriers; - CallInst *setMeshOutputCounts = nullptr; - CallInst *getMeshPayload = nullptr; - CallInst *dispatchMesh = nullptr; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + SmallVector GradientOps; + SmallVector Barriers; + CallInst *SetMeshOutputCounts = nullptr; + CallInst *GetMeshPayload = nullptr; + CallInst *DispatchMesh = nullptr; + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; if (I.hasMetadata()) { @@ -2745,27 +2745,27 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); if (OpcodeConst == nullptr) { ValCtx.EmitInstrFormatError(&I, ValidationRule::InstrOpConst, {"Opcode", "DXIL operation"}); continue; } - unsigned opcode = OpcodeConst->getLimitedValue(); - if (opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { + unsigned Opcode = OpcodeConst->getLimitedValue(); + if (Opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpCode, {std::to_string((unsigned)DXIL::OpCode::NumOpCodes), - std::to_string(opcode)}); + std::to_string(Opcode)}); continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; bool IllegalOpFunc = true; - for (auto &it : 
hlslOP->GetOpFuncList(dxilOpcode)) { - if (it.second == FCalled) { + for (auto &It : HlslOP->GetOpFuncList(DxilOpcode)) { + if (It.second == FCalled) { IllegalOpFunc = false; break; } @@ -2774,46 +2774,46 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { if (IllegalOpFunc) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpFunction, - {FCalled->getName(), OP::GetOpCodeName(dxilOpcode)}); + {FCalled->getName(), OP::GetOpCodeName(DxilOpcode)}); continue; } - if (OP::IsDxilOpGradient(dxilOpcode)) { - gradientOps.push_back(CI); + if (OP::IsDxilOpGradient(DxilOpcode)) { + GradientOps.push_back(CI); } - if (dxilOpcode == DXIL::OpCode::Barrier) { - barriers.push_back(CI); + if (DxilOpcode == DXIL::OpCode::Barrier) { + Barriers.push_back(CI); } // External function validation will check the parameter // list. This function will check that the call does not // violate any rules. - if (dxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { + if (DxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { // validate the call count of SetMeshOutputCounts - if (setMeshOutputCounts != nullptr) { + if (SetMeshOutputCounts != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleSetMeshOutputCounts); } - setMeshOutputCounts = CI; + SetMeshOutputCounts = CI; } - if (dxilOpcode == DXIL::OpCode::GetMeshPayload) { + if (DxilOpcode == DXIL::OpCode::GetMeshPayload) { // validate the call count of GetMeshPayload - if (getMeshPayload != nullptr) { + if (GetMeshPayload != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleGetMeshPayload); } - getMeshPayload = CI; + GetMeshPayload = CI; } - if (dxilOpcode == DXIL::OpCode::DispatchMesh) { + if (DxilOpcode == DXIL::OpCode::DispatchMesh) { // validate the call count of DispatchMesh - if (dispatchMesh != nullptr) { + if (DispatchMesh != nullptr) { ValCtx.EmitInstrError(&I, ValidationRule::InstrNotOnceDispatchMesh); } - dispatchMesh = CI; + DispatchMesh = CI; } } continue; @@ 
-2821,23 +2821,23 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { for (Value *op : I.operands()) { if (isa(op)) { - bool legalUndef = isa(&I); + bool LegalUndef = isa(&I); if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } if (isa(&I)) { - legalUndef = op == I.getOperand(1); + LegalUndef = op == I.getOperand(1); } if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } - if (!legalUndef) + if (!LegalUndef) ValCtx.EmitInstrError(&I, ValidationRule::InstrNoReadingUninitialized); } else if (ConstantExpr *CE = dyn_cast(op)) { - for (Value *opCE : CE->operands()) { - if (isa(opCE)) { + for (Value *OpCE : CE->operands()) { + if (isa(OpCE)) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNoReadingUninitialized); } @@ -2867,8 +2867,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } - unsigned opcode = I.getOpcode(); - switch (opcode) { + unsigned Opcode = I.getOpcode(); + switch (Opcode) { case Instruction::Alloca: { AllocaInst *AI = cast(&I); // TODO: validate address space and alignment @@ -2909,26 +2909,26 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } GetElementPtrInst *GEP = cast(&I); - bool allImmIndex = true; + bool AllImmIndex = true; for (auto Idx = GEP->idx_begin(), E = GEP->idx_end(); Idx != E; Idx++) { if (!isa(Idx)) { - allImmIndex = false; + AllImmIndex = false; break; } } - if (allImmIndex) { + if (AllImmIndex) { const DataLayout &DL = ValCtx.DL; Value *Ptr = GEP->getPointerOperand(); - unsigned size = + unsigned Size = DL.getTypeAllocSize(Ptr->getType()->getPointerElementType()); - unsigned valSize = + unsigned ValSize = DL.getTypeAllocSize(GEP->getType()->getPointerElementType()); SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - unsigned offset = + unsigned Offset = DL.getIndexedOffset(GEP->getPointerOperandType(), Indices); - if ((offset + valSize) > size) { + if 
((Offset + ValSize) > Size) { ValCtx.EmitInstrError(GEP, ValidationRule::InstrInBoundsAccess); } } @@ -3002,16 +3002,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: { Value *Ptr = I.getOperand(AtomicRMWInst::getPointerOperandIndex()); - PointerType *ptrType = cast(Ptr->getType()); - Type *elType = ptrType->getElementType(); + PointerType *PtrType = cast(Ptr->getType()); + Type *ElType = PtrType->getElementType(); const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - if ((elType->isIntegerTy(64)) && !pSM->IsSM66Plus()) + if ((ElType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( &I, ValidationRule::SmOpcodeInInvalidFunction, {"64-bit atomic operations", "Shader Model 6.6+"}); - if (ptrType->getAddressSpace() != DXIL::kTGSMAddrSpace && - ptrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) + if (PtrType->getAddressSpace() != DXIL::kTGSMAddrSpace && + PtrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) ValCtx.EmitInstrError( &I, ValidationRule::InstrAtomicOpNonGroupsharedOrRecord); @@ -3062,12 +3062,12 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } } - ValidateControlFlowHint(*b, ValCtx); + ValidateControlFlowHint(*B, ValCtx); } - ValidateMsIntrinsics(F, ValCtx, setMeshOutputCounts, getMeshPayload); + ValidateMsIntrinsics(F, ValCtx, SetMeshOutputCounts, GetMeshPayload); - ValidateAsIntrinsics(F, ValCtx, dispatchMesh); + ValidateAsIntrinsics(F, ValCtx, DispatchMesh); } static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { @@ -3075,39 +3075,39 @@ static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { // to do here if (!ValCtx.DxilMod.HasDxilFunctionProps(F)) return; - auto &props = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (!props.IsNode()) + auto &Props = ValCtx.DxilMod.GetDxilFunctionProps(F); + if (!Props.IsNode()) return; - if 
(props.InputNodes.size() > 1) { + if (Props.InputNodes.size() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::DeclMultipleNodeInputs, - {F->getName(), std::to_string(props.InputNodes.size())}); + {F->getName(), std::to_string(Props.InputNodes.size())}); } - for (auto &input : props.InputNodes) { - if (!input.Flags.RecordTypeMatchesLaunchType(props.Node.LaunchType)) { + for (auto &input : Props.InputNodes) { + if (!input.Flags.RecordTypeMatchesLaunchType(Props.Node.LaunchType)) { // We allow EmptyNodeInput here, as that may have been added implicitly // if there was no input specified if (input.Flags.IsEmptyInput()) continue; - llvm::StringRef validInputs = ""; - switch (props.Node.LaunchType) { + llvm::StringRef ValidInputs = ""; + switch (Props.Node.LaunchType) { case DXIL::NodeLaunchType::Broadcasting: - validInputs = "{RW}DispatchNodeInputRecord"; + ValidInputs = "{RW}DispatchNodeInputRecord"; break; case DXIL::NodeLaunchType::Coalescing: - validInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; + ValidInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; break; case DXIL::NodeLaunchType::Thread: - validInputs = "{RW}ThreadNodeInputRecord"; + ValidInputs = "{RW}ThreadNodeInputRecord"; break; default: llvm_unreachable("invalid launch type"); } ValCtx.EmitFnFormatError( F, ValidationRule::DeclNodeLaunchInputType, - {ShaderModel::GetNodeLaunchTypeName(props.Node.LaunchType), - F->getName(), validInputs}); + {ShaderModel::GetNodeLaunchTypeName(Props.Node.LaunchType), + F->getName(), ValidInputs}); } } } @@ -3118,26 +3118,26 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { if (F.isIntrinsic() || IsDxilFunction(&F)) return; } else { - DXIL::ShaderKind shaderKind = DXIL::ShaderKind::Library; - bool isShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); - unsigned numUDTShaderArgs = 0; - if (isShader) { - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; - switch (shaderKind) { + DXIL::ShaderKind ShaderKind = 
DXIL::ShaderKind::Library; + bool IsShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); + unsigned NumUDTShaderArgs = 0; + if (IsShader) { + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; + switch (ShaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - numUDTShaderArgs = 2; + NumUDTShaderArgs = 2; break; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - numUDTShaderArgs = 1; + NumUDTShaderArgs = 1; break; case DXIL::ShaderKind::Compute: { DxilModule &DM = ValCtx.DxilMod; if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); // Check that compute has no node metadata - if (entryProps.props.IsNode()) { + if (EntryProps.props.IsNode()) { ValCtx.EmitFnFormatError(&F, ValidationRule::MetaComputeWithNode, {F.getName()}); } @@ -3148,45 +3148,45 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { break; } } else { - isShader = ValCtx.DxilMod.IsPatchConstantShader(&F); + IsShader = ValCtx.DxilMod.IsPatchConstantShader(&F); } // Entry function should not have parameter. - if (isShader && 0 == numUDTShaderArgs && !F.arg_empty()) + if (IsShader && 0 == NumUDTShaderArgs && !F.arg_empty()) ValCtx.EmitFnFormatError(&F, ValidationRule::FlowFunctionCall, {F.getName()}); // Shader functions should return void. 
- if (isShader && !F.getReturnType()->isVoidTy()) + if (IsShader && !F.getReturnType()->isVoidTy()) ValCtx.EmitFnFormatError(&F, ValidationRule::DeclShaderReturnVoid, {F.getName()}); - auto ArgFormatError = [&](Function &F, Argument &arg, ValidationRule rule) { - if (arg.hasName()) - ValCtx.EmitFnFormatError(&F, rule, {arg.getName().str(), F.getName()}); + auto ArgFormatError = [&](Function &F, Argument &Arg, ValidationRule Rule) { + if (Arg.hasName()) + ValCtx.EmitFnFormatError(&F, Rule, {Arg.getName().str(), F.getName()}); else - ValCtx.EmitFnFormatError(&F, rule, - {std::to_string(arg.getArgNo()), F.getName()}); + ValCtx.EmitFnFormatError(&F, Rule, + {std::to_string(Arg.getArgNo()), F.getName()}); }; - unsigned numArgs = 0; - for (auto &arg : F.args()) { - Type *argTy = arg.getType(); - if (argTy->isPointerTy()) - argTy = argTy->getPointerElementType(); - - numArgs++; - if (numUDTShaderArgs) { - if (arg.getArgNo() >= numUDTShaderArgs) { - ArgFormatError(F, arg, ValidationRule::DeclExtraArgs); - } else if (!argTy->isStructTy()) { - switch (shaderKind) { + unsigned NumArgs = 0; + for (auto &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + if (ArgTy->isPointerTy()) + ArgTy = ArgTy->getPointerElementType(); + + NumArgs++; + if (NumUDTShaderArgs) { + if (Arg.getArgNo() >= NumUDTShaderArgs) { + ArgFormatError(F, Arg, ValidationRule::DeclExtraArgs); + } else if (!ArgTy->isStructTy()) { + switch (ShaderKind) { case DXIL::ShaderKind::Callable: - ArgFormatError(F, arg, ValidationRule::DeclParamStruct); + ArgFormatError(F, Arg, ValidationRule::DeclParamStruct); break; default: - ArgFormatError(F, arg, - arg.getArgNo() == 0 + ArgFormatError(F, Arg, + Arg.getArgNo() == 0 ? 
ValidationRule::DeclPayloadStruct : ValidationRule::DeclAttrStruct); } @@ -3194,24 +3194,24 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { continue; } - while (argTy->isArrayTy()) { - argTy = argTy->getArrayElementType(); + while (ArgTy->isArrayTy()) { + ArgTy = ArgTy->getArrayElementType(); } - if (argTy->isStructTy() && !ValCtx.isLibProfile) { - ArgFormatError(F, arg, ValidationRule::DeclFnFlattenParam); + if (ArgTy->isStructTy() && !ValCtx.isLibProfile) { + ArgFormatError(F, Arg, ValidationRule::DeclFnFlattenParam); break; } } - if (numArgs < numUDTShaderArgs && shaderKind != DXIL::ShaderKind::Node) { - StringRef argType[2] = { - shaderKind == DXIL::ShaderKind::Callable ? "params" : "payload", + if (NumArgs < NumUDTShaderArgs && ShaderKind != DXIL::ShaderKind::Node) { + StringRef ArgType[2] = { + ShaderKind == DXIL::ShaderKind::Callable ? "params" : "payload", "attributes"}; - for (unsigned i = numArgs; i < numUDTShaderArgs; i++) { + for (unsigned I = NumArgs; I < NumUDTShaderArgs; I++) { ValCtx.EmitFnFormatError( &F, ValidationRule::DeclShaderMissingArg, - {ShaderModel::GetKindName(shaderKind), F.getName(), argType[i]}); + {ShaderModel::GetKindName(ShaderKind), F.getName(), ArgType[I]}); } } @@ -3248,25 +3248,25 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { static void ValidateGlobalVariable(GlobalVariable &GV, ValidationContext &ValCtx) { - bool isInternalGV = + bool IsInternalGv = dxilutil::IsStaticGlobal(&GV) || dxilutil::IsSharedMemoryGlobal(&GV); if (ValCtx.isLibProfile) { - auto isCBufferGlobal = + auto IsCBufferGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isResourceGlobal = + auto IsResourceGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isSamplerGlobal = + auto IsSamplerGlobal = [&](const 
std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) @@ -3274,32 +3274,32 @@ static void ValidateGlobalVariable(GlobalVariable &GV, return false; }; - bool isRes = isCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetUAVs()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetSRVs()); - isRes |= isSamplerGlobal(ValCtx.DxilMod.GetSamplers()); - isInternalGV |= isRes; + bool IsRes = IsCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetUAVs()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetSRVs()); + IsRes |= IsSamplerGlobal(ValCtx.DxilMod.GetSamplers()); + IsInternalGv |= IsRes; // Allow special dx.ishelper for library target if (GV.getName().compare(DXIL::kDxIsHelperGlobalName) == 0) { Type *Ty = GV.getType()->getPointerElementType(); if (Ty->isIntegerTy() && Ty->getScalarSizeInBits() == 32) { - isInternalGV = true; + IsInternalGv = true; } } } - if (!isInternalGV) { + if (!IsInternalGv) { if (!GV.user_empty()) { - bool hasInstructionUser = false; + bool HasInstructionUser = false; for (User *U : GV.users()) { if (isa(U)) { - hasInstructionUser = true; + HasInstructionUser = true; break; } } // External GV should not have instruction user. 
- if (hasInstructionUser) { + if (HasInstructionUser) { ValCtx.EmitGlobalVariableFormatError( &GV, ValidationRule::DeclNotUsedExternal, {GV.getName()}); } @@ -3322,14 +3322,14 @@ static void ValidateGlobalVariable(GlobalVariable &GV, } static void CollectFixAddressAccess(Value *V, - std::vector &fixAddrTGSMList) { + std::vector &FixAddrTGSMList) { for (User *U : V->users()) { if (GEPOperator *GEP = dyn_cast(U)) { if (isa(GEP) || GEP->hasAllConstantIndices()) { - CollectFixAddressAccess(GEP, fixAddrTGSMList); + CollectFixAddressAccess(GEP, FixAddrTGSMList); } } else if (StoreInst *SI = dyn_cast(U)) { - fixAddrTGSMList.emplace_back(SI); + FixAddrTGSMList.emplace_back(SI); } } } @@ -3339,16 +3339,16 @@ static bool IsDivergent(Value *V) { return false; } -static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, +static void ValidateTGSMRaceCondition(std::vector &FixAddrTGSMList, ValidationContext &ValCtx) { - std::unordered_set fixAddrTGSMFuncSet; - for (StoreInst *I : fixAddrTGSMList) { + std::unordered_set FixAddrTGSMFuncSet; + for (StoreInst *I : FixAddrTGSMList) { BasicBlock *BB = I->getParent(); - fixAddrTGSMFuncSet.insert(BB->getParent()); + FixAddrTGSMFuncSet.insert(BB->getParent()); } for (auto &F : ValCtx.DxilMod.GetModule()->functions()) { - if (F.isDeclaration() || !fixAddrTGSMFuncSet.count(&F)) + if (F.isDeclaration() || !FixAddrTGSMFuncSet.count(&F)) continue; PostDominatorTree PDT; @@ -3356,7 +3356,7 @@ static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, BasicBlock *Entry = &F.getEntryBlock(); - for (StoreInst *SI : fixAddrTGSMList) { + for (StoreInst *SI : FixAddrTGSMList) { BasicBlock *BB = SI->getParent(); if (BB->getParent() == &F) { if (PDT.dominates(BB, Entry)) { @@ -3375,7 +3375,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib(); unsigned TGSMSize = 0; - std::vector fixAddrTGSMList; + std::vector FixAddrTGSMList; const 
DataLayout &DL = M.GetModule()->getDataLayout(); for (GlobalVariable &GV : M.GetModule()->globals()) { ValidateGlobalVariable(GV, ValCtx); @@ -3390,9 +3390,9 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { if (Instruction *I = dyn_cast(U)) { llvm::Function *F = I->getParent()->getParent(); if (M.HasDxilEntryProps(F)) { - DxilFunctionProps &props = M.GetDxilEntryProps(F).props; - if (!props.IsCS() && !props.IsAS() && !props.IsMS() && - !props.IsNode()) { + DxilFunctionProps &Props = M.GetDxilEntryProps(F).props; + if (!Props.IsCS() && !Props.IsAS() && !Props.IsMS() && + !Props.IsNode()) { ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported, {"from non-compute entry points"}); @@ -3402,7 +3402,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { } } TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType()); - CollectFixAddressAccess(&GV, fixAddrTGSMList); + CollectFixAddressAccess(&GV, FixAddrTGSMList); } } @@ -3426,8 +3426,8 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { GV, Rule, {std::to_string(TGSMSize), std::to_string(MaxSize)}); } - if (!fixAddrTGSMList.empty()) { - ValidateTGSMRaceCondition(fixAddrTGSMList, ValCtx); + if (!FixAddrTGSMList.empty()) { + ValidateTGSMRaceCondition(FixAddrTGSMList, ValCtx); } } @@ -3440,20 +3440,20 @@ static void ValidateValidatorVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { - unsigned curMajor, curMinor; - GetValidationVersion(&curMajor, &curMinor); + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { + unsigned CurMajor, CurMinor; + 
GetValidationVersion(&CurMajor, &CurMinor); // This will need to be updated as major/minor versions evolve, // depending on the degree of compat across versions. - if (majorVer == curMajor && minorVer <= curMinor) { + if (MajorVer == CurMajor && MinorVer <= CurMinor) { return; } else { ValCtx.EmitFormatError( ValidationRule::MetaVersionSupported, - {"Validator", std::to_string(majorVer), std::to_string(minorVer), - std::to_string(curMajor), std::to_string(curMinor)}); + {"Validator", std::to_string(MajorVer), std::to_string(MinorVer), + std::to_string(CurMajor), std::to_string(CurMinor)}); return; } } @@ -3471,19 +3471,19 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { // This will need to be updated as dxil major/minor versions evolve, // depending on the degree of compat across versions. 
- if ((majorVer == DXIL::kDxilMajor && minorVer <= DXIL::kDxilMinor) && - (majorVer == ValCtx.m_DxilMajor && - minorVer == ValCtx.m_DxilMinor)) { + if ((MajorVer == DXIL::kDxilMajor && MinorVer <= DXIL::kDxilMinor) && + (MajorVer == ValCtx.m_DxilMajor && + MinorVer == ValCtx.m_DxilMinor)) { return; } else { ValCtx.EmitFormatError(ValidationRule::MetaVersionSupported, - {"Dxil", std::to_string(majorVer), - std::to_string(minorVer), + {"Dxil", std::to_string(MajorVer), + std::to_string(MinorVer), std::to_string(DXIL::kDxilMajor), std::to_string(DXIL::kDxilMinor)}); return; @@ -3501,16 +3501,16 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { NamedMDNode *TA = pModule->getNamedMetadata("dx.typeAnnotations"); if (TA == nullptr) return; - for (unsigned i = 0, end = TA->getNumOperands(); i < end; ++i) { - MDTuple *TANode = dyn_cast(TA->getOperand(i)); + for (unsigned I = 0, End = TA->getNumOperands(); I < End; ++I) { + MDTuple *TANode = dyn_cast(TA->getOperand(I)); if (TANode->getNumOperands() < 3) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } - ConstantInt *tag = mdconst::extract(TANode->getOperand(0)); - uint64_t tagValue = tag->getZExtValue(); - if (tagValue != DxilMDHelper::kDxilTypeSystemStructTag && - tagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { + ConstantInt *Tag = mdconst::extract(TANode->getOperand(0)); + uint64_t TagValue = Tag->getZExtValue(); + if (TagValue != DxilMDHelper::kDxilTypeSystemStructTag && + TagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } @@ -3519,11 +3519,11 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { } static void ValidateBitcode(ValidationContext &ValCtx) { - std::string diagStr; - raw_string_ostream diagStream(diagStr); - if (llvm::verifyModule(ValCtx.M, &diagStream)) { + std::string DiagStr; + raw_string_ostream DiagStream(DiagStr); + if (llvm::verifyModule(ValCtx.M, &DiagStream)) { 
ValCtx.EmitError(ValidationRule::BitcodeValid); - dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), diagStream.str()); + dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), DiagStream.str()); } } @@ -3537,18 +3537,18 @@ static void ValidateWaveSize(ValidationContext &ValCtx, if (!EPs) return; - for (unsigned i = 0, end = EPs->getNumOperands(); i < end; ++i) { - MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(i)); + for (unsigned I = 0, End = EPs->getNumOperands(); I < End; ++I) { + MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(I)); if (EPNodeRef->getNumOperands() < 5) { ValCtx.EmitMetaError(EPNodeRef, ValidationRule::MetaWellFormed); return; } // get access to the digit that represents the metadata number that // would store entry properties - const llvm::MDOperand &mOp = + const llvm::MDOperand &MOp = EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1); // the final operand to the entry points tuple should be a tuple. - if (mOp == nullptr || (mOp.get())->getMetadataID() != Metadata::MDTupleKind) + if (MOp == nullptr || (MOp.get())->getMetadataID() != Metadata::MDTupleKind) continue; // get access to the node that stores entry properties @@ -3556,29 +3556,29 @@ static void ValidateWaveSize(ValidationContext &ValCtx, EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1)); // find any incompatible tags inside the entry properties // increment j by 2 to only analyze tags, not values - bool foundTag = false; - for (unsigned j = 0, end2 = EPropNode->getNumOperands(); j < end2; j += 2) { - const MDOperand &propertyTagOp = EPropNode->getOperand(j); + bool FoundTag = false; + for (unsigned J = 0, End2 = EPropNode->getNumOperands(); J < End2; J += 2) { + const MDOperand &PropertyTagOp = EPropNode->getOperand(J); // note, we are only looking for tags, which will be a constant // integer - DXASSERT(!(propertyTagOp == nullptr || - (propertyTagOp.get())->getMetadataID() != + DXASSERT(!(PropertyTagOp == nullptr || + (PropertyTagOp.get())->getMetadataID() != 
Metadata::ConstantAsMetadataKind), "tag operand should be a constant integer."); - ConstantInt *tag = mdconst::extract(propertyTagOp); - uint64_t tagValue = tag->getZExtValue(); + ConstantInt *Tag = mdconst::extract(PropertyTagOp); + uint64_t TagValue = Tag->getZExtValue(); // legacy wavesize is only supported between 6.6 and 6.7, so we // should fail if we find the ranged wave size metadata tag - if (tagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { + if (TagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; + FoundTag = true; if (SM->IsSM66Plus() && !SM->IsSM68Plus()) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeRangeNeedsSM68Plus, @@ -3587,36 +3587,36 @@ static void ValidateWaveSize(ValidationContext &ValCtx, } // get the metadata that contains the // parameters to the wavesize attribute - MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 3) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeRangeExpectsThreeParams, {}); return; } - for (int k = 0; k < 3; k++) { - const MDOperand ¶m = WaveTuple->getOperand(k); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + for (int K = 0; K < 3; K++) { + const MDOperand &Param = WaveTuple->getOperand(K); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; } } - } else if (tagValue == DxilMDHelper::kDxilWaveSizeTag) { + } else if (TagValue == DxilMDHelper::kDxilWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; - 
MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + FoundTag = true; + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 1) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeExpectsOneParam, {}); return; } - const MDOperand ¶m = WaveTuple->getOperand(0); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + const MDOperand &Param = WaveTuple->getOperand(0); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; @@ -3637,9 +3637,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { ValidateDxilVersion(ValCtx); Module *pModule = &ValCtx.M; - const std::string &target = pModule->getTargetTriple(); - if (target != "dxil-ms-dx") { - ValCtx.EmitFormatError(ValidationRule::MetaTarget, {target}); + const std::string &Target = pModule->getTargetTriple(); + if (Target != "dxil-ms-dx") { + ValCtx.EmitFormatError(ValidationRule::MetaTarget, {Target}); } // The llvm.dbg.(cu/contents/defines/mainFileName/arg) named metadata nodes @@ -3647,9 +3647,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { // llvm.bitsets is also disallowed. // // These are verified in lib/IR/Verifier.cpp. 
- StringMap llvmNamedMeta; - llvmNamedMeta["llvm.ident"]; - llvmNamedMeta["llvm.module.flags"]; + StringMap LlvmNamedMeta; + LlvmNamedMeta["llvm.ident"]; + LlvmNamedMeta["llvm.module.flags"]; for (auto &NamedMetaNode : pModule->named_metadata()) { if (!DxilModule::IsKnownNamedMetaData(NamedMetaNode)) { @@ -3657,7 +3657,7 @@ static void ValidateMetadata(ValidationContext &ValCtx) { if (!name.startswith_lower("llvm.")) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } else { - if (llvmNamedMeta.count(name) == 0) { + if (LlvmNamedMeta.count(name) == 0) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } } @@ -3690,35 +3690,35 @@ static void ValidateMetadata(ValidationContext &ValCtx) { } static void ValidateResourceOverlap( - hlsl::DxilResourceBase &res, - SpacesAllocator &spaceAllocator, + hlsl::DxilResourceBase &Res, + SpacesAllocator &SpaceAllocator, ValidationContext &ValCtx) { - unsigned base = res.GetLowerBound(); - if (ValCtx.isLibProfile && !res.IsAllocated()) { + unsigned Base = Res.GetLowerBound(); + if (ValCtx.isLibProfile && !Res.IsAllocated()) { // Skip unallocated resource for library. 
return; } - unsigned size = res.GetRangeSize(); - unsigned space = res.GetSpaceID(); + unsigned Size = Res.GetRangeSize(); + unsigned Space = Res.GetSpaceID(); - auto &allocator = spaceAllocator.Get(space); - unsigned end = base + size - 1; + auto &Allocator = SpaceAllocator.Get(Space); + unsigned End = Base + Size - 1; // unbounded - if (end < base) - end = size; - const DxilResourceBase *conflictRes = allocator.Insert(&res, base, end); - if (conflictRes) { + if (End < Base) + End = Size; + const DxilResourceBase *ConflictRes = Allocator.Insert(&Res, Base, End); + if (ConflictRes) { ValCtx.EmitFormatError( ValidationRule::SmResourceRangeOverlap, - {ValCtx.GetResourceName(&res), std::to_string(base), - std::to_string(size), std::to_string(conflictRes->GetLowerBound()), - std::to_string(conflictRes->GetRangeSize()), std::to_string(space)}); + {ValCtx.GetResourceName(&Res), std::to_string(Base), + std::to_string(Size), std::to_string(ConflictRes->GetLowerBound()), + std::to_string(ConflictRes->GetRangeSize()), std::to_string(Space)}); } } -static void ValidateResource(hlsl::DxilResource &res, +static void ValidateResource(hlsl::DxilResource &Res, ValidationContext &ValCtx) { - switch (res.GetKind()) { + switch (Res.GetKind()) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: @@ -3730,8 +3730,8 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ResourceKind::Texture3D: case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - if (res.GetSampleCount() > 0) { - ValCtx.EmitResourceError(&res, ValidationRule::SmSampleCountOnlyOn2DMS); + if (Res.GetSampleCount() > 0) { + ValCtx.EmitResourceError(&Res, ValidationRule::SmSampleCountOnlyOn2DMS); } break; case DXIL::ResourceKind::Texture2DMS: @@ -3742,16 +3742,16 @@ static void ValidateResource(hlsl::DxilResource &res, break; case DXIL::ResourceKind::FeedbackTexture2D: case DXIL::ResourceKind::FeedbackTexture2DArray: - if 
(res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) - ValCtx.EmitResourceError(&res, + if (Res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidSamplerFeedbackType); break; default: - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceKind); + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceKind); break; } - switch (res.GetCompType().GetKind()) { + switch (Res.GetCompType().GetKind()) { case DXIL::ComponentType::F32: case DXIL::ComponentType::SNormF32: case DXIL::ComponentType::UNormF32: @@ -3765,266 +3765,266 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ComponentType::U16: break; default: - if (!res.IsStructuredBuffer() && !res.IsRawBuffer() && - !res.IsFeedbackTexture()) - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceCompType); + if (!Res.IsStructuredBuffer() && !Res.IsRawBuffer() && + !Res.IsFeedbackTexture()) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceCompType); break; } - if (res.IsStructuredBuffer()) { - unsigned stride = res.GetElementStride(); - bool alignedTo4Bytes = (stride & 3) == 0; - if (!alignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { + if (Res.IsStructuredBuffer()) { + unsigned Stride = Res.GetElementStride(); + bool AlignedTo4Bytes = (Stride & 3) == 0; + if (!AlignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignment, - {std::to_string(4), std::to_string(stride)}); + &Res, ValidationRule::MetaStructBufAlignment, + {std::to_string(4), std::to_string(Stride)}); } - if (stride > DXIL::kMaxStructBufferStride) { + if (Stride > DXIL::kMaxStructBufferStride) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignmentOutOfBound, + &Res, ValidationRule::MetaStructBufAlignmentOutOfBound, {std::to_string(DXIL::kMaxStructBufferStride), - 
std::to_string(stride)}); + std::to_string(Stride)}); } } - if (res.IsAnyTexture() || res.IsTypedBuffer()) { - Type *RetTy = res.GetRetType(); - unsigned size = + if (Res.IsAnyTexture() || Res.IsTypedBuffer()) { + Type *RetTy = Res.GetRetType(); + unsigned Size = ValCtx.DxilMod.GetModule()->getDataLayout().getTypeAllocSize(RetTy); - if (size > 4 * 4) { - ValCtx.EmitResourceError(&res, ValidationRule::MetaTextureType); + if (Size > 4 * 4) { + ValCtx.EmitResourceError(&Res, ValidationRule::MetaTextureType); } } } static void CollectCBufferRanges( - DxilStructAnnotation *annotation, - SpanAllocator &constAllocator, unsigned base, - DxilTypeSystem &typeSys, StringRef cbName, ValidationContext &ValCtx) { - DXASSERT(((base + 15) & ~(0xf)) == base, + DxilStructAnnotation *Annotation, + SpanAllocator &ConstAllocator, unsigned Base, + DxilTypeSystem &TypeSys, StringRef CbName, ValidationContext &ValCtx) { + DXASSERT(((Base + 15) & ~(0xf)) == Base, "otherwise, base for struct is not aligned"); - unsigned cbSize = annotation->GetCBufferSize(); + unsigned CbSize = Annotation->GetCBufferSize(); - const StructType *ST = annotation->GetStructType(); + const StructType *ST = Annotation->GetStructType(); - for (int i = annotation->GetNumFields() - 1; i >= 0; i--) { - DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(i); - Type *EltTy = ST->getElementType(i); + for (int I = Annotation->GetNumFields() - 1; I >= 0; I--) { + DxilFieldAnnotation &FieldAnnotation = Annotation->GetFieldAnnotation(I); + Type *EltTy = ST->getElementType(I); - unsigned offset = fieldAnnotation.GetCBufferOffset(); + unsigned Offset = FieldAnnotation.GetCBufferOffset(); unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( - fieldAnnotation, EltTy, typeSys); + FieldAnnotation, EltTy, TypeSys); - bool bOutOfBound = false; + bool IsOutOfBound = false; if (!EltTy->isAggregateType()) { - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { - if 
(constAllocator.Insert(&fieldAnnotation, base + offset, - base + offset + EltSize - 1)) { + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { + if (ConstAllocator.Insert(&FieldAnnotation, Base + Offset, + Base + Offset + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } else if (isa(EltTy)) { - if (((offset + 15) & ~(0xf)) != offset) { + if (((Offset + 15) & ~(0xf)) != Offset) { ValCtx.EmitFormatError(ValidationRule::SmCBufferArrayOffsetAlignment, - {cbName, std::to_string(offset)}); + {CbName, std::to_string(Offset)}); continue; } - unsigned arrayCount = 1; + unsigned ArrayCount = 1; while (isa(EltTy)) { - arrayCount *= EltTy->getArrayNumElements(); + ArrayCount *= EltTy->getArrayNumElements(); EltTy = EltTy->getArrayElementType(); } DxilStructAnnotation *EltAnnotation = nullptr; if (StructType *EltST = dyn_cast(EltTy)) - EltAnnotation = typeSys.GetStructAnnotation(EltST); + EltAnnotation = TypeSys.GetStructAnnotation(EltST); - unsigned alignedEltSize = ((EltSize + 15) & ~(0xf)); - unsigned arraySize = ((arrayCount - 1) * alignedEltSize) + EltSize; - bOutOfBound = (offset + arraySize) > cbSize; + unsigned AlignedEltSize = ((EltSize + 15) & ~(0xf)); + unsigned ArraySize = ((ArrayCount - 1) * AlignedEltSize) + EltSize; + IsOutOfBound = (Offset + ArraySize) > CbSize; - if (!bOutOfBound) { + if (!IsOutOfBound) { // If we didn't care about gaps where elements could be placed with user // offsets, we could: recurse once if EltAnnotation, then allocate the - // rest if arrayCount > 1 + // rest if ArrayCount > 1 - unsigned arrayBase = base + offset; + unsigned ArrayBase = Base + Offset; if (!EltAnnotation) { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, arrayBase, - arrayBase + arraySize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, ArrayBase, + ArrayBase + ArraySize - 1)) { 
ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(arrayBase)}); + {CbName, std::to_string(ArrayBase)}); } } else { - for (unsigned idx = 0; idx < arrayCount; idx++) { - CollectCBufferRanges(EltAnnotation, constAllocator, arrayBase, - typeSys, cbName, ValCtx); - arrayBase += alignedEltSize; + for (unsigned Idx = 0; Idx < ArrayCount; Idx++) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, ArrayBase, + TypeSys, CbName, ValCtx); + ArrayBase += AlignedEltSize; } } } } else { StructType *EltST = cast(EltTy); - unsigned structBase = base + offset; - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { + unsigned StructBase = Base + Offset; + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { if (DxilStructAnnotation *EltAnnotation = - typeSys.GetStructAnnotation(EltST)) { - CollectCBufferRanges(EltAnnotation, constAllocator, structBase, - typeSys, cbName, ValCtx); + TypeSys.GetStructAnnotation(EltST)) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, StructBase, + TypeSys, CbName, ValCtx); } else { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, structBase, - structBase + EltSize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, StructBase, + StructBase + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(structBase)}); + {CbName, std::to_string(StructBase)}); } } } } - if (bOutOfBound) { + if (IsOutOfBound) { ValCtx.EmitFormatError(ValidationRule::SmCBufferElementOverflow, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } -static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) { - Type *Ty = cb.GetHLSLType()->getPointerElementType(); - if (cb.GetRangeSize() != 1 || Ty->isArrayTy()) { +static void ValidateCBuffer(DxilCBuffer &Cb, ValidationContext &ValCtx) { + Type *Ty = Cb.GetHLSLType()->getPointerElementType(); + if (Cb.GetRangeSize() != 1 || 
Ty->isArrayTy()) { Ty = Ty->getArrayElementType(); } if (!isa(Ty)) { - ValCtx.EmitResourceError(&cb, + ValCtx.EmitResourceError(&Cb, ValidationRule::SmCBufferTemplateTypeMustBeStruct); return; } - if (cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { - ValCtx.EmitResourceFormatError(&cb, ValidationRule::SmCBufferSize, - {std::to_string(cb.GetSize())}); + if (Cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { + ValCtx.EmitResourceFormatError(&Cb, ValidationRule::SmCBufferSize, + {std::to_string(Cb.GetSize())}); return; } StructType *ST = cast(Ty); - DxilTypeSystem &typeSys = ValCtx.DxilMod.GetTypeSystem(); - DxilStructAnnotation *annotation = typeSys.GetStructAnnotation(ST); - if (!annotation) + DxilTypeSystem &TypeSys = ValCtx.DxilMod.GetTypeSystem(); + DxilStructAnnotation *Annotation = TypeSys.GetStructAnnotation(ST); + if (!Annotation) return; // Collect constant ranges. - std::vector> constRanges; - SpanAllocator constAllocator( + std::vector> ConstRanges; + SpanAllocator ConstAllocator( 0, // 4096 * 16 bytes. 
DXIL::kMaxCBufferSize << 4); - CollectCBufferRanges(annotation, constAllocator, 0, typeSys, - ValCtx.GetResourceName(&cb), ValCtx); + CollectCBufferRanges(Annotation, ConstAllocator, 0, TypeSys, + ValCtx.GetResourceName(&Cb), ValCtx); } static void ValidateResources(ValidationContext &ValCtx) { - const vector> &uavs = ValCtx.DxilMod.GetUAVs(); - SpacesAllocator uavAllocator; + const vector> &Uavs = ValCtx.DxilMod.GetUAVs(); + SpacesAllocator UavAllocator; - for (auto &uav : uavs) { - if (uav->IsROV()) { + for (auto &Uav : Uavs) { + if (Uav->IsROV()) { if (!ValCtx.DxilMod.GetShaderModel()->IsPS() && !ValCtx.isLibProfile) { - ValCtx.EmitResourceError(uav.get(), ValidationRule::SmROVOnlyInPS); + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmROVOnlyInPS); } } - switch (uav->GetKind()) { + switch (Uav->GetKind()) { case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - ValCtx.EmitResourceError(uav.get(), + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmInvalidTextureKindOnUAV); break; default: break; } - if (uav->HasCounter() && !uav->IsStructuredBuffer()) { - ValCtx.EmitResourceError(uav.get(), + if (Uav->HasCounter() && !Uav->IsStructuredBuffer()) { + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmCounterOnlyOnStructBuf); } - if (uav->HasCounter() && uav->IsGloballyCoherent()) - ValCtx.EmitResourceFormatError(uav.get(), + if (Uav->HasCounter() && Uav->IsGloballyCoherent()) + ValCtx.EmitResourceFormatError(Uav.get(), ValidationRule::MetaGlcNotOnAppendConsume, - {ValCtx.GetResourceName(uav.get())}); + {ValCtx.GetResourceName(Uav.get())}); - ValidateResource(*uav, ValCtx); - ValidateResourceOverlap(*uav, uavAllocator, ValCtx); + ValidateResource(*Uav, ValCtx); + ValidateResourceOverlap(*Uav, UavAllocator, ValCtx); } - SpacesAllocator srvAllocator; - const vector> &srvs = ValCtx.DxilMod.GetSRVs(); - for (auto &srv : srvs) { + SpacesAllocator SrvAllocator; + const vector> &Srvs = ValCtx.DxilMod.GetSRVs(); + for (auto &srv : 
Srvs) { ValidateResource(*srv, ValCtx); - ValidateResourceOverlap(*srv, srvAllocator, ValCtx); + ValidateResourceOverlap(*srv, SrvAllocator, ValCtx); } - hlsl::DxilResourceBase *pNonDense; - if (!AreDxilResourcesDense(&ValCtx.M, &pNonDense)) { - ValCtx.EmitResourceError(pNonDense, ValidationRule::MetaDenseResIDs); + hlsl::DxilResourceBase *NonDenseRes; + if (!AreDxilResourcesDense(&ValCtx.M, &NonDenseRes)) { + ValCtx.EmitResourceError(NonDenseRes, ValidationRule::MetaDenseResIDs); } - SpacesAllocator samplerAllocator; + SpacesAllocator SamplerAllocator; for (auto &sampler : ValCtx.DxilMod.GetSamplers()) { if (sampler->GetSamplerKind() == DXIL::SamplerKind::Invalid) { ValCtx.EmitResourceError(sampler.get(), ValidationRule::MetaValidSamplerMode); } - ValidateResourceOverlap(*sampler, samplerAllocator, ValCtx); + ValidateResourceOverlap(*sampler, SamplerAllocator, ValCtx); } - SpacesAllocator cbufferAllocator; + SpacesAllocator CbufferAllocator; for (auto &cbuffer : ValCtx.DxilMod.GetCBuffers()) { ValidateCBuffer(*cbuffer, ValCtx); - ValidateResourceOverlap(*cbuffer, cbufferAllocator, ValCtx); + ValidateResourceOverlap(*cbuffer, CbufferAllocator, ValCtx); } } static void ValidateShaderFlags(ValidationContext &ValCtx) { - ShaderFlags calcFlags; - ValCtx.DxilMod.CollectShaderFlagsForModule(calcFlags); + ShaderFlags CalcFlags; + ValCtx.DxilMod.CollectShaderFlagsForModule(CalcFlags); // Special case for validator version prior to 1.8. // If DXR 1.1 flag is set, but our computed flags do not have this set, then // this is due to prior versions setting the flag based on DXR 1.1 subobjects, // which are gone by this point. Set the flag and the rest should match. 
- unsigned valMajor, valMinor; - ValCtx.DxilMod.GetValidatorVersion(valMajor, valMinor); - if (DXIL::CompareVersions(valMajor, valMinor, 1, 5) >= 0 && - DXIL::CompareVersions(valMajor, valMinor, 1, 8) < 0 && + unsigned ValMajor, ValMinor; + ValCtx.DxilMod.GetValidatorVersion(ValMajor, ValMinor); + if (DXIL::CompareVersions(ValMajor, ValMinor, 1, 5) >= 0 && + DXIL::CompareVersions(ValMajor, ValMinor, 1, 8) < 0 && ValCtx.DxilMod.m_ShaderFlags.GetRaytracingTier1_1() && - !calcFlags.GetRaytracingTier1_1()) { - calcFlags.SetRaytracingTier1_1(true); + !CalcFlags.GetRaytracingTier1_1()) { + CalcFlags.SetRaytracingTier1_1(true); } - const uint64_t mask = ShaderFlags::GetShaderFlagsRawForCollection(); - uint64_t declaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); - uint64_t calcFlagsRaw = calcFlags.GetShaderFlagsRaw(); + const uint64_t Mask = ShaderFlags::GetShaderFlagsRawForCollection(); + uint64_t DeclaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); + uint64_t CalcFlagsRaw = CalcFlags.GetShaderFlagsRaw(); - declaredFlagsRaw &= mask; - calcFlagsRaw &= mask; + DeclaredFlagsRaw &= Mask; + CalcFlagsRaw &= Mask; - if (declaredFlagsRaw == calcFlagsRaw) { + if (DeclaredFlagsRaw == CalcFlagsRaw) { return; } ValCtx.EmitError(ValidationRule::MetaFlagsUsage); dxilutil::EmitNoteOnContext(ValCtx.M.getContext(), Twine("Flags declared=") + - Twine(declaredFlagsRaw) + Twine(", actual=") + - Twine(calcFlagsRaw)); + Twine(DeclaredFlagsRaw) + Twine(", actual=") + + Twine(CalcFlagsRaw)); } static void ValidateSignatureElement(DxilSignatureElement &SE, ValidationContext &ValCtx) { - DXIL::SemanticKind semanticKind = SE.GetSemantic()->GetKind(); - CompType::Kind compKind = SE.GetCompType().GetKind(); + DXIL::SemanticKind SemanticKind = SE.GetSemantic()->GetKind(); + CompType::Kind CompKind = SE.GetCompType().GetKind(); DXIL::InterpolationMode Mode = SE.GetInterpolationMode()->GetKind(); StringRef Name = SE.GetName(); @@ -4032,86 +4032,86 @@ static void 
ValidateSignatureElement(DxilSignatureElement &SE, ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSemanticLen); } - if (semanticKind > DXIL::SemanticKind::Arbitrary && - semanticKind < DXIL::SemanticKind::Invalid) { - if (semanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { + if (SemanticKind > DXIL::SemanticKind::Arbitrary && + SemanticKind < DXIL::SemanticKind::Invalid) { + if (SemanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { ValCtx.EmitFormatError(ValidationRule::MetaSemaKindMatchesName, {SE.GetName(), SE.GetSemantic()->GetName()}); } } - unsigned compWidth = 0; - bool compFloat = false; - bool compInt = false; - bool compBool = false; + unsigned CompWidth = 0; + bool CompFloat = false; + bool CompInt = false; + bool CompBool = false; - switch (compKind) { + switch (CompKind) { case CompType::Kind::U64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; case CompType::Kind::I64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; // These should be translated for signatures: // case CompType::Kind::PackedS8x32: // case CompType::Kind::PackedU8x32: case CompType::Kind::U32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::I32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::U16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I1: - compWidth = 1; - compBool = true; + CompWidth = 1; + CompBool = true; break; case CompType::Kind::F64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::F32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::F16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; 
break; case CompType::Kind::SNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::SNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::SNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::UNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::UNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::UNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::Invalid: default: @@ -4120,7 +4120,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; } - if (compInt || compBool) { + if (CompInt || CompBool) { switch (Mode) { case DXIL::InterpolationMode::Linear: case DXIL::InterpolationMode::LinearCentroid: @@ -4137,91 +4137,91 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } // Elements that should not appear in the Dxil signature: - bool bAllowedInSig = true; - bool bShouldBeAllocated = true; + bool AllowedInSig = true; + bool ShouldBeAllocated = true; switch (SE.GetInterpretation()) { case DXIL::SemanticInterpretationKind::NA: case DXIL::SemanticInterpretationKind::NotInSig: case DXIL::SemanticInterpretationKind::Invalid: - bAllowedInSig = false; + AllowedInSig = false; LLVM_FALLTHROUGH; case DXIL::SemanticInterpretationKind::NotPacked: case DXIL::SemanticInterpretationKind::Shadow: - bShouldBeAllocated = false; + ShouldBeAllocated = false; break; default: break; } - const char *inputOutput = nullptr; + const char *InputOutput = nullptr; if (SE.IsInput()) - inputOutput = "Input"; + InputOutput = "Input"; else if (SE.IsOutput()) - inputOutput = "Output"; + InputOutput = "Output"; else - inputOutput = "PatchConstant"; + InputOutput = "PatchConstant"; - if (!bAllowedInSig) { + if 
(!AllowedInSig) { ValCtx.EmitFormatError(ValidationRule::SmSemantic, {SE.GetName(), ValCtx.DxilMod.GetShaderModel()->GetKindName(), - inputOutput}); - } else if (bShouldBeAllocated && !SE.IsAllocated()) { + InputOutput}); + } else if (ShouldBeAllocated && !SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldBeAllocated, - {inputOutput, SE.GetName()}); - } else if (!bShouldBeAllocated && SE.IsAllocated()) { + {InputOutput, SE.GetName()}); + } else if (!ShouldBeAllocated && SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldNotBeAllocated, - {inputOutput, SE.GetName()}); + {InputOutput, SE.GetName()}); } - bool bIsClipCull = false; - bool bIsTessfactor = false; - bool bIsBarycentric = false; + bool IsClipCull = false; + bool IsTessfactor = false; + bool IsBarycentric = false; - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Depth: case DXIL::SemanticKind::DepthGreaterEqual: case DXIL::SemanticKind::DepthLessEqual: - if (!compFloat || compWidth > 32 || SE.GetCols() != 1) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } break; case DXIL::SemanticKind::Coverage: - DXASSERT(!SE.IsInput() || !bAllowedInSig, + DXASSERT(!SE.IsInput() || !AllowedInSig, "else internal inconsistency between semantic interpretation " "table and validation code"); LLVM_FALLTHROUGH; case DXIL::SemanticKind::InnerCoverage: case DXIL::SemanticKind::OutputControlPointID: - if (compKind != CompType::Kind::U32 || SE.GetCols() != 1) { + if (CompKind != CompType::Kind::U32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::Position: - if (!compFloat || compWidth > 32 || SE.GetCols() != 4) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 4) { 
ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float4"}); } break; case DXIL::SemanticKind::Target: - if (compWidth > 32) { + if (CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float/int/uint"}); } break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: - bIsClipCull = true; - if (!compFloat || compWidth > 32) { + IsClipCull = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } // NOTE: clip cull distance size is checked at ValidateSignature. break; case DXIL::SemanticKind::IsFrontFace: { - if (!(compInt && compWidth == 32) || SE.GetCols() != 1) { + if (!(CompInt && CompWidth == 32) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } @@ -4235,14 +4235,14 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::SampleIndex: case DXIL::SemanticKind::StencilRef: case DXIL::SemanticKind::ShadingRate: - if ((compKind != CompType::Kind::U32 && compKind != CompType::Kind::U16) || + if ((CompKind != CompType::Kind::U32 && CompKind != CompType::Kind::U16) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::CullPrimitive: { - if (!(compBool && compWidth == 1) || SE.GetCols() != 1) { + if (!(CompBool && CompWidth == 1) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "bool"}); } @@ -4250,8 +4250,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::TessFactor: case DXIL::SemanticKind::InsideTessFactor: // NOTE: the size check is at CheckPatchConstantSemantic. 
- bIsTessfactor = true; - if (!compFloat || compWidth > 32) { + IsTessfactor = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4260,12 +4260,12 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; case DXIL::SemanticKind::DomainLocation: case DXIL::SemanticKind::Invalid: - DXASSERT(!bAllowedInSig, "else internal inconsistency between semantic " - "interpretation table and validation code"); + DXASSERT(!AllowedInSig, "else internal inconsistency between semantic " + "interpretation table and validation code"); break; case DXIL::SemanticKind::Barycentrics: - bIsBarycentric = true; - if (!compFloat || compWidth > 32) { + IsBarycentric = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4310,32 +4310,32 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } } - if (semanticKind == DXIL::SemanticKind::Target) { - // Verify packed row == semantic index - unsigned row = SE.GetStartRow(); + if (SemanticKind == DXIL::SemanticKind::Target) { + // Verify packed Row == semantic index + unsigned Row = SE.GetStartRow(); for (unsigned i : SE.GetSemanticIndexVec()) { - if (row != i) { + if (Row != i) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetIndexMatchesRow); } - ++row; + ++Row; } - // Verify packed col is 0 + // Verify packed Col is 0 if (SE.GetStartCol() != 0) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetCol0); } - // Verify max row used < 8 + // Verify max Row used < 8 if (SE.GetStartRow() + SE.GetRows() > 8) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {"SV_Target", "7"}); } - } else if (bAllowedInSig && semanticKind != DXIL::SemanticKind::Arbitrary) { - if (bIsBarycentric) { + } else if (AllowedInSig && SemanticKind != DXIL::SemanticKind::Arbitrary) { + if (IsBarycentric) { if 
(SE.GetSemanticStartIndex() > 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "1"}); } - } else if (!bIsClipCull && SE.GetSemanticStartIndex() > 0) { + } else if (!IsClipCull && SE.GetSemanticStartIndex() > 0) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "0"}); } @@ -4343,17 +4343,17 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, // with the exception of tessfactors, which are validated in // CheckPatchConstantSemantic and ClipDistance/CullDistance, which have // other custom constraints. - if (!bIsTessfactor && !bIsClipCull && SE.GetRows() > 1) { + if (!IsTessfactor && !IsClipCull && SE.GetRows() > 1) { ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSystemValueRows); } } if (SE.GetCols() + (SE.IsAllocated() ? SE.GetStartCol() : 0) > 4) { - unsigned size = (SE.GetRows() - 1) * 4 + SE.GetCols(); + unsigned Size = (SE.GetRows() - 1) * 4 + SE.GetCols(); ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange, {SE.GetName(), std::to_string(SE.GetStartRow()), std::to_string(SE.GetStartCol()), - std::to_string(size)}); + std::to_string(Size)}); } if (!SE.GetInterpolationMode()->IsValid()) { @@ -4362,8 +4362,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } static void ValidateSignatureOverlap(DxilSignatureElement &E, - unsigned maxScalars, - DxilSignatureAllocator &allocator, + unsigned MaxScalars, + DxilSignatureAllocator &Allocator, ValidationContext &ValCtx) { // Skip entries that are not or should not be allocated. 
Validation occurs in @@ -4381,16 +4381,16 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, break; } - DxilPackElement PE(&E, allocator.UseMinPrecision()); - DxilSignatureAllocator::ConflictType conflict = - allocator.DetectRowConflict(&PE, E.GetStartRow()); - if (conflict == DxilSignatureAllocator::kNoConflict || - conflict == DxilSignatureAllocator::kInsufficientFreeComponents) - conflict = - allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); - switch (conflict) { + DxilPackElement PE(&E, Allocator.UseMinPrecision()); + DxilSignatureAllocator::ConflictType Conflict = + Allocator.DetectRowConflict(&PE, E.GetStartRow()); + if (Conflict == DxilSignatureAllocator::kNoConflict || + Conflict == DxilSignatureAllocator::kInsufficientFreeComponents) + Conflict = + Allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); + switch (Conflict) { case DxilSignatureAllocator::kNoConflict: - allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); + Allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); break; case DxilSignatureAllocator::kConflictsWithIndexed: ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict, @@ -4452,59 +4452,59 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, } static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, - EntryStatus &Status, unsigned maxScalars) { - DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = { + EntryStatus &Status, unsigned MaxScalars) { + DxilSignatureAllocator Allocator[DXIL::kNumOutputStreams] = { {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}}; - unordered_set semanticUsageSet[DXIL::kNumOutputStreams]; - StringMap> semanticIndexMap[DXIL::kNumOutputStreams]; - unordered_set clipcullRowSet[DXIL::kNumOutputStreams]; - unsigned clipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; + 
unordered_set SemanticUsageSet[DXIL::kNumOutputStreams]; + StringMap> SemanticIndexMap[DXIL::kNumOutputStreams]; + unordered_set ClipcullRowSet[DXIL::kNumOutputStreams]; + unsigned ClipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; - bool isOutput = S.IsOutput(); + bool IsOutput = S.IsOutput(); unsigned TargetMask = 0; DXIL::SemanticKind DepthKind = DXIL::SemanticKind::Invalid; - const InterpolationMode *prevBaryInterpMode = nullptr; - unsigned numBarycentrics = 0; + const InterpolationMode *PrevBaryInterpMode = nullptr; + unsigned NumBarycentrics = 0; for (auto &E : S.GetElements()) { - DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind(); + DXIL::SemanticKind SemanticKind = E->GetSemantic()->GetKind(); ValidateSignatureElement(*E, ValCtx); - // Avoid OOB indexing on streamId. - unsigned streamId = E->GetOutputStream(); - if (streamId >= DXIL::kNumOutputStreams || !isOutput || + // Avoid OOB indexing on StreamId. + unsigned StreamId = E->GetOutputStream(); + if (StreamId >= DXIL::kNumOutputStreams || !IsOutput || !ValCtx.DxilMod.GetShaderModel()->IsGS()) { - streamId = 0; + StreamId = 0; } // Semantic index overlap check, keyed by name. 
- std::string nameUpper(E->GetName()); - std::transform(nameUpper.begin(), nameUpper.end(), nameUpper.begin(), + std::string NameUpper(E->GetName()); + std::transform(NameUpper.begin(), NameUpper.end(), NameUpper.begin(), ::toupper); - unordered_set &semIdxSet = semanticIndexMap[streamId][nameUpper]; - for (unsigned semIdx : E->GetSemanticIndexVec()) { - if (semIdxSet.count(semIdx) > 0) { + unordered_set &SemIdxSet = SemanticIndexMap[StreamId][NameUpper]; + for (unsigned SemIdx : E->GetSemanticIndexVec()) { + if (SemIdxSet.count(SemIdx) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap, - {E->GetName(), std::to_string(semIdx)}); + {E->GetName(), std::to_string(SemIdx)}); return; } else - semIdxSet.insert(semIdx); + SemIdxSet.insert(SemIdx); } // SV_Target has special rules - if (semanticKind == DXIL::SemanticKind::Target) { + if (SemanticKind == DXIL::SemanticKind::Target) { // Validate target overlap if (E->GetStartRow() + E->GetRows() <= 8) { - unsigned mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); - if (TargetMask & mask) { + unsigned Mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); + if (TargetMask & Mask) { ValCtx.EmitFormatError( ValidationRule::MetaNoSemanticOverlap, {"SV_Target", std::to_string(E->GetStartRow())}); } - TargetMask = TargetMask | mask; + TargetMask = TargetMask | Mask; } if (E->GetRows() > 1) { ValCtx.EmitSignatureError(E.get(), ValidationRule::SmNoPSOutputIdx); @@ -4516,19 +4516,19 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, continue; // validate system value semantic rules - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Arbitrary: break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: // Validate max 8 components across 2 rows (registers) - for (unsigned rowIdx = 0; rowIdx < E->GetRows(); rowIdx++) - clipcullRowSet[streamId].insert(E->GetStartRow() + rowIdx); - if (clipcullRowSet[streamId].size() > 2) { + for 
(unsigned RowIdx = 0; RowIdx < E->GetRows(); RowIdx++) + ClipcullRowSet[StreamId].insert(E->GetStartRow() + RowIdx); + if (ClipcullRowSet[StreamId].size() > 2) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxRows); } - clipcullComponents[streamId] += E->GetCols(); - if (clipcullComponents[streamId] > 8) { + ClipcullComponents[StreamId] += E->GetCols(); + if (ClipcullComponents[StreamId] > 8) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxComponents); } @@ -4540,58 +4540,58 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, ValCtx.EmitSignatureError(E.get(), ValidationRule::SmPSMultipleDepthSemantic); } - DepthKind = semanticKind; + DepthKind = SemanticKind; break; case DXIL::SemanticKind::Barycentrics: { // There can only be up to two SV_Barycentrics // with differeent perspective interpolation modes. - if (numBarycentrics++ > 1) { + if (NumBarycentrics++ > 1) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); break; } - const InterpolationMode *mode = E->GetInterpolationMode(); - if (prevBaryInterpMode) { - if ((mode->IsAnyNoPerspective() && - prevBaryInterpMode->IsAnyNoPerspective()) || - (!mode->IsAnyNoPerspective() && - !prevBaryInterpMode->IsAnyNoPerspective())) { + const InterpolationMode *Mode = E->GetInterpolationMode(); + if (PrevBaryInterpMode) { + if ((Mode->IsAnyNoPerspective() && + PrevBaryInterpMode->IsAnyNoPerspective()) || + (!Mode->IsAnyNoPerspective() && + !PrevBaryInterpMode->IsAnyNoPerspective())) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); } } - prevBaryInterpMode = mode; + PrevBaryInterpMode = Mode; break; } default: - if (semanticUsageSet[streamId].count( - static_cast(semanticKind)) > 0) { + if (SemanticUsageSet[StreamId].count( + static_cast(SemanticKind)) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaDuplicateSysValue, {E->GetSemantic()->GetName()}); } - 
semanticUsageSet[streamId].insert(static_cast(semanticKind)); + SemanticUsageSet[StreamId].insert(static_cast(SemanticKind)); break; } // Packed element overlap check. - ValidateSignatureOverlap(*E.get(), maxScalars, allocator[streamId], ValCtx); + ValidateSignatureOverlap(*E.get(), MaxScalars, Allocator[StreamId], ValCtx); - if (isOutput && semanticKind == DXIL::SemanticKind::Position) { + if (IsOutput && SemanticKind == DXIL::SemanticKind::Position) { Status.hasOutputPosition[E->GetOutputStream()] = true; } } if (Status.hasViewID && S.IsInput() && ValCtx.DxilMod.GetShaderModel()->GetKind() == DXIL::ShaderKind::Pixel) { - // Ensure sufficient space for ViewID: - DxilSignatureAllocator::DummyElement viewID; - viewID.rows = 1; - viewID.cols = 1; - viewID.kind = DXIL::SemanticKind::Arbitrary; - viewID.interpolation = DXIL::InterpolationMode::Constant; - viewID.interpretation = DXIL::SemanticInterpretationKind::SGV; - allocator[0].PackNext(&viewID, 0, 32); - if (!viewID.IsAllocated()) { + // Ensure sufficient space for ViewId: + DxilSignatureAllocator::DummyElement ViewId; + ViewId.rows = 1; + ViewId.cols = 1; + ViewId.kind = DXIL::SemanticKind::Arbitrary; + ViewId.interpolation = DXIL::InterpolationMode::Constant; + ViewId.interpretation = DXIL::SemanticInterpretationKind::SGV; + Allocator[0].PackNext(&ViewId, 0, 32); + if (!ViewId.IsAllocated()) { ValCtx.EmitError(ValidationRule::SmViewIDNeedsSlot); } } @@ -4616,12 +4616,12 @@ static void ValidateConstantInterpModeSignature(ValidationContext &ValCtx, } static void ValidateEntrySignatures(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function &F) { - const DxilFunctionProps &props = entryProps.props; - const DxilEntrySignature &S = entryProps.sig; + const DxilFunctionProps &Props = EntryProps.props; + const DxilEntrySignature &S = EntryProps.sig; - if (props.IsRay()) { + if (Props.IsRay()) { // No signatures allowed if 
(!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || @@ -4631,62 +4631,62 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate payload/attribute/params sizes - unsigned payloadSize = 0; - unsigned attrSize = 0; - auto itPayload = F.arg_begin(); - auto itAttr = itPayload; - if (itAttr != F.arg_end()) - itAttr++; + unsigned PayloadSize = 0; + unsigned AttrSize = 0; + auto ItPayload = F.arg_begin(); + auto ItAttr = ItPayload; + if (ItAttr != F.arg_end()) + ItAttr++; DataLayout DL(F.getParent()); - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - if (itAttr != F.arg_end()) { - Type *Ty = itAttr->getType(); + if (ItAttr != F.arg_end()) { + Type *Ty = ItAttr->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - attrSize = + AttrSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } LLVM_FALLTHROUGH; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - if (itPayload != F.arg_end()) { - Type *Ty = itPayload->getType(); + if (ItPayload != F.arg_end()) { + Type *Ty = ItPayload->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - payloadSize = + PayloadSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } break; } - if (props.ShaderProps.Ray.payloadSizeInBytes < payloadSize) { + if (Props.ShaderProps.Ray.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmRayShaderPayloadSize, - {F.getName(), props.IsCallable() ? "params" : "payload"}); + {F.getName(), Props.IsCallable() ? 
"params" : "payload"}); } - if (props.ShaderProps.Ray.attributeSizeInBytes < attrSize) { + if (Props.ShaderProps.Ray.attributeSizeInBytes < AttrSize) { ValCtx.EmitFnFormatError(&F, ValidationRule::SmRayShaderPayloadSize, {F.getName(), "attribute"}); } return; } - bool isPS = props.IsPS(); - bool isVS = props.IsVS(); - bool isGS = props.IsGS(); - bool isCS = props.IsCS(); - bool isMS = props.IsMS(); + bool IsPs = Props.IsPS(); + bool IsVs = Props.IsVS(); + bool IsGs = Props.IsGS(); + bool IsCs = Props.IsCS(); + bool IsMs = Props.IsMS(); - if (isPS) { + if (IsPs) { // PS output no interp mode. ValidateNoInterpModeSignature(ValCtx, S.OutputSignature); - } else if (isVS) { + } else if (IsVs) { // VS input no interp mode. ValidateNoInterpModeSignature(ValCtx, S.InputSignature); } - if (isMS) { + if (IsMs) { // primitive output constant interp mode. ValidateConstantInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } else { @@ -4694,38 +4694,38 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, ValidateNoInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } - unsigned maxInputScalars = DXIL::kMaxInputTotalScalars; - unsigned maxOutputScalars = 0; - unsigned maxPatchConstantScalars = 0; + unsigned MaxInputScalars = DXIL::kMaxInputTotalScalars; + unsigned MaxOutputScalars = 0; + unsigned MaxPatchConstantScalars = 0; - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::Compute: break; case DXIL::ShaderKind::Vertex: case DXIL::ShaderKind::Geometry: case DXIL::ShaderKind::Pixel: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Hull: case DXIL::ShaderKind::Domain: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; break; case 
DXIL::ShaderKind::Mesh: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Amplification: default: break; } - ValidateSignature(ValCtx, S.InputSignature, Status, maxInputScalars); - ValidateSignature(ValCtx, S.OutputSignature, Status, maxOutputScalars); + ValidateSignature(ValCtx, S.InputSignature, Status, MaxInputScalars); + ValidateSignature(ValCtx, S.OutputSignature, Status, MaxOutputScalars); ValidateSignature(ValCtx, S.PatchConstOrPrimSignature, Status, - maxPatchConstantScalars); + MaxPatchConstantScalars); - if (isPS) { + if (IsPs) { // Gather execution information. hlsl::PSExecutionInfo PSExec; DxilSignatureElement *PosInterpSE = nullptr; @@ -4767,10 +4767,10 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate PS output semantic. - const DxilSignature &outputSig = S.OutputSignature; - for (auto &SE : outputSig.GetElements()) { - Semantic::Kind semanticKind = SE->GetSemantic()->GetKind(); - switch (semanticKind) { + const DxilSignature &OutputSig = S.OutputSignature; + for (auto &SE : OutputSig.GetElements()) { + Semantic::Kind SemanticKind = SE->GetSemantic()->GetKind(); + switch (SemanticKind) { case Semantic::Kind::Target: case Semantic::Kind::Coverage: case Semantic::Kind::Depth: @@ -4786,24 +4786,24 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isGS) { - unsigned maxVertexCount = props.ShaderProps.GS.maxVertexCount; - unsigned outputScalarCount = 0; - const DxilSignature &outSig = S.OutputSignature; - for (auto &SE : outSig.GetElements()) { - outputScalarCount += SE->GetRows() * SE->GetCols(); + if (IsGs) { + unsigned MaxVertexCount = Props.ShaderProps.GS.maxVertexCount; + unsigned OutputScalarCount = 0; + const DxilSignature &OutSig = S.OutputSignature; + for (auto &SE : OutSig.GetElements()) { + 
OutputScalarCount += SE->GetRows() * SE->GetCols(); } - unsigned totalOutputScalars = maxVertexCount * outputScalarCount; - if (totalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { + unsigned TotalOutputScalars = MaxVertexCount * OutputScalarCount; + if (TotalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmGSTotalOutputVertexDataRange, - {std::to_string(maxVertexCount), std::to_string(outputScalarCount), - std::to_string(totalOutputScalars), + {std::to_string(MaxVertexCount), std::to_string(OutputScalarCount), + std::to_string(TotalOutputScalars), std::to_string(DXIL::kMaxGSOutputTotalScalars)}); } } - if (isCS) { + if (IsCs) { if (!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || !S.PatchConstOrPrimSignature.GetElements().empty()) { @@ -4811,7 +4811,7 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isMS) { + if (IsMs) { unsigned VertexSignatureRows = S.OutputSignature.GetRowCount(); if (VertexSignatureRows > DXIL::kMaxMSVSigRows) { ValCtx.EmitFnFormatError( @@ -4833,31 +4833,31 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, const unsigned kScalarSizeForMSAttributes = 4; #define ALIGN32(n) (((n) + 31) & ~31) - unsigned maxAlign32VertexCount = - ALIGN32(props.ShaderProps.MS.maxVertexCount); - unsigned maxAlign32PrimitiveCount = - ALIGN32(props.ShaderProps.MS.maxPrimitiveCount); - unsigned totalOutputScalars = 0; + unsigned MaxAlign32VertexCount = + ALIGN32(Props.ShaderProps.MS.maxVertexCount); + unsigned MaxAlign32PrimitiveCount = + ALIGN32(Props.ShaderProps.MS.maxPrimitiveCount); + unsigned TotalOutputScalars = 0; for (auto &SE : S.OutputSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * SE->GetCols() * maxAlign32VertexCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32VertexCount; } for (auto &SE : S.PatchConstOrPrimSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * 
SE->GetCols() * maxAlign32PrimitiveCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32PrimitiveCount; } - if (totalOutputScalars * kScalarSizeForMSAttributes > + if (TotalOutputScalars * kScalarSizeForMSAttributes > DXIL::kMaxMSOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderOutputSize, {F.getName(), std::to_string(DXIL::kMaxMSOutputTotalBytes)}); } - unsigned totalInputOutputBytes = - totalOutputScalars * kScalarSizeForMSAttributes + - props.ShaderProps.MS.payloadSizeInBytes; - if (totalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { + unsigned TotalInputOutputBytes = + TotalOutputScalars * kScalarSizeForMSAttributes + + Props.ShaderProps.MS.payloadSizeInBytes; + if (TotalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderInOutSize, {F.getName(), std::to_string(DXIL::kMaxMSInputOutputTotalBytes)}); @@ -4870,9 +4870,9 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntrySignatures(ValCtx, entryProps, Status, F); + ValidateEntrySignatures(ValCtx, EntryProps, Status, F); } } } else { @@ -4883,8 +4883,8 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntrySignatures(ValCtx, entryProps, Status, *Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntrySignatures(ValCtx, EntryProps, Status, *Entry); } } @@ -4893,14 +4893,14 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { struct CompatibilityChecker { ValidationContext &ValCtx; Function *EntryFn; - const 
DxilFunctionProps &props; - DXIL::ShaderKind shaderKind; + const DxilFunctionProps &Props; + DXIL::ShaderKind ShaderKind; // These masks identify the potential conflict flags based on the entry // function's shader kind and properties when either UsesDerivatives or // RequiresGroup flags are set in ShaderCompatInfo. - uint32_t maskForDeriv = 0; - uint32_t maskForGroup = 0; + uint32_t MaskForDeriv = 0; + uint32_t MaskForGroup = 0; enum class ConflictKind : uint32_t { Stage, @@ -4922,77 +4922,77 @@ struct CompatibilityChecker { CompatibilityChecker(ValidationContext &ValCtx, Function *EntryFn) : ValCtx(ValCtx), EntryFn(EntryFn), - props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), - shaderKind(props.shaderKind) { + Props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), + ShaderKind(Props.shaderKind) { // Precompute potential incompatibilities based on shader stage, shader kind // and entry attributes. These will turn into full conflicts if the entry // point's shader flags indicate that they use relevant features. if (!ValCtx.DxilMod.GetShaderModel()->IsSM66Plus() && - (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute)) { - maskForDeriv |= + (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute)) { + MaskForDeriv |= static_cast(ConflictFlags::DerivInComputeShaderModel); - } else if (shaderKind == DXIL::ShaderKind::Node) { + } else if (ShaderKind == DXIL::ShaderKind::Node) { // Only broadcasting launch supports derivatives. - if (props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) - maskForDeriv |= static_cast(ConflictFlags::DerivLaunch); + if (Props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) + MaskForDeriv |= static_cast(ConflictFlags::DerivLaunch); // Thread launch node has no group. 
- if (props.Node.LaunchType == DXIL::NodeLaunchType::Thread) - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + if (Props.Node.LaunchType == DXIL::NodeLaunchType::Thread) + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } - if (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Node) { + if (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Node) { // All compute-like stages // Thread dimensions must be either 1D and X is multiple of 4, or 2D // and X and Y must be multiples of 2. - if (props.numThreads[1] == 1 && props.numThreads[2] == 1) { - if ((props.numThreads[0] & 0x3) != 0) - maskForDeriv |= + if (Props.numThreads[1] == 1 && Props.numThreads[2] == 1) { + if ((Props.numThreads[0] & 0x3) != 0) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); - } else if ((props.numThreads[0] & 0x1) || (props.numThreads[1] & 0x1)) - maskForDeriv |= + } else if ((Props.numThreads[0] & 0x1) || (Props.numThreads[1] & 0x1)) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); } else { // other stages have no group - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } } uint32_t - IdentifyConflict(const DxilModule::ShaderCompatInfo &compatInfo) const { - uint32_t conflictMask = 0; + IdentifyConflict(const DxilModule::ShaderCompatInfo &CompatInfo) const { + uint32_t ConflictMask = 0; // Compatibility check said this shader kind is not compatible. - if (0 == ((1 << (uint32_t)shaderKind) & compatInfo.mask)) - conflictMask |= (uint32_t)ConflictFlags::Stage; + if (0 == ((1 << (uint32_t)ShaderKind) & CompatInfo.mask)) + ConflictMask |= (uint32_t)ConflictFlags::Stage; // Compatibility check said this shader model is not compatible. 
if (DXIL::CompareVersions(ValCtx.DxilMod.GetShaderModel()->GetMajor(), ValCtx.DxilMod.GetShaderModel()->GetMinor(), - compatInfo.minMajor, compatInfo.minMinor) < 0) - conflictMask |= (uint32_t)ConflictFlags::ShaderModel; + CompatInfo.minMajor, CompatInfo.minMinor) < 0) + ConflictMask |= (uint32_t)ConflictFlags::ShaderModel; - if (compatInfo.shaderFlags.GetUsesDerivatives()) - conflictMask |= maskForDeriv; + if (CompatInfo.shaderFlags.GetUsesDerivatives()) + ConflictMask |= MaskForDeriv; - if (compatInfo.shaderFlags.GetRequiresGroup()) - conflictMask |= maskForGroup; + if (CompatInfo.shaderFlags.GetRequiresGroup()) + ConflictMask |= MaskForGroup; - return conflictMask; + return ConflictMask; } - void Diagnose(Function *F, uint32_t conflictMask, ConflictKind conflict, - ValidationRule rule, ArrayRef args = {}) { - if (conflictMask & (1 << (unsigned)conflict)) - ValCtx.EmitFnFormatError(F, rule, args); + void Diagnose(Function *F, uint32_t ConflictMask, ConflictKind Conflict, + ValidationRule Rule, ArrayRef Args = {}) { + if (ConflictMask & (1 << (unsigned)Conflict)) + ValCtx.EmitFnFormatError(F, Rule, Args); } - void DiagnoseConflicts(Function *F, uint32_t conflictMask) { + void DiagnoseConflicts(Function *F, uint32_t ConflictMask) { // Emit a diagnostic indicating that either the entry function or a function // called by the entry function contains a disallowed operation. if (F == EntryFn) @@ -5001,22 +5001,22 @@ struct CompatibilityChecker { ValCtx.EmitFnError(EntryFn, ValidationRule::SmIncompatibleCallInEntry); // Emit diagnostics for each conflict found in this function. 
- Diagnose(F, conflictMask, ConflictKind::Stage, + Diagnose(F, ConflictMask, ConflictKind::Stage, ValidationRule::SmIncompatibleStage, - {ShaderModel::GetKindName(props.shaderKind)}); - Diagnose(F, conflictMask, ConflictKind::ShaderModel, + {ShaderModel::GetKindName(Props.shaderKind)}); + Diagnose(F, ConflictMask, ConflictKind::ShaderModel, ValidationRule::SmIncompatibleShaderModel); - Diagnose(F, conflictMask, ConflictKind::DerivLaunch, + Diagnose(F, ConflictMask, ConflictKind::DerivLaunch, ValidationRule::SmIncompatibleDerivLaunch, - {GetLaunchTypeStr(props.Node.LaunchType)}); - Diagnose(F, conflictMask, ConflictKind::DerivThreadGroupDim, + {GetLaunchTypeStr(Props.Node.LaunchType)}); + Diagnose(F, ConflictMask, ConflictKind::DerivThreadGroupDim, ValidationRule::SmIncompatibleThreadGroupDim, - {std::to_string(props.numThreads[0]), - std::to_string(props.numThreads[1]), - std::to_string(props.numThreads[2])}); - Diagnose(F, conflictMask, ConflictKind::DerivInComputeShaderModel, + {std::to_string(Props.numThreads[0]), + std::to_string(Props.numThreads[1]), + std::to_string(Props.numThreads[2])}); + Diagnose(F, ConflictMask, ConflictKind::DerivInComputeShaderModel, ValidationRule::SmIncompatibleDerivInComputeShaderModel); - Diagnose(F, conflictMask, ConflictKind::RequiresGroup, + Diagnose(F, ConflictMask, ConflictKind::RequiresGroup, ValidationRule::SmIncompatibleRequiresGroup); } @@ -5025,59 +5025,59 @@ struct CompatibilityChecker { // functions called by that function introduced the conflict. // In those cases, the called functions themselves will emit the diagnostic. // Return conflict mask for this function. - uint32_t Visit(Function *F, uint32_t &remainingMask, - llvm::SmallPtrSet &visited, CallGraph &CG) { + uint32_t Visit(Function *F, uint32_t &RemainingMask, + llvm::SmallPtrSet &Visited, CallGraph &CG) { // Recursive check looks for where a conflict is found and not present // in functions called by the current function. 
// - When a source is found, emit diagnostics and clear the conflict // flags introduced by this function from the working mask so we don't // report this conflict again. - // - When the remainingMask is 0, we are done. + // - When the RemainingMask is 0, we are done. - if (remainingMask == 0) + if (RemainingMask == 0) return 0; // Nothing left to search for. - if (!visited.insert(F).second) + if (!Visited.insert(F).second) return 0; // Already visited. - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = ValCtx.DxilMod.GetCompatInfoForFunction(F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) return 0; - uint32_t maskForThisFunction = IdentifyConflict(*compatInfo); + uint32_t MaskForThisFunction = IdentifyConflict(*CompatInfo); - uint32_t maskForCalls = 0; + uint32_t MaskForCalls = 0; if (CallGraphNode *CGNode = CG[F]) { for (auto &Call : *CGNode) { Function *called = Call.second->getFunction(); if (called->isDeclaration()) continue; - maskForCalls |= Visit(called, remainingMask, visited, CG); - if (remainingMask == 0) + MaskForCalls |= Visit(called, RemainingMask, Visited, CG); + if (RemainingMask == 0) return 0; // Nothing left to search for. } } // Mask of incompatibilities introduced by this function. - uint32_t conflictsIntroduced = - remainingMask & maskForThisFunction & ~maskForCalls; - if (conflictsIntroduced) { + uint32_t ConflictsIntroduced = + RemainingMask & MaskForThisFunction & ~MaskForCalls; + if (ConflictsIntroduced) { // This function introduces at least one conflict. - DiagnoseConflicts(F, conflictsIntroduced); + DiagnoseConflicts(F, ConflictsIntroduced); // Mask off diagnosed incompatibilities. 
- remainingMask &= ~conflictsIntroduced; + RemainingMask &= ~ConflictsIntroduced; } - return maskForThisFunction; + return MaskForThisFunction; } - void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &compatInfo) { - uint32_t conflictMask = IdentifyConflict(compatInfo); - if (conflictMask == 0) + void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &CompatInfo) { + uint32_t ConflictMask = IdentifyConflict(CompatInfo); + if (ConflictMask == 0) return; CallGraph &CG = ValCtx.GetCallGraph(); - llvm::SmallPtrSet visited; - Visit(EntryFn, conflictMask, visited, CG); + llvm::SmallPtrSet Visited; + Visit(EntryFn, ConflictMask, Visited, CG); } }; @@ -5086,14 +5086,14 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { DxilModule &DM = ValCtx.DxilMod; for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = DM.GetCompatInfoForFunction(&F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) continue; CompatibilityChecker checker(ValCtx, &F); - checker.FindIncompatibleCall(*compatInfo); + checker.FindIncompatibleCall(*CompatInfo); } } } @@ -5101,101 +5101,101 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { static void CheckPatchConstantSemantic(ValidationContext &ValCtx, const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = EntryProps.props; - bool isHS = props.IsHS(); + const DxilFunctionProps &Props = EntryProps.props; + bool IsHs = Props.IsHS(); - DXIL::TessellatorDomain domain = - isHS ? props.ShaderProps.HS.domain : props.ShaderProps.DS.domain; + DXIL::TessellatorDomain Domain = + IsHs ? 
Props.ShaderProps.HS.domain : Props.ShaderProps.DS.domain; - const DxilSignature &patchConstantSig = + const DxilSignature &PatchConstantSig = EntryProps.sig.PatchConstOrPrimSignature; - const unsigned kQuadEdgeSize = 4; - const unsigned kQuadInsideSize = 2; - const unsigned kQuadDomainLocSize = 2; + const unsigned KQuadEdgeSize = 4; + const unsigned KQuadInsideSize = 2; + const unsigned KQuadDomainLocSize = 2; - const unsigned kTriEdgeSize = 3; - const unsigned kTriInsideSize = 1; - const unsigned kTriDomainLocSize = 3; + const unsigned KTriEdgeSize = 3; + const unsigned KTriInsideSize = 1; + const unsigned KTriDomainLocSize = 3; - const unsigned kIsolineEdgeSize = 2; - const unsigned kIsolineInsideSize = 0; - const unsigned kIsolineDomainLocSize = 3; + const unsigned KIsolineEdgeSize = 2; + const unsigned KIsolineInsideSize = 0; + const unsigned KIsolineDomainLocSize = 3; - const char *domainName = ""; + const char *DomainName = ""; DXIL::SemanticKind kEdgeSemantic = DXIL::SemanticKind::TessFactor; - unsigned edgeSize = 0; + unsigned EdgeSize = 0; DXIL::SemanticKind kInsideSemantic = DXIL::SemanticKind::InsideTessFactor; - unsigned insideSize = 0; + unsigned InsideSize = 0; Status.domainLocSize = 0; - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - domainName = "IsoLine"; - edgeSize = kIsolineEdgeSize; - insideSize = kIsolineInsideSize; - Status.domainLocSize = kIsolineDomainLocSize; + DomainName = "IsoLine"; + EdgeSize = KIsolineEdgeSize; + InsideSize = KIsolineInsideSize; + Status.domainLocSize = KIsolineDomainLocSize; break; case DXIL::TessellatorDomain::Tri: - domainName = "Tri"; - edgeSize = kTriEdgeSize; - insideSize = kTriInsideSize; - Status.domainLocSize = kTriDomainLocSize; + DomainName = "Tri"; + EdgeSize = KTriEdgeSize; + InsideSize = KTriInsideSize; + Status.domainLocSize = KTriDomainLocSize; break; case DXIL::TessellatorDomain::Quad: - domainName = "Quad"; - edgeSize = kQuadEdgeSize; - insideSize = kQuadInsideSize; - 
Status.domainLocSize = kQuadDomainLocSize; + DomainName = "Quad"; + EdgeSize = KQuadEdgeSize; + InsideSize = KQuadInsideSize; + Status.domainLocSize = KQuadDomainLocSize; break; default: // Don't bother with other tests if domain is invalid return; } - bool bFoundEdgeSemantic = false; - bool bFoundInsideSemantic = false; - for (auto &SE : patchConstantSig.GetElements()) { - Semantic::Kind kind = SE->GetSemantic()->GetKind(); - if (kind == kEdgeSemantic) { - bFoundEdgeSemantic = true; - if (SE->GetRows() != edgeSize || SE->GetCols() > 1) { + bool FoundEdgeSemantic = false; + bool FoundInsideSemantic = false; + for (auto &SE : PatchConstantSig.GetElements()) { + Semantic::Kind Kind = SE->GetSemantic()->GetKind(); + if (Kind == kEdgeSemantic) { + FoundEdgeSemantic = true; + if (SE->GetRows() != EdgeSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), - std::to_string(SE->GetCols()), domainName, - std::to_string(edgeSize)}); + std::to_string(SE->GetCols()), DomainName, + std::to_string(EdgeSize)}); } - } else if (kind == kInsideSemantic) { - bFoundInsideSemantic = true; - if (SE->GetRows() != insideSize || SE->GetCols() > 1) { + } else if (Kind == kInsideSemantic) { + FoundInsideSemantic = true; + if (SE->GetRows() != InsideSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::SmInsideTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), std::to_string(SE->GetCols()), - domainName, std::to_string(insideSize)}); + DomainName, std::to_string(InsideSize)}); } } } - if (isHS) { - if (!bFoundEdgeSemantic) { + if (IsHs) { + if (!FoundEdgeSemantic) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } - if (!bFoundInsideSemantic && domain != DXIL::TessellatorDomain::IsoLine) { + if (!FoundInsideSemantic && Domain != DXIL::TessellatorDomain::IsoLine) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } } } static void 
ValidatePassThruHS(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { + const DxilEntryProps &EntryProps, Function *F) { // Check pass thru HS. if (F->isDeclaration()) { - const auto &props = entryProps.props; - if (props.IsHS()) { - const auto &HS = props.ShaderProps.HS; + const auto &Props = EntryProps.props; + if (Props.IsHS()) { + const auto &HS = Props.ShaderProps.HS; if (HS.inputControlPoints < HS.outputControlPoints) { ValCtx.EmitFnError( F, ValidationRule::SmHullPassThruControlPointCountMatch); @@ -5203,12 +5203,12 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // Check declared control point outputs storage amounts are ok to pass // through (less output storage than input for control points). - const DxilSignature &outSig = entryProps.sig.OutputSignature; - unsigned totalOutputCPScalars = 0; - for (auto &SE : outSig.GetElements()) { - totalOutputCPScalars += SE->GetRows() * SE->GetCols(); + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + unsigned TotalOutputCpScalars = 0; + for (auto &SE : OutSig.GetElements()) { + TotalOutputCpScalars += SE->GetRows() * SE->GetCols(); } - if (totalOutputCPScalars * HS.outputControlPoints > + if (TotalOutputCpScalars * HS.outputControlPoints > DXIL::kMaxHSOutputControlPointsTotalScalars) { ValCtx.EmitFnError(F, ValidationRule::SmOutputControlPointsTotalScalars); @@ -5223,35 +5223,35 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // validate wave size (currently allowed only on CS and node shaders but might // be supported on other shader types in the future) static void ValidateWaveSize(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { - const DxilFunctionProps &props = entryProps.props; - const hlsl::DxilWaveSize &waveSize = props.WaveSize; + const DxilEntryProps &EntryProps, Function *F) { + const DxilFunctionProps &Props = EntryProps.props; + const hlsl::DxilWaveSize &WaveSize = Props.WaveSize; - switch 
(waveSize.Validate()) { + switch (WaveSize.Validate()) { case hlsl::DxilWaveSize::ValidationResult::Success: break; case hlsl::DxilWaveSize::ValidationResult::InvalidMin: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Min", std::to_string(waveSize.Min), + {"Min", std::to_string(WaveSize.Min), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidMax: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Max", std::to_string(waveSize.Max), + {"Max", std::to_string(WaveSize.Max), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidPreferred: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Preferred", std::to_string(waveSize.Preferred), + {"Preferred", std::to_string(WaveSize.Preferred), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxOrPreferredWhenUndefined: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeAllZeroWhenUndefined, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxEqualsMin: // This case is allowed because users may disable the ErrorDefault warning. 
@@ -5259,227 +5259,227 @@ static void ValidateWaveSize(ValidationContext &ValCtx, case hlsl::DxilWaveSize::ValidationResult::PreferredWhenNoRange: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxAndPreferredZeroWhenNoRange, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxLessThanMin: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxGreaterThanMin, - {std::to_string(waveSize.Max), std::to_string(waveSize.Min)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Min)}); break; case hlsl::DxilWaveSize::ValidationResult::PreferredOutOfRange: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizePreferredInRange, - {std::to_string(waveSize.Preferred), - std::to_string(waveSize.Min), - std::to_string(waveSize.Max)}); + {std::to_string(WaveSize.Preferred), + std::to_string(WaveSize.Min), + std::to_string(WaveSize.Max)}); break; } // Check shader model and kind. 
- if (waveSize.IsDefined()) { - if (!props.IsCS() && !props.IsNode()) { + if (WaveSize.IsDefined()) { + if (!Props.IsCS() && !Props.IsNode()) { ValCtx.EmitFnError(F, ValidationRule::SmWaveSizeOnComputeOrNode); } } } static void ValidateEntryProps(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = entryProps.props; - DXIL::ShaderKind ShaderType = props.shaderKind; + const DxilFunctionProps &Props = EntryProps.props; + DXIL::ShaderKind ShaderType = Props.shaderKind; - ValidateWaveSize(ValCtx, entryProps, F); + ValidateWaveSize(ValCtx, EntryProps, F); - if (ShaderType == DXIL::ShaderKind::Compute || props.IsNode()) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + if (ShaderType == DXIL::ShaderKind::Compute || Props.IsNode()) { + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinCSThreadGroupX) || (x > DXIL::kMaxCSThreadGroupX)) { + if ((X < DXIL::kMinCSThreadGroupX) || (X > DXIL::kMaxCSThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinCSThreadGroupX), std::to_string(DXIL::kMaxCSThreadGroupX)}); } - if ((y < DXIL::kMinCSThreadGroupY) || (y > DXIL::kMaxCSThreadGroupY)) { + if ((Y < DXIL::kMinCSThreadGroupY) || (Y > DXIL::kMaxCSThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinCSThreadGroupY), std::to_string(DXIL::kMaxCSThreadGroupY)}); } - if ((z < DXIL::kMinCSThreadGroupZ) || (z > DXIL::kMaxCSThreadGroupZ)) { + if ((Z < DXIL::kMinCSThreadGroupZ) || (Z > DXIL::kMaxCSThreadGroupZ)) { ValCtx.EmitFnFormatError(F, 
ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinCSThreadGroupZ), std::to_string(DXIL::kMaxCSThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxCSThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxCSThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxCSThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. } else if (ShaderType == DXIL::ShaderKind::Mesh) { - const auto &MS = props.ShaderProps.MS; - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + const auto &MS = Props.ShaderProps.MS; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, 
ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. - unsigned maxVertexCount = MS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxMSOutputVertexCount) { + unsigned MaxVertexCount = MS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxMSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMeshShaderMaxVertexCount, {std::to_string(DXIL::kMaxMSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned maxPrimitiveCount = MS.maxPrimitiveCount; - if (maxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { + unsigned MaxPrimitiveCount = MS.maxPrimitiveCount; + if (MaxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderMaxPrimitiveCount, {std::to_string(DXIL::kMaxMSOutputPrimitiveCount), - std::to_string(maxPrimitiveCount)}); + std::to_string(MaxPrimitiveCount)}); } } else if (ShaderType == DXIL::ShaderKind::Amplification) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { 
ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. 
} else if (ShaderType == DXIL::ShaderKind::Domain) { - const auto &DS = props.ShaderProps.DS; - DXIL::TessellatorDomain domain = DS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = DS.inputControlPoints; + const auto &DS = Props.ShaderProps.DS; + DXIL::TessellatorDomain Domain = DS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = DS.inputControlPoints; - if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmDSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Hull) { - const auto &HS = props.ShaderProps.HS; - DXIL::TessellatorDomain domain = HS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = HS.inputControlPoints; - if (inputControlPointCount == 0) { - const DxilSignature &inputSig = entryProps.sig.InputSignature; - if (!inputSig.GetElements().empty()) { + const auto &HS = Props.ShaderProps.HS; + DXIL::TessellatorDomain Domain = HS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = HS.inputControlPoints; + if (InputControlPointCount == 0) { + const DxilSignature &InputSig = EntryProps.sig.InputSignature; + if 
(!InputSig.GetElements().empty()) { ValCtx.EmitFnError(F, ValidationRule::SmZeroHSInputControlPointWithInput); } - } else if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + } else if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmHSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - unsigned outputControlPointCount = HS.outputControlPoints; - if (outputControlPointCount < DXIL::kMinIAPatchControlPointCount || - outputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + unsigned OutputControlPointCount = HS.outputControlPoints; + if (OutputControlPointCount < DXIL::kMinIAPatchControlPointCount || + OutputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmOutputControlPointCountRange, {std::to_string(DXIL::kMinIAPatchControlPointCount), std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(outputControlPointCount)}); + std::to_string(OutputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - DXIL::TessellatorPartitioning partition = HS.partition; - if (partition == DXIL::TessellatorPartitioning::Undefined) { + DXIL::TessellatorPartitioning Partition = HS.partition; + if (Partition == DXIL::TessellatorPartitioning::Undefined) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorPartition); } - DXIL::TessellatorOutputPrimitive tessOutputPrimitive = HS.outputPrimitive; - if (tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::Undefined || - tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { + DXIL::TessellatorOutputPrimitive TessOutputPrimitive = HS.outputPrimitive; + if (TessOutputPrimitive == 
DXIL::TessellatorOutputPrimitive::Undefined || + TessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorOutputPrimitive); } - float maxTessFactor = HS.maxTessFactor; - if (maxTessFactor < DXIL::kHSMaxTessFactorLowerBound || - maxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { + float MaxTessFactor = HS.maxTessFactor; + if (MaxTessFactor < DXIL::kHSMaxTessFactorLowerBound || + MaxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { ValCtx.EmitFnFormatError( F, ValidationRule::MetaMaxTessFactor, {std::to_string(DXIL::kHSMaxTessFactorLowerBound), std::to_string(DXIL::kHSMaxTessFactorUpperBound), - std::to_string(maxTessFactor)}); + std::to_string(MaxTessFactor)}); } // Domain and OutPrimivtive match. - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::TriangleCW: case DXIL::TessellatorOutputPrimitive::TriangleCCW: ValCtx.EmitFnError(F, ValidationRule::SmIsoLineOutputPrimitiveMismatch); @@ -5489,7 +5489,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Tri: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5498,7 +5498,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Quad: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5511,39 +5511,39 @@ static void ValidateEntryProps(ValidationContext &ValCtx, break; } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Geometry) { - const auto 
&GS = props.ShaderProps.GS; - unsigned maxVertexCount = GS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxGSOutputVertexCount) { + const auto &GS = Props.ShaderProps.GS; + unsigned MaxVertexCount = GS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxGSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSOutputVertexCountRange, {std::to_string(DXIL::kMaxGSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned instanceCount = GS.instanceCount; - if (instanceCount > DXIL::kMaxGSInstanceCount || instanceCount < 1) { + unsigned InstanceCount = GS.instanceCount; + if (InstanceCount > DXIL::kMaxGSInstanceCount || InstanceCount < 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSInstanceCountRange, {std::to_string(DXIL::kMaxGSInstanceCount), - std::to_string(instanceCount)}); + std::to_string(InstanceCount)}); } - DXIL::PrimitiveTopology topo = DXIL::PrimitiveTopology::Undefined; - bool bTopoMismatch = false; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + DXIL::PrimitiveTopology Topo = DXIL::PrimitiveTopology::Undefined; + bool TopoMismatch = false; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - if (topo == DXIL::PrimitiveTopology::Undefined) - topo = GS.streamPrimitiveTopologies[i]; - else if (topo != GS.streamPrimitiveTopologies[i]) { - bTopoMismatch = true; + if (Topo == DXIL::PrimitiveTopology::Undefined) + Topo = GS.streamPrimitiveTopologies[I]; + else if (Topo != GS.streamPrimitiveTopologies[I]) { + TopoMismatch = true; break; } } } - if (bTopoMismatch) - topo = DXIL::PrimitiveTopology::Undefined; - switch (topo) { + if (TopoMismatch) + Topo = DXIL::PrimitiveTopology::Undefined; + switch (Topo) { case DXIL::PrimitiveTopology::PointList: case DXIL::PrimitiveTopology::LineStrip: case DXIL::PrimitiveTopology::TriangleStrip: @@ 
-5553,9 +5553,9 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; } - DXIL::InputPrimitive inputPrimitive = GS.inputPrimitive; - unsigned VertexCount = GetNumVertices(inputPrimitive); - if (VertexCount == 0 && inputPrimitive != DXIL::InputPrimitive::Undefined) { + DXIL::InputPrimitive InputPrimitive = GS.inputPrimitive; + unsigned VertexCount = GetNumVertices(InputPrimitive); + if (VertexCount == 0 && InputPrimitive != DXIL::InputPrimitive::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmGSValidInputPrimitive); } } @@ -5566,10 +5566,10 @@ static void ValidateShaderState(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntryProps(ValCtx, entryProps, Status, &F); - ValidatePassThruHS(ValCtx, entryProps, &F); + ValidateEntryProps(ValCtx, EntryProps, Status, &F); + ValidatePassThruHS(ValCtx, EntryProps, &F); } } } else { @@ -5580,33 +5580,33 @@ static void ValidateShaderState(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntryProps(ValCtx, entryProps, Status, Entry); - ValidatePassThruHS(ValCtx, entryProps, Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntryProps(ValCtx, EntryProps, Status, Entry); + ValidatePassThruHS(ValCtx, EntryProps, Entry); } } static CallGraphNode * -CalculateCallDepth(CallGraphNode *node, - std::unordered_map &depthMap, - std::unordered_set &callStack, - std::unordered_set &funcSet) { - unsigned depth = callStack.size(); - funcSet.insert(node->getFunction()); - for (auto it = node->begin(), ei = node->end(); it != ei; it++) { - CallGraphNode *toNode = it->second; - if (callStack.insert(toNode).second == false) { 
+CalculateCallDepth(CallGraphNode *Node, + std::unordered_map &DepthMap, + std::unordered_set &CallStack, + std::unordered_set &FuncSet) { + unsigned Depth = CallStack.size(); + FuncSet.insert(Node->getFunction()); + for (auto It = Node->begin(), EIt = Node->end(); It != EIt; It++) { + CallGraphNode *ToNode = It->second; + if (CallStack.insert(ToNode).second == false) { // Recursive. - return toNode; + return ToNode; } - if (depthMap[toNode] < depth) - depthMap[toNode] = depth; + if (DepthMap[ToNode] < Depth) + DepthMap[ToNode] = Depth; if (CallGraphNode *N = - CalculateCallDepth(toNode, depthMap, callStack, funcSet)) { + CalculateCallDepth(ToNode, DepthMap, CallStack, FuncSet)) { // Recursive return N; } - callStack.erase(toNode); + CallStack.erase(ToNode); } return nullptr; @@ -5616,29 +5616,29 @@ static void ValidateCallGraph(ValidationContext &ValCtx) { // Build CallGraph. CallGraph &CG = ValCtx.GetCallGraph(); - std::unordered_map depthMap; - std::unordered_set callStack; - CallGraphNode *entryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; - depthMap[entryNode] = 0; - if (CallGraphNode *N = CalculateCallDepth(entryNode, depthMap, callStack, + std::unordered_map DepthMap; + std::unordered_set CallStack; + CallGraphNode *EntryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; + DepthMap[EntryNode] = 0; + if (CallGraphNode *N = CalculateCallDepth(EntryNode, DepthMap, CallStack, ValCtx.entryFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), ValidationRule::FlowNoRecursion); if (ValCtx.DxilMod.GetShaderModel()->IsHS()) { - CallGraphNode *patchConstantNode = + CallGraphNode *PatchConstantNode = CG[ValCtx.DxilMod.GetPatchConstantFunction()]; - depthMap[patchConstantNode] = 0; - callStack.clear(); + DepthMap[PatchConstantNode] = 0; + CallStack.clear(); if (CallGraphNode *N = - CalculateCallDepth(patchConstantNode, depthMap, callStack, + CalculateCallDepth(PatchConstantNode, DepthMap, CallStack, ValCtx.patchConstFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), 
ValidationRule::FlowNoRecursion); } } static void ValidateFlowControl(ValidationContext &ValCtx) { - bool reducible = + bool Reducible = IsReducible(*ValCtx.DxilMod.GetModule(), IrreducibilityAction::Ignore); - if (!reducible) { + if (!Reducible) { ValCtx.EmitError(ValidationRule::FlowReducible); return; } @@ -5653,28 +5653,28 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { DominatorTree DT = DTA.run(F); LoopInfo LI; LI.Analyze(DT); - for (auto loopIt = LI.begin(); loopIt != LI.end(); loopIt++) { - Loop *loop = *loopIt; - SmallVector exitBlocks; - loop->getExitBlocks(exitBlocks); - if (exitBlocks.empty()) + for (auto LoopIt = LI.begin(); LoopIt != LI.end(); LoopIt++) { + Loop *Loop = *LoopIt; + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + if (ExitBlocks.empty()) ValCtx.EmitFnError(&F, ValidationRule::FlowDeadLoop); } // validate that there is no use of a value that has been output-completed // for this function. - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto &it : hlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { - Function *pF = it.second; + for (auto &It : HlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { + Function *pF = It.second; if (!pF) continue; // first, collect all the output complete calls that are not dominated // by another OutputComplete call for the same handle value llvm::SmallMapVector, 4> - handleToCI; + HandleToCI; for (User *U : pF->users()) { // all OutputComplete calls are instructions, and call instructions, // so there shouldn't need to be a null check. 
@@ -5686,33 +5686,33 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { continue; DxilInst_OutputComplete OutputComplete(CI); - Value *completedRecord = OutputComplete.get_output(); + Value *CompletedRecord = OutputComplete.get_output(); - auto vIt = handleToCI.find(completedRecord); - if (vIt == handleToCI.end()) { + auto vIt = HandleToCI.find(CompletedRecord); + if (vIt == HandleToCI.end()) { llvm::SmallPtrSet s; s.insert(CI); - handleToCI.insert(std::make_pair(completedRecord, s)); + HandleToCI.insert(std::make_pair(CompletedRecord, s)); } else { // if the handle is already in the map, make sure the map's set of // output complete calls that dominate the handle and do not dominate // each other gets updated if necessary bool CI_is_dominated = false; - for (auto ocIt = vIt->second.begin(); ocIt != vIt->second.end();) { + for (auto OcIt = vIt->second.begin(); OcIt != vIt->second.end();) { // if our new OC CI dominates an OC instruction in the set, // then replace the instruction in the set with the new OC CI. - if (DT.dominates(CI, *ocIt)) { - auto cur_it = ocIt++; + if (DT.dominates(CI, *OcIt)) { + auto cur_it = OcIt++; vIt->second.erase(*cur_it); continue; } // Remember if our new CI gets dominated by any CI in the set. 
- if (DT.dominates(*ocIt, CI)) { + if (DT.dominates(*OcIt, CI)) { CI_is_dominated = true; break; } - ocIt++; + OcIt++; } // if no CI in the set dominates our new CI, // the new CI should be added to the set @@ -5721,14 +5721,14 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { } } - for (auto handle_iter = handleToCI.begin(), e = handleToCI.end(); + for (auto handle_iter = HandleToCI.begin(), e = HandleToCI.end(); handle_iter != e; handle_iter++) { for (auto user_itr = handle_iter->first->user_begin(); user_itr != handle_iter->first->user_end(); user_itr++) { User *pU = *user_itr; - Instruction *useInstr = cast(pU); - if (useInstr) { - if (CallInst *CI = dyn_cast(useInstr)) { + Instruction *UseInstr = cast(pU); + if (UseInstr) { + if (CallInst *CI = dyn_cast(UseInstr)) { // if the user is an output complete call that is in the set of // OutputComplete calls not dominated by another OutputComplete // call for the same handle value, no diagnostics need to be @@ -5739,15 +5739,15 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { // make sure any output complete call in the set // that dominates this use gets its diagnostic emitted. 
- for (auto ocIt = handle_iter->second.begin(); - ocIt != handle_iter->second.end(); ocIt++) { - Instruction *ocInstr = cast(*ocIt); - if (DT.dominates(ocInstr, useInstr)) { + for (auto OcIt = handle_iter->second.begin(); + OcIt != handle_iter->second.end(); OcIt++) { + Instruction *OcInstr = cast(*OcIt); + if (DT.dominates(OcInstr, UseInstr)) { ValCtx.EmitInstrError( - useInstr, + UseInstr, ValidationRule::InstrNodeRecordHandleUseAfterComplete); ValCtx.EmitInstrNote( - *ocIt, "record handle invalidated by OutputComplete"); + *OcIt, "record handle invalidated by OutputComplete"); break; } } @@ -5763,57 +5763,57 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { static void ValidateUninitializedOutput(ValidationContext &ValCtx, Function *F) { DxilModule &DM = ValCtx.DxilMod; - DxilEntryProps &entryProps = DM.GetDxilEntryProps(F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); EntryStatus &Status = ValCtx.GetEntryStatus(F); - const DxilFunctionProps &props = entryProps.props; + const DxilFunctionProps &Props = EntryProps.props; // For HS only need to check Tessfactor which is in patch constant sig. - if (props.IsHS()) { - std::vector &patchConstOrPrimCols = Status.patchConstOrPrimCols; - const DxilSignature &patchConstSig = - entryProps.sig.PatchConstOrPrimSignature; - for (auto &E : patchConstSig.GetElements()) { - unsigned mask = patchConstOrPrimCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + if (Props.IsHS()) { + std::vector &PatchConstOrPrimCols = Status.patchConstOrPrimCols; + const DxilSignature &PatchConstSig = + EntryProps.sig.PatchConstOrPrimSignature; + for (auto &E : PatchConstSig.GetElements()) { + unsigned Mask = PatchConstOrPrimCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. 
- if (mask != requireMask && !E->GetSemantic()->IsArbitrary()) { + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary()) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } return; } - const DxilSignature &outSig = entryProps.sig.OutputSignature; - std::vector &outputCols = Status.outputCols; - for (auto &E : outSig.GetElements()) { - unsigned mask = outputCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + std::vector &OutputCols = Status.outputCols; + for (auto &E : OutSig.GetElements()) { + unsigned Mask = OutputCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. - if (mask != requireMask && !E->GetSemantic()->IsArbitrary() && + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary() && E->GetSemantic()->GetKind() != Semantic::Kind::Target) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } - if (!props.IsGS()) { - unsigned posMask = Status.OutputPositionMask[0]; - if (posMask != 0xf && Status.hasOutputPosition[0]) { + if (!Props.IsGS()) { + unsigned PosMask = Status.OutputPositionMask[0]; + if (PosMask != 0xf && Status.hasOutputPosition[0]) { ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition); } } else { - const auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + const auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - for (unsigned i = 0; i < DXIL::kNumOutputStreams; i++) { - if (streamMask & (1 << i)) { - unsigned posMask = Status.OutputPositionMask[i]; - if (posMask != 0xf && 
Status.hasOutputPosition[i]) { + for (unsigned I = 0; I < DXIL::kNumOutputStreams; I++) { + if (StreamMask & (1 << I)) { + unsigned PosMask = Status.OutputPositionMask[I]; + if (PosMask != 0xf && Status.hasOutputPosition[I]) { ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition); } } From 0ffd60accba540b0127e727f68b61b8075d6130a Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Fri, 4 Apr 2025 13:10:28 -0700 Subject: [PATCH 15/19] [SM6.9] Native vector load/store lowering (#7292) Enables the declaration of long vector types for raw buffers, the lowering of those and traditional vectors in loads and stores maintaining the native types with new dxil ops along with validation and testing support of the same. Allow declaring long vector rawbuffer resources. Previously disallowed along with other global types, this provides a mechanism for indicating which buffers are raw and allowing them to contain long vectors, continuing to produce an error for other resource types verified by existing tests Introduce native vector DXIL load/store intrinsics. Add new raw buffer vector load/store intrinsics using the new vector overload types. Include them in validation associated with similar load/stores Lower native vector raw buffers load/stores into new ops. When the loaded/stored type is a vector of more than 1 element, the shader model is 6.9 or higher, and the operation is on a raw buffer, enable the generation of a native vector raw buffer load or store. Incidental removal of unused parameter in load translation and some refactoring of the lowering to flow better with the new resret types. add validation and compute shader tests Vector to scalar raw buffer load lowering pass Native vector loads and stores are generated for 6.9 targets and above. This includes the 6.x target used when compiling to libraries. This adds a pass run when linking that will lower the vector operations to scalar operations for shader models that don't have native vector support. 
This allows libraries compiled for supportive shader models to be linked to targets without support. Validate native vector loads and stores for properly defined parameters of the correct type. Add tests for both vector load/stores and the original scalar load/stores since they share a lot of validation code. Fixes #7118 --- include/dxc/DXIL/DxilConstants.h | 28 +- include/dxc/DXIL/DxilInstructions.h | 93 +++ include/dxc/HLSL/DxilGenerationPass.h | 2 + lib/DXIL/DxilOperations.cpp | 46 +- lib/DxilValidation/DxilValidation.cpp | 87 ++- lib/HLSL/CMakeLists.txt | 1 + lib/HLSL/DxilLinker.cpp | 4 + lib/HLSL/DxilScalarizeVectorLoadStores.cpp | 231 ++++++ lib/HLSL/HLOperationLower.cpp | 72 +- tools/clang/lib/Sema/SemaHLSL.cpp | 2 +- .../intrinsics/buffer-load-stores-sm69.hlsl | 91 +++ .../hlsl/types/longvec-operators-cs.hlsl | 719 ++++++++++++++++++ .../types/longvec-operators-vec1s-cs.hlsl | 680 +++++++++++++++++ .../hlsl/types/longvec-operators-vec1s.hlsl | 62 +- .../hlsl/types/longvec-operators.hlsl | 18 - .../longvec-load-stores-scalarizevecldst.ll | 478 ++++++++++++ .../DXILValidation/load-store-validation.hlsl | 74 ++ .../DXILValidation/vector-validation.hlsl | 14 + .../load-store-validation.ll | 229 ++++++ .../LitDXILValidation/vector-validation.ll | 78 ++ .../hlsl/types/invalid-longvecs-sm68.hlsl | 2 + tools/clang/unittests/HLSL/ValidationTest.cpp | 26 +- utils/hct/hctdb.py | 96 ++- 23 files changed, 2991 insertions(+), 142 deletions(-) create mode 100644 lib/HLSL/DxilScalarizeVectorLoadStores.cpp create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll create mode 100644 tools/clang/test/DXILValidation/load-store-validation.hlsl create mode 100644 
tools/clang/test/DXILValidation/vector-validation.hlsl create mode 100644 tools/clang/test/LitDXILValidation/load-store-validation.ll create mode 100644 tools/clang/test/LitDXILValidation/vector-validation.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 447728300b..4f8c521851 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -898,8 +898,11 @@ enum class OpCode : unsigned { GetDimensions = 72, // gets texture size information RawBufferLoad = 139, // reads from a raw buffer and structured buffer RawBufferStore = 140, // writes to a RWByteAddressBuffer or RWStructuredBuffer - TextureLoad = 66, // reads texel data without any filtering or sampling - TextureStore = 67, // reads texel data without any filtering or sampling + RawBufferVectorLoad = 303, // reads from a raw buffer and structured buffer + RawBufferVectorStore = + 304, // writes to a RWByteAddressBuffer or RWStructuredBuffer + TextureLoad = 66, // reads texel data without any filtering or sampling + TextureStore = 67, // reads texel data without any filtering or sampling TextureStoreSample = 225, // stores texel data at specified sample index // Sampler Feedback @@ -1044,7 +1047,7 @@ enum class OpCode : unsigned { NumOpCodes_Dxil_1_7 = 226, NumOpCodes_Dxil_1_8 = 258, - NumOpCodes = 303 // exclusive last value of enumeration + NumOpCodes = 305 // exclusive last value of enumeration }; // OPCODE-ENUM:END @@ -1278,6 +1281,8 @@ enum class OpCodeClass : unsigned { GetDimensions, RawBufferLoad, RawBufferStore, + RawBufferVectorLoad, + RawBufferVectorStore, TextureLoad, TextureStore, TextureStoreSample, @@ -1356,7 +1361,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 177 // exclusive last value of enumeration + NumOpClasses = 179 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END @@ -1415,6 +1420,12 @@ const unsigned kRawBufferLoadElementOffsetOpIdx = 3; 
const unsigned kRawBufferLoadMaskOpIdx = 4; const unsigned kRawBufferLoadAlignmentOpIdx = 5; +// RawBufferVectorLoad. +const unsigned kRawBufferVectorLoadHandleOpIdx = 1; +const unsigned kRawBufferVectorLoadIndexOpIdx = 2; +const unsigned kRawBufferVectorLoadElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorLoadAlignmentOpIdx = 4; + // RawBufferStore const unsigned kRawBufferStoreHandleOpIdx = 1; const unsigned kRawBufferStoreIndexOpIdx = 2; @@ -1424,7 +1435,14 @@ const unsigned kRawBufferStoreVal1OpIdx = 5; const unsigned kRawBufferStoreVal2OpIdx = 6; const unsigned kRawBufferStoreVal3OpIdx = 7; const unsigned kRawBufferStoreMaskOpIdx = 8; -const unsigned kRawBufferStoreAlignmentOpIdx = 8; +const unsigned kRawBufferStoreAlignmentOpIdx = 9; + +// RawBufferVectorStore +const unsigned kRawBufferVectorStoreHandleOpIdx = 1; +const unsigned kRawBufferVectorStoreIndexOpIdx = 2; +const unsigned kRawBufferVectorStoreElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorStoreValOpIdx = 4; +const unsigned kRawBufferVectorStoreAlignmentOpIdx = 5; // TextureStore. 
const unsigned kTextureStoreHandleOpIdx = 1; diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index f8d9ae77f3..6ee22869a5 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8923,5 +8923,98 @@ struct DxilInst_HitObject_MakeNop { // Metadata bool requiresUniformInputs() const { return false; } }; + +/// This instruction reads from a raw buffer and structured buffer +struct DxilInst_RawBufferVectorLoad { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorLoad(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorLoad); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_buf = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_alignment = 4, + }; + // Accessors + llvm::Value *get_buf() const { return Instr->getOperand(1); } + void set_buf(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(4); } + void set_alignment(llvm::Value *val) { Instr->setOperand(4, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(4)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(4, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, 
(uint64_t)val))); + } +}; + +/// This instruction writes to a RWByteAddressBuffer or RWStructuredBuffer +struct DxilInst_RawBufferVectorStore { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorStore(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorStore); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_uav = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_value0 = 4, + arg_alignment = 5, + }; + // Accessors + llvm::Value *get_uav() const { return Instr->getOperand(1); } + void set_uav(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_value0() const { return Instr->getOperand(4); } + void set_value0(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(5); } + void set_alignment(llvm::Value *val) { Instr->setOperand(5, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(5)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(5, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/HLSL/DxilGenerationPass.h b/include/dxc/HLSL/DxilGenerationPass.h index c77ddab3d0..9df93e9232 100644 --- 
a/include/dxc/HLSL/DxilGenerationPass.h +++ b/include/dxc/HLSL/DxilGenerationPass.h @@ -81,6 +81,7 @@ ModulePass *createResumePassesPass(); FunctionPass *createMatrixBitcastLowerPass(); ModulePass *createDxilCleanupAddrSpaceCastPass(); ModulePass *createDxilRenameResourcesPass(); +ModulePass *createDxilScalarizeVectorLoadStoresPass(); void initializeDxilLowerCreateHandleForLibPass(llvm::PassRegistry &); void initializeDxilAllocateResourcesForLibPass(llvm::PassRegistry &); @@ -115,6 +116,7 @@ void initializeResumePassesPass(llvm::PassRegistry &); void initializeMatrixBitcastLowerPassPass(llvm::PassRegistry &); void initializeDxilCleanupAddrSpaceCastPass(llvm::PassRegistry &); void initializeDxilRenameResourcesPass(llvm::PassRegistry &); +void initializeDxilScalarizeVectorLoadStoresPass(llvm::PassRegistry &); ModulePass *createDxilValidateWaveSensitivityPass(); void initializeDxilValidateWaveSensitivityPass(llvm::PassRegistry &); diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 56cdd0d04f..0b4c7218d4 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2633,6 +2633,24 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v + + // Resources + {OC::RawBufferVectorLoad, + "RawBufferVectorLoad", + OCC::RawBufferVectorLoad, + "rawBufferVectorLoad", + Attribute::ReadOnly, + 1, + {{0x4e7}}, + {{0xe7}}}, // Overloads: hfwidlgetNumParams() <= 4) return nullptr; return FT->getParamType(4); @@ -6134,7 +6173,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TextureGatherRaw: case OpCode::SampleCmpLevel: case OpCode::SampleCmpGrad: - case OpCode::SampleCmpBias: { + case OpCode::SampleCmpBias: + case OpCode::RawBufferVectorLoad: { StructType *ST = cast(Ty); return ST->getElementType(0); } diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 97bde6ca24..a788f21d4e 100644 --- 
a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1475,34 +1475,35 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, } } } break; - case DXIL::OpCode::RawBufferLoad: { + case DXIL::OpCode::RawBufferLoad: if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferLoad, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } - DxilInst_RawBufferLoad BufLd(CI); + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorLoad: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadHandleOpIdx); DXIL::ComponentType CompTy; DXIL::ResourceClass ResClass; DXIL::ResourceKind ResKind = - GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); if (ResClass != DXIL::ResourceClass::SRV && - ResClass != DXIL::ResourceClass::UAV) { + ResClass != DXIL::ResourceClass::UAV) + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); - } - Value *Offset = BufLd.get_elementOffset(); - Value *Align = BufLd.get_alignment(); - unsigned AlignSize = 0; - if (!isa(Align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - AlignSize = BufLd.get_alignment_val(); - } + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferLoadAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorLoad == Opcode) + AlignIdx = DXIL::OperandIndex::kRawBufferVectorLoadAlignmentOpIdx; + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx); switch (ResKind) { case DXIL::ResourceKind::RawBuffer: if (!isa(Offset)) { @@ -1526,38 +1527,44 @@ static void ValidateResourceDxilOp(CallInst *CI, 
DXIL::OpCode Opcode, if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferStore, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } - DxilInst_RawBufferStore BufSt(CI); - DXIL::ComponentType CompTy; - DXIL::ResourceClass ResClass; - DXIL::ResourceKind ResKind = - GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - - if (ResClass != DXIL::ResourceClass::UAV) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); - } - - ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + DxilInst_RawBufferStore bufSt(CI); + ConstantInt *Mask = dyn_cast(bufSt.get_mask()); unsigned StValMask = - StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), - BufSt.get_value2(), BufSt.get_value3()}); + StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), + bufSt.get_value2(), bufSt.get_value3()}); if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, false /*IsTyped*/, ValCtx)) return; + } + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorStore: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreHandleOpIdx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); - Value *Offset = BufSt.get_elementOffset(); - Value *Align = BufSt.get_alignment(); - unsigned AlignSize = 0; - if (!isa(Align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - AlignSize = BufSt.get_alignment_val(); + if (ResClass != DXIL::ResourceClass::UAV) + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); + + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferStoreAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorStore == Opcode) { + AlignIdx = 
DXIL::OperandIndex::kRawBufferVectorStoreAlignmentOpIdx; + unsigned ValueIx = DXIL::OperandIndex::kRawBufferVectorStoreValOpIdx; + if (isa(CI->getOperand(ValueIx))) + ValCtx.EmitInstrError(CI, + ValidationRule::InstrUndefinedValueForUAVStore); } + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx); switch (ResKind) { case DXIL::ResourceKind::RawBuffer: if (!isa(Offset)) { @@ -1684,6 +1691,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::CBufferLoadLegacy: case DXIL::OpCode::RawBufferLoad: case DXIL::OpCode::RawBufferStore: + case DXIL::OpCode::RawBufferVectorLoad: + case DXIL::OpCode::RawBufferVectorStore: ValidateResourceDxilOp(CI, Opcode, ValCtx); break; // Input output. diff --git a/lib/HLSL/CMakeLists.txt b/lib/HLSL/CMakeLists.txt index 947fc4c14f..21bb9523a7 100644 --- a/lib/HLSL/CMakeLists.txt +++ b/lib/HLSL/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_library(LLVMHLSL DxilNoops.cpp DxilPreserveAllOutputs.cpp DxilRenameResourcesPass.cpp + DxilScalarizeVectorLoadStores.cpp DxilSimpleGVNHoist.cpp DxilSignatureValidation.cpp DxilTargetLowering.cpp diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index ca343662ab..75d1bf78e9 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1247,6 +1247,10 @@ void DxilLinkJob::RunPreparePass(Module &M) { PM.add(createDxilReinsertNopsPass()); PM.add(createAlwaysInlinerPass(/*InsertLifeTime*/ false)); + // If we need SROA and dynamicindexvector to array, + // do it early to allow following scalarization to go forward. + PM.add(createDxilScalarizeVectorLoadStoresPass()); + // Remove unused functions. 
PM.add(createDxilDeadFunctionEliminationPass()); diff --git a/lib/HLSL/DxilScalarizeVectorLoadStores.cpp b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp new file mode 100644 index 0000000000..febcf32358 --- /dev/null +++ b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp @@ -0,0 +1,231 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilScalarizeVectorLoadStores.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Lowers native vector load stores to potentially multiple scalar calls. // +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DXIL/DxilModule.h" +#include "dxc/HLSL/DxilGenerationPass.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +using namespace llvm; +using namespace hlsl; + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); + +class DxilScalarizeVectorLoadStores : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilScalarizeVectorLoadStores() : ModulePass(ID) {} + + StringRef getPassName() const override { + return "DXIL scalarize vector load/stores"; + } + + bool runOnModule(Module &M) override { + DxilModule &DM = M.GetOrCreateDxilModule(); + // Shader Model 6.9 allows native vectors and doesn't need this pass. 
+ if (DM.GetShaderModel()->IsSM69Plus()) + return false; + + bool Changed = false; + + hlsl::OP *HlslOP = DM.GetOP(); + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorLoad)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorLoad(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorStore)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorStore(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + return Changed; + } +}; + +static unsigned GetRawBufferMask(unsigned NumComponents) { + switch (NumComponents) { + case 0: + return 0; + case 1: + return DXIL::kCompMask_X; + case 2: + return DXIL::kCompMask_X | DXIL::kCompMask_Y; + case 3: + return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z; + case 4: + default: + return DXIL::kCompMask_All; + } + return DXIL::kCompMask_All; +} + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorLoad VecLd(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferLoad; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecLd.get_buf()); // Resource handle @1. + Args.emplace_back(VecLd.get_index()); // Index @2. + Args.emplace_back(VecLd.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Mask to be set later @4. + Args.emplace_back(VecLd.get_alignment()); // Alignment @5. + + // Set offset to increment depending on whether the real offset is defined. 
+ unsigned OffsetIdx; + if (isa(VecLd.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + StructType *ResRetTy = cast(CI->getType()); + Type *Ty = ResRetTy->getElementType(0); + unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + + const unsigned MaxElemCount = 4; + SmallVector Elts(NumComponents); + Value *Ld = nullptr; + for (unsigned EIx = 0; EIx < NumComponents;) { + // Load 4 elements or however many less than 4 are left to load. + unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = + HlslOP->GetI8Const(GetRawBufferMask(ChunkSize)); + // If we've loaded a chunk already, update offset to next chunk. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(OpCode)); + for (unsigned ChIx = 0; ChIx < ChunkSize; ChIx++, EIx++) + Elts[EIx] = Builder.CreateExtractValue(Ld, ChIx); + } + + Value *RetValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); + for (unsigned ElIx = 0; ElIx < NumComponents; ElIx++) + RetValNew = Builder.CreateInsertElement(RetValNew, Elts[ElIx], ElIx); + + // Replace users of the vector extracted from the vector load resret. + Value *Status = nullptr; + for (auto CU = CI->user_begin(), CE = CI->user_end(); CU != CE;) { + auto EV = cast(*(CU++)); + unsigned Ix = EV->getIndices()[0]; + if (Ix == 0) { + // Handle value uses. + EV->replaceAllUsesWith(RetValNew); + } else if (Ix == 1) { + // Handle status uses. 
+ if (!Status) + Status = Builder.CreateExtractValue(Ld, DXIL::kResRetStatusIndex); + EV->replaceAllUsesWith(Status); + } + EV->eraseFromParent(); + } + CI->eraseFromParent(); +} + +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorStore VecSt(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferStore; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecSt.get_uav()); // Resource handle @1. + Args.emplace_back(VecSt.get_index()); // Index @2. + Args.emplace_back(VecSt.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Val0 to be set later @4. + Args.emplace_back(nullptr); // Val1 to be set later @5. + Args.emplace_back(nullptr); // Val2 to be set later @6. + Args.emplace_back(nullptr); // Val3 to be set later @7. + Args.emplace_back(nullptr); // Mask to be set later @8. + Args.emplace_back(VecSt.get_alignment()); // Alignment @9. + + // Set offset to increment depending on whether the real offset is defined. + unsigned OffsetIdx; + if (isa(VecSt.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + Value *VecVal = VecSt.get_value0(); + + const unsigned MaxElemCount = 4; + Type *Ty = VecVal->getType(); + const unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + Value *UndefVal = UndefValue::get(EltTy); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + for (unsigned EIx = 0; EIx < NumComponents;) { + // Store 4 elements or however many less than 4 are left to store. 
+ unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + // Populate all value arguments either with the vector or undefs. + uint8_t Mask = 0; + unsigned ChIx = 0; + for (; ChIx < ChunkSize; ChIx++, EIx++) { + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = + Builder.CreateExtractElement(VecVal, EIx); + Mask |= (1 << ChIx); + } + for (; ChIx < MaxElemCount; ChIx++) + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = UndefVal; + + Args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] = + HlslOP->GetU8Const(Mask); + Builder.CreateCall(F, Args); + } + CI->eraseFromParent(); +} + +char DxilScalarizeVectorLoadStores::ID = 0; + +ModulePass *llvm::createDxilScalarizeVectorLoadStoresPass() { + return new DxilScalarizeVectorLoadStores(); +} + +INITIALIZE_PASS(DxilScalarizeVectorLoadStores, + "hlsl-dxil-scalarize-vector-load-stores", + "DXIL scalarize vector load/stores", false, false) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 445dbcc879..4d8201df8d 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -3956,6 +3956,11 @@ struct ResLoadHelper { : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { opcode = LoadOpFromResKind(RK); + Type *Ty = Inst->getType(); + if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && + Ty->getVectorNumElements() > 1 && + Inst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; } OP::OpCode opcode; IntrinsicOp intrinsicOpCode; @@ -4025,6 +4030,14 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, if (RC == DxilResourceBase::Class::SRV) OffsetIdx = IsMS ? 
HLOperandIndex::kTex2DMSLoadOffsetOpIdx : HLOperandIndex::kTexLoadOffsetOpIdx; + } else if (opcode == OP::OpCode::RawBufferLoad) { + // If native vectors are available and this load had a vector + // with more than one element, convert the RawBufferLoad to the + // native vector variant RawBufferVectorLoad. + Type *Ty = CI->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; + } // Set offset. @@ -4082,7 +4095,7 @@ Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, // Sets up arguments for buffer load call. static SmallVector GetBufLoadArgs(ResLoadHelper helper, HLResource::Kind RK, - IRBuilder<> Builder, Type *EltTy, + IRBuilder<> Builder, unsigned LdSize) { OP::OpCode opcode = helper.opcode; llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); @@ -4130,6 +4143,7 @@ static SmallVector GetBufLoadArgs(ResLoadHelper helper, // If not TextureLoad, it could be a typed or raw buffer load. // They have mostly similar arguments. DXASSERT(opcode == OP::OpCode::RawBufferLoad || + opcode == OP::OpCode::RawBufferVectorLoad || opcode == OP::OpCode::BufferLoad, "Wrong opcode in get load args"); Args.emplace_back( @@ -4140,6 +4154,9 @@ static SmallVector GetBufLoadArgs(ResLoadHelper helper, // Unlike typed buffer load, raw buffer load has mask and alignment. Args.emplace_back(nullptr); // Mask will be added later %4. Args.emplace_back(alignmentVal); // alignment @5. + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // RawBufferVectorLoad takes just alignment, no mask. + Args.emplace_back(alignmentVal); // alignment @4 + } } return Args; } @@ -4165,18 +4182,21 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, if (isBool || (is64 && isTyped)) EltTy = Builder.getInt32Ty(); - // 64-bit types are stored as int32 pairs in typed buffers. + // Calculate load size with the scalar memory element type.
+ unsigned LdSize = DL.getTypeAllocSize(EltTy); + + // Adjust number of components as needed. if (is64 && isTyped) { + // 64-bit types are stored as int32 pairs in typed buffers. DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords."); NumComponents *= 2; + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // Native vector loads only have a single vector element in ResRet. + EltTy = VectorType::get(EltTy, NumComponents); + NumComponents = 1; } - unsigned LdSize = DL.getTypeAllocSize(EltTy); - - SmallVector Elts(NumComponents); - - SmallVector Args = - GetBufLoadArgs(helper, RK, Builder, EltTy, LdSize); + SmallVector Args = GetBufLoadArgs(helper, RK, Builder, LdSize); // Keep track of the first load for debug info migration. Value *FirstLd = nullptr; @@ -4188,9 +4208,10 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, else if (RK == DxilResource::Kind::StructuredBuffer) OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; - // Create calls to function object. + // Create call(s) to function object and collect results in Elts. // Typed buffer loads are limited to one load of up to 4 32-bit values. // Raw buffer loads might need multiple loads in chunks of 4. + SmallVector Elts(NumComponents); for (unsigned i = 0; i < NumComponents;) { // Load 4 elements or however many less than 4 are left to load. unsigned chunkSize = std::min(NumComponents - i, 4U); @@ -4200,7 +4221,7 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = GetRawBufferMaskForETy(EltTy, chunkSize, OP); // If we've loaded a chunk already, update offset to next chunk. 
- if (FirstLd != nullptr && opcode == OP::OpCode::RawBufferLoad) + if (FirstLd != nullptr) Args[OffsetIdx] = Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize)); } @@ -4209,8 +4230,13 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); // Extract elements from returned ResRet. - for (unsigned j = 0; j < chunkSize; j++, i++) - Elts[i] = Builder.CreateExtractValue(Ld, j); + // Native vector loads just have one vector element in the ResRet. + // Others have up to four scalars that need to be individually extracted. + if (opcode == OP::OpCode::RawBufferVectorLoad) + Elts[i++] = Builder.CreateExtractValue(Ld, 0); + else + for (unsigned j = 0; j < chunkSize; j++, i++) + Elts[i] = Builder.CreateExtractValue(Ld, j); // Update status. UpdateStatus(Ld, helper.status, Builder, OP); @@ -4248,9 +4274,10 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, } } - // Package elements into a vector. + // Package elements into a vector as needed. Value *retValNew = nullptr; - if (!Ty->isVectorTy()) { + // Scalar or native vector loads need not construct vectors from elements. + if (!Ty->isVectorTy() || opcode == OP::OpCode::RawBufferVectorLoad) { retValNew = Elts[0]; } else { retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); @@ -4348,6 +4375,10 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, case DxilResource::Kind::StructuredBuffer: IsTyped = false; opcode = OP::OpCode::RawBufferStore; + // Where shader model and type allows, use vector store intrinsic. 
+ if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + Ty->isVectorTy() && Ty->getVectorNumElements() > 1) + opcode = OP::OpCode::RawBufferVectorStore; break; case DxilResource::Kind::TypedBuffer: opcode = OP::OpCode::BufferStore; @@ -4390,7 +4421,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, EltTy = i32Ty; } - Function *F = OP->GetOpFunc(opcode, EltTy); llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); llvm::Value *undefI = @@ -4404,6 +4434,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, unsigned OffsetIdx = 0; if (opcode == OP::OpCode::RawBufferStore || + opcode == OP::OpCode::RawBufferVectorStore || opcode == OP::OpCode::BufferStore) { // Append Coord0 (Index) value. if (Idx->getType()->isVectorTy()) { @@ -4423,7 +4454,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, OffsetIdx = storeArgs.size() - 1; // Coord1 (Offset). - // Only relevant when storing more than 4 elements to structured buffers. storeArgs.emplace_back(offset); } else { // texture store @@ -4444,6 +4474,16 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, // TODO: support mip for texture ST } + // RawBufferVectorStore only takes a single value and alignment arguments. + if (opcode == DXIL::OpCode::RawBufferVectorStore) { + storeArgs.emplace_back(val); + storeArgs.emplace_back(Alignment); + Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty); + Builder.CreateCall(F, storeArgs); + return; + } + Function *F = OP->GetOpFunc(opcode, EltTy); + constexpr unsigned MaxStoreElemCount = 4; const unsigned CompCount = Ty->isVectorTy() ? 
Ty->getVectorNumElements() : 1; const unsigned StoreInstCount = diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index f9e011f8d4..027d7d3cbc 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -15193,7 +15193,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } // Disallow long vecs from $Global cbuffers. - if (isGlobal && !isStatic && !isGroupShared) { + if (isGlobal && !isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) { // Suppress actual emitting of errors for incompletable types here // They are redundant to those produced in ActOnUninitializedDecl. struct SilentDiagnoser : public TypeDiagnoser { diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl new file mode 100644 index 0000000000..5305ee495b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -0,0 +1,91 @@ +// RUN: %dxc -DTYPE=float -DNUM=4 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=4 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=2 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=2 -T vs_6_9 %s | FileCheck %s + +// RUN: %dxc -DTYPE=float -DNUM=6 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=13 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=24 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=32 -T vs_6_9 %s | FileCheck %s + +/////////////////////////////////////////////////////////////////////// +// Test codegen for various load and store operations and conversions +// for different scalar/vector buffer types and indices. 
+/////////////////////////////////////////////////////////////////////// + +// CHECK: %dx.types.ResRet.[[VTY:v[0-9]*[a-z][0-9][0-9]]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]>, i32 } + +ByteAddressBuffer RoByBuf : register(t1); +RWByteAddressBuffer RwByBuf : register(u1); + +StructuredBuffer > RoStBuf : register(t2); +RWStructuredBuffer > RwStBuf : register(u2); + +ConsumeStructuredBuffer > CnStBuf : register(u4); +AppendStructuredBuffer > ApStBuf : register(u5); + +// CHECK-LABEL: define void @main +[shader("vertex")] +void main(uint ix[2] : IX) { + // ByteAddressBuffer Tests + + // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + + // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + + // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt1 = RwByBuf.Load< vector >(ix[0]); + + // CHECK: 
[[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt2 = RoByBuf.Load< vector >(ix[0]); + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt4 = RoStBuf[ix[1]]; + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + 
stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl new file mode 100644 index 0000000000..0a115bd709 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl @@ -0,0 +1,719 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | 
FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DNUM=33 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DNUM=12 -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on an assortment of vector sizes and types with 6.9 native vectors. +// Tests in a CS environment where vector operations were previously disallowed to confirm that they are retained. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses a non-vector buffer to avoid interacting with that implementation.
+// CHECK-DAG: %dx.types.ResRet.[[TY:v[0-9]*[a-z][0-9]*]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]> +// CHECK-DAG: %dx.types.ResRet.[[STY:[a-z][0-9]*]] = type { [[STYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> + +void assignments(inout vector things[11], TYPE scales[10]); +vector arithmetic(inout vector things[11])[11]; +vector scarithmetic(vector things[11], TYPE scales[10])[11]; +vector logic(vector truth[10], vector consequences[11])[10]; +vector index(vector things[11], int i)[11]; +void bittwiddlers(inout vector things[13]); + +struct Viface { + vector values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + vector values[10]; +}; + +struct Binface { + vector values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + 
// CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + // CHECK: [[scratch1:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + // CHECK: [[scratch2:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + // Assign scalar offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. + + // Assign boolean offsets to capture the expected values. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 2, i32 0, <13 x i32> + Offsets[2] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +void assignments(inout vector things[11], TYPE scales[10]) { + + // CHECK: [[VcIx:%.*]] = add i32 [[InIx1]], 1 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, 
%dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // 
CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + things[0] = scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec1]] + things[1] += things[5]; + + // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + things[2] -= things[6]; + + // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec7]], [[vec3]] + things[3] *= things[7]; + + // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]] + things[4] /= things[8]; + +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. 
+ // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float> + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]] + // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double> + vector f9 = (vector)things[9]; + vector f5 = (vector)things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + things[5] %= things[9]; +#endif + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]] + things[6] += scales[1]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]] + things[7] -= scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]] + things[8] *= scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]] + things[9] /= scales[4]; + + // CHECK: call void 
@dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + +} + +// Test arithmetic operators. 
+vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11] { + vector<TYPE, NUM> res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]])
+ // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // NOINT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> <[[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000),.*}}>, [[vec0]] + // INT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> zeroinitializer, [[vec0]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + res[5] = things[4] / things[5]; + + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. 
+ // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float> + // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]] + // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double> + res[6] = (vector)things[5] % (vector)things[6]; +#else + // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]], <[[TYPE]] [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]], <[[TYPE]] [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]], <[[TYPE]] [[NEG1]] + res[10] = --things[10]; + + // Things[] input gets all the result values since pre/post inc/decrements don't change the end result. + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // res1 is just vec0 since it was just the unary + operator. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // res[] input gets either the original or the preincremented value. + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[vec7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[vec8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. 
+vector<TYPE, NUM> scarithmetic(vector<TYPE, NUM> things[11], TYPE scales[10])[11] { + vector<TYPE, NUM> res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]],
i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + // CHECK: [[spt:%[0-9]*]] 
= insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]] + res[1] = things[1] - scales[1]; + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x 
[[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + + return res; +} + +// Test logic operators. 
+// Only permissible in pre-HLSL2021 +vector<int, NUM> logic(vector<int, NUM> truth[10], vector<TYPE, NUM> consequences[11])[10] { + vector<int, NUM> res[10]; + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 + // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] + // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i32 [[IALN]]) + // CHECK: [[ivec0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i32 [[IALN]]) + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i32 [[IALN]]) + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i32 [[IALN]]) + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i32 [[IALN]]) + // CHECK: [[ivec4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i32 [[IALN]]) + // CHECK: [[ivec5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 4 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] =
call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec1]], 
zeroinitializer + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = truth[1] || truth[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec4]], zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec5]], zeroinitializer + // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1>
[[cmp8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], <[[NUM]] x i32> [[res0]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], <[[NUM]] x i32> [[res1]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], <[[NUM]] x i32> [[res2]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], <[[NUM]] x i32> [[res3]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], <[[NUM]] x i32> [[res4]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], <[[NUM]] x i32> [[res5]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], <[[NUM]] x i32> [[res6]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], <[[NUM]] x i32> [[res7]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF8]], <[[NUM]] x i32> [[res8]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], <[[NUM]] x i32> [[res9]], i32 4) + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +vector index(vector things[11], 
int i)[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 5 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec1]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, 
%dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec3]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec4]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec5]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec6]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue 
%dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec9]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec10]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + res[0] = 0; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 [[Ix]] + // CHECK: 
store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[POS1]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 [[Ix]] + // CHECK: [[ldix:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[ldix]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[4] = things[i]; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 
[[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[ldix]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[vec2]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x 
[[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. +void bittwiddlers(inout vector things[13]) { + // INT: [[VcIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // INT: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // INT: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // INT: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // INT: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // INT: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // INT: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // INT: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) + // INT: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // INT: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], i32 [[ALN]]) + // INT: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], i32 [[ALN]]) + // INT: [[vec11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], i32 [[ALN]]) + // INT: [[vec12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or <[[NUM]] x 
[[TYPE]]> [[vec3]], [[vec2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]] + // INT: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]] + things[4] = things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]] + things[6] |= things[8]; + + // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]] + things[7] &= things[9]; + + // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec11]] + // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec12]] + // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 
304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], <[[NUM]] x [[TYPE]]> [[vec11]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], <[[NUM]] x [[TYPE]]> [[vec12]], i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl new file mode 100644 index 0000000000..ca239a5b22 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl @@ -0,0 +1,680 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float 
%s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Scalar variants to confirm they match. +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on vec1s in a 6.9 compute shader to ensure they continue to be treated as scalars. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. 
+// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:i32]] = type { i32 + +#ifdef SCL +#define VTYPE TYPE +#else +#define VTYPE vector<TYPE, 1> +#endif + +void assignments(inout VTYPE things[11], TYPE scales[10]); +VTYPE arithmetic(inout VTYPE things[11])[11]; +VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11]; +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10]; +VTYPE index(VTYPE things[11], int i)[11]; +void bittwiddlers(inout VTYPE things[13]); + +struct Viface { + VTYPE values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + bool1 values[10]; +}; + +struct Binface { + VTYPE values[13]; +}; + +RWStructuredBuffer<Viface> Input : register(u11); +RWStructuredBuffer<Viface> Output : register(u12); +RWStructuredBuffer<Siface> Scales : register(u13); +RWStructuredBuffer<Liface> Truths : register(u14); +RWStructuredBuffer<Binface> Bits : register(u15); +RWStructuredBuffer<vector<uint, 13> > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, 
i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector<uint, 13>(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE)*11, + sizeof(TYPE)*12); + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector<uint, 13>(sizeof(int)*0, + sizeof(int)*1, + sizeof(int)*2, + sizeof(int)*3, + sizeof(int)*4, + sizeof(int)*5, + sizeof(int)*6, + sizeof(int)*7, + sizeof(int)*8, + sizeof(int)*9, + sizeof(int)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. + + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+void assignments(inout VTYPE things[11], TYPE scales[10]) { + + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 1 + + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, 
%dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // Nothing to check. Just a copy over. 
+ things[0] = scales[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]]{{( nsw)?}} [[TYPE]] [[val5]], [[val1]] + things[1] += things[5]; + + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]]{{( nsw)?}} [[TYPE]] [[val2]], [[val6]] + things[2] -= things[6]; + + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]]{{( nsw)?}} [[TYPE]] [[val7]], [[val3]] + things[3] *= things[7]; + + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]]{{( nsw)?}} [[TYPE]] [[val4]], [[val8]] + things[4] /= things[8]; + +#ifdef DBL + things[5] = 0; // Gotta give it something in any case for validation. 
+#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + + // CHECK: [[res6:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl1]], [[val6]] + things[6] += scales[1]; + + // CHECK: [[res7:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val7]], [[scl2]] + things[7] -= scales[2]; + + // CHECK: [[res8:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl3]], [[val8]] + things[8] *= scales[3]; + + // CHECK: [[res9:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val9]], [[scl4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[scl0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[val10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + +} + +// Test arithmetic operators. +VTYPE arithmetic(inout VTYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + res[0] = +things[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[res1:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + 
res[1] = -things[0]; + + // CHECK: [[res2:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%.*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%.*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifdef DBL + res[6] = 0; // Gotta give it something in any case for validation. +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val9]], [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val10]], [[NEG1]] + res[10] = --things[10]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[val1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[val3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, 
%dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[val4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[val5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[val6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // Postincrement/decrements get the original value. + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[val7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[val8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. 
+VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 
[[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res0:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] 
[[scl0]], [[val0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val1]], [[scl1]] + res[1] = things[1] - scales[1]; + + // CHECK: [[res2:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl2]], [[val2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[res3:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val3]], [[scl3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[res4:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl4]], [[val4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[scl5]], [[val5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[res6:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl6]], [[val6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 
[[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + + +// Test logic operators. +// Only permissable in pre-HLSL2021 +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10] { + bool1 res[10]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 + // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] + // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i8 1, i32 [[IALN]]) + // CHECK: [[ival0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i8 1, i32 [[IALN]]) + // CHECK: [[ival1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i8 1, i32 [[IALN]]) + // CHECK: [[ival2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i8 1, i32 [[IALN]]) + // CHECK: [[ival3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i8 1, i32 [[IALN]]) + // CHECK: [[ival4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i8 1, i32 [[IALN]]) + 
// CHECK: [[ival5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 4 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[bres0:%.*]] = icmp eq i32 [[ival0]], 0 + // CHECK: 
[[res0:%.*]] = zext i1 [[bres0]] to i32 + res[0] = !truth[0]; + + // CHECK: [[res1:%.*]] = or i32 [[ival2]], [[ival1]] + // CHECK: [[bres1:%.*]] = icmp ne i32 [[res1]], 0 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + res[1] = truth[1] || truth[2]; + + // CHECK: [[bval2:%.*]] = icmp ne i32 [[ival2]], 0 + // CHECK: [[bval3:%.*]] = icmp ne i32 [[ival3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[bval4:%.*]] = icmp ne i32 [[ival4]], 0 + // CHECK: [[bval5:%.*]] = icmp ne i32 [[ival5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]]?}}gt [[TYPE]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]]?}}le [[TYPE]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], i32 [[res0]], i32 undef, i32 
undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], i32 [[res1]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], i32 [[res2]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], i32 [[res3]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], i32 [[res4]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], i32 [[res5]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], i32 [[res6]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], i32 [[res7]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF8]], i32 [[res8]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], i32 [[res9]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +VTYPE index(VTYPE things[11], int i)[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 5 
+ // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1:%.*]], i32 0, i32 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val1]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val3]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val4]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val5]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val6]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( 
inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val9]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val10]], [[TYPE]]* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2:%.*]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]], align [[ALN]] + res[0] = 0; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 [[Ix]] + // CHECK: store [[TYPE]] [[POS1]], [[TYPE]]* [[adr]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 2 + // CHECK: store [[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]], [[TYPE]]* [[adr]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 [[Ix]] + // CHECK: [[vali:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 4 + // CHECK: store 
[[TYPE]] [[vali]], [[TYPE]]* [[adr]] + res[4] = things[i]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[TWO]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[vali]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle 
[[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
+void bittwiddlers(inout VTYPE things[13]) { + // INT: [[ValIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // INT: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // INT: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // INT: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // INT: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // INT: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // INT: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // INT: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 
[[OFF8]], i8 1, i32 [[ALN]]) + // INT: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // INT: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // INT: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], i8 1, i32 [[ALN]]) + // INT: [[val11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], i8 1, i32 [[ALN]]) + // INT: [[val12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor [[TYPE]] [[val1]], -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or [[TYPE]] [[val3]], [[val2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and [[TYPE]] [[val4]], [[val3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor [[TYPE]] [[val5]], [[val4]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and [[TYPE]] [[val6]] + // INT: [[res4:%[0-9]*]] = shl [[TYPE]] [[val5]], [[shv6]] + things[4] = things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and [[TYPE]] [[val7]] + // UNSIG: [[res5:%[0-9]*]] = lshr [[TYPE]] [[val6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr [[TYPE]] [[val6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or [[TYPE]] [[val8]], [[val6]] + things[6] |= things[8]; + + // INT: [[res7:%[0-9]*]] = and [[TYPE]] [[val9]], [[val7]] + things[7] &= things[9]; + + // INT: 
[[res8:%[0-9]*]] = xor [[TYPE]] [[val10]], [[val8]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and [[TYPE]] [[val11]] + // INT: [[res9:%[0-9]*]] = shl [[TYPE]] [[val9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and [[TYPE]] [[val12]] + // UNSIG: [[res10:%[0-9]*]] = lshr [[TYPE]] [[val10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr [[TYPE]] [[val10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, 
[[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], [[TYPE]] [[val11]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], [[TYPE]] [[val12]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl index c366261406..44c9be17d4 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl @@ -1,51 +1,23 @@ -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL 
-// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t1 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG -// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. +// Test relevant operators on vec1s in 6.9 to ensure they continue to be treated as scalars. + +#define VTYPE vector // Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. // CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[ELTY:[a-z0-9_]*]] // CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:.*]] } -RWStructuredBuffer buf; - -export void assignments(inout TYPE things[10], TYPE scales[10]); -export TYPE arithmetic(inout TYPE things[11])[11]; -export bool logic(bool truth[10], TYPE consequences[10])[10]; -export TYPE index(TYPE things[10], int i, TYPE val)[10]; - -struct Interface { - TYPE assigned[10]; - TYPE arithmeticked[11]; - bool logicked[10]; - TYPE indexed[10]; - TYPE scales[10]; -}; - -#if 0 -// Requires vector loading support. Enable when available. 
-RWStructuredBuffer Input; -RWStructuredBuffer Output; - -TYPE g_val; - -[shader("compute")] -[numthreads(8,1,1)] -void main(uint GI : SV_GroupIndex) { - assignments(Output[GI].assigned, Input[GI].scales); - Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); - Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); - Output[GI].indexed = index(Input[GI].indexed, GI, g_val); -} -#endif +RWStructuredBuffer buf; // A mixed-type overload to test overload resolution and mingle different vector element types in ops // Test assignment operators. // CHECK-LABEL: define void @"\01?assignments -export void assignments(inout TYPE things[10]) { +export void assignments(inout VTYPE things[10]) { // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{8|4|2}}) // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 @@ -111,8 +83,8 @@ export void assignments(inout TYPE things[10]) { // Test arithmetic operators. // CHECK-LABEL: define void @"\01?arithmetic -export TYPE arithmetic(inout TYPE things[11])[11] { - TYPE res[11]; +export VTYPE arithmetic(inout VTYPE things[11])[11] { + VTYPE res[11]; // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[res0]], i32 0 @@ -226,7 +198,7 @@ export TYPE arithmetic(inout TYPE things[11])[11] { // Test logic operators. 
// Only permissable in pre-HLSL2021 // CHECK-LABEL: define void @"\01?logic -export bool logic(bool truth[10], TYPE consequences[10])[10] { +export bool logic(bool truth[10], VTYPE consequences[10])[10] { bool res[10]; // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]] @@ -332,9 +304,9 @@ static const int Ix = 2; // Test indexing operators // CHECK-LABEL: define void @"\01?index -export TYPE index(TYPE things[10], int i)[10] { +export VTYPE index(VTYPE things[10], int i)[10] { // CHECK: [[res:%.*]] = alloca [10 x [[ELTY]]] - TYPE res[10]; + VTYPE res[10]; // CHECK: [[res0:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 0 // CHECK: store [[ELTY]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[ELTY]]* [[res0]] @@ -375,7 +347,7 @@ export TYPE index(TYPE things[10], int i)[10] { #ifdef INT // Test bit twiddling operators. // INT-LABEL: define void @"\01?bittwiddlers -export void bittwiddlers(inout TYPE things[13]) { +export void bittwiddlers(inout VTYPE things[13]) { // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 1 // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]] // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0 diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl index ed7a2bff25..ba76eca619 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl @@ -48,24 +48,6 @@ struct Interface { TYPE scales[10]; }; -#if 0 -// Requires vector loading support. Enable when available. 
-RWStructuredBuffer Input; -RWStructuredBuffer Output; - -TYPE g_val; - -[shader("compute")] -[numthreads(8,1,1)] -void main(uint GI : SV_GroupIndex) { - assignments(Output[GI].assigned, Input[GI].scales); - Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); - Output[GI].scarithmeticked = scarithmetic(Input[GI].scarithmeticked, Input[GI].scales); - Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); - Output[GI].indexed = index(Input[GI].indexed, GI, g_val); -} -#endif - - // A mixed-type overload to test overload resolution and mingle different vector element types in ops // Test assignment operators. // CHECK-LABEL: define void @"\01?assignments diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll new file mode 100644 index 0000000000..f9a9b3d677 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll @@ -0,0 +1,478 @@ +; RUN: %dxopt %s -hlsl-passes-resume -hlsl-dxil-scalarize-vector-load-stores -S | FileCheck %s + +; Verify that scalarize vector load stores pass will convert raw buffer vector operations +; into the equivalent collection of scalar load store calls. +; Sourced from buffer-load-stores-sm69.hlsl.
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v17f32 = type { <17 x float>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%"class.StructuredBuffer >" = type { <17 x float> } +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <17 x float> } +%"class.ConsumeStructuredBuffer >" = type { <17 x float> } +%"class.AppendStructuredBuffer >" = type { <17 x float> } + +@"\01?RoByBuf@@3UByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwByBuf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 + +define void @main() { +bb: + %tmp = load %dx.types.Handle, %dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A", align 4 + %tmp2 = load %dx.types.Handle, %dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp3 = load %dx.types.Handle, %dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp4 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp5 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A", align 4 + %tmp6 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) 
+ %tmp7 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp5) + %tmp8 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue 
%dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp9 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i32 4) + %tmp10 = extractvalue 
%dx.types.ResRet.v17f32 %tmp9, 0 + %tmp11 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp1) + %tmp12 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 11, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: 
[[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp13 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i32 4) 
+ %tmp14 = extractvalue %dx.types.ResRet.v17f32 %tmp13, 0 + %tmp15 = fadd fast <17 x float> %tmp14, %tmp10 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp15, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp15, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp15, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp15, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp15, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp15, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp15, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp15, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp15, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp15, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp15, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp15, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp15, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp15, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp15, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp15, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, float 
[[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp15, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, <17 x float> %tmp15, i32 4) + %tmp16 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp4) + %tmp17 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp16, %dx.types.ResourceProperties { i32 4108, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32
[[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> 
[[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp18 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i32 4) + %tmp19 = extractvalue %dx.types.ResRet.v17f32 %tmp18, 0 + %tmp20 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 1, i8 0, i32 undef) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue 
%dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp21 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i32 4) + %tmp22 = extractvalue %dx.types.ResRet.v17f32 %tmp21, 0 + %tmp23 = call 
%dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp) + %tmp24 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp23, %dx.types.ResourceProperties { i32 12, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 
@dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp25 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i32 4) + %tmp26 = extractvalue %dx.types.ResRet.v17f32 %tmp25, 0 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = 
extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; 
CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp27 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i32 4) + %tmp28 = extractvalue %dx.types.ResRet.v17f32 %tmp27, 0 + %tmp29 = fadd fast <17 x float> %tmp22, %tmp19 + %tmp30 = fadd fast <17 x float> %tmp29, %tmp26 + %tmp31 = fadd fast <17 x float> %tmp30, %tmp28 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp31, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp31, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp31, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp31, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 
4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp31, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp31, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp31, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp31, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp31, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp31, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp31, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp31, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp31, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp31, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp31, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp31, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp31, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, <17 x float> %tmp31, i32 4) + %tmp32 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp3) + %tmp33 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp32, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp34 = 
call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp33, i8 -1) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: 
[[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp35 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i32 4) + %tmp36 = extractvalue %dx.types.ResRet.v17f32 %tmp35, 0 + %tmp37 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp2) + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp37, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp39 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp38, i8 1) + + ; CHECK: 
[[val0:%.*]] = extractelement <17 x float> [[vec16]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> [[vec16]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> [[vec16]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> [[vec16]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> [[vec16]], i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> [[vec16]], i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> [[vec16]], i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> [[vec16]], i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> [[vec16]], i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> [[vec16]], i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> [[vec16]], i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> [[vec16]], i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> [[vec16]], i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> [[vec16]], i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> [[vec16]], i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> [[vec16]], i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> [[vec16]], i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, 
%dx.types.Handle %tmp38, i32 %tmp39, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, <17 x float> %tmp36, i32 4) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0 +declare %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare void @dx.op.rawBufferVectorStore.v17f32(i32, %dx.types.Handle, i32, i32, <17 x float>, i32) #2 +declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.typeAnnotations = !{!13} +!dx.entryPoints = !{!17, !19} + +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{!4, !8, null, null} +!4 = !{!5, !6} +!5 = !{i32 0, %struct.ByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A" to %struct.ByteAddressBuffer*), !"RoByBuf", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{i32 1, %"class.StructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.StructuredBuffer >"*), !"RoStBuf", i32 0, i32 2, i32 1, i32 12, i32 0, !7} +!7 = !{i32 1, i32 68} +!8 = !{!9, !10, !11, !12} +!9 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"RwByBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!10 = !{i32 1, %"class.RWStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" to 
%"class.RWStructuredBuffer >"*), !"RwStBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7} +!11 = !{i32 2, %"class.ConsumeStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.ConsumeStructuredBuffer >"*), !"CnStBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!12 = !{i32 3, %"class.AppendStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.AppendStructuredBuffer >"*), !"ApStBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!13 = !{i32 1, void ()* @main, !14} +!14 = !{!15} +!15 = !{i32 0, !16, !16} +!16 = !{} +!17 = !{null, !"", null, !3, !18} +!18 = !{i32 0, i64 8589934608} +!19 = !{void ()* @main, !"main", !20, null, !24} +!20 = !{!21, null, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 5, i8 0, !23, i8 0, i32 2, i8 1, i32 0, i8 0, null} +!23 = !{i32 0, i32 1} +!24 = !{i32 8, i32 1, i32 5, !25} +!25 = !{i32 0} diff --git a/tools/clang/test/DXILValidation/load-store-validation.hlsl b/tools/clang/test/DXILValidation/load-store-validation.hlsl new file mode 100644 index 0000000000..d4e5e29db8 --- /dev/null +++ b/tools/clang/test/DXILValidation/load-store-validation.hlsl @@ -0,0 +1,74 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of various invalid load/store parameters. +// It is used to generate LitDxilValidation/load-store-validation.ll using `dxc -T ps_6_9`. +// Output is modified to trigger various validation errors. + +Texture1D Tex; +RWTexture1D RwTex; +SamplerState Samp; + +StructuredBuffer VecBuf; +StructuredBuffer ScalBuf; +ByteAddressBuffer BaBuf; + +RWStructuredBuffer OutVecBuf; +RWStructuredBuffer OutScalBuf; +RWByteAddressBuffer OutBaBuf; + +// Some simple ways to generate the vector ops in question. +float4 main(int i : IX) : SV_Target { + // Texture provides some invalid handles to plug in. 
+ float4 TexVal = Tex.Sample(Samp, i); + RwTex[0] = TexVal; + + // For invalid RC on Load (and inevitably invalid RK). + float BadRCLd = ScalBuf[0]; + // For invalid RK on Load. + float BadRKLd = ScalBuf[1]; + // For non-constant alignment on Load. + float BadAlnLd = ScalBuf[2]; + // For undefined offset on Structured Buffer Load. + float BadStrOffLd = ScalBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Load. + float BadBabOffLd = BaBuf.Load(0); + + // For invalid RC on Vector Load (and inevitably invalid RK). + float4 BadRCVcLd = VecBuf[0]; + // For invalid RK on Vector Load. + float4 BadRKVcLd = VecBuf[1]; + // For non-constant alignment on Vector Load. + float4 BadAlnVcLd = VecBuf[2]; + // For undefined offset on Structured Buffer Vector Load. + float4 BadStrOffVcLd = VecBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + float4 BadBabOffVcLd = BaBuf.Load(4); + + // For Store to non-UAV. + OutScalBuf[0] = BadRCLd; + // For invalid RK on Store. + OutScalBuf[1] = BadRKLd; + // For non-constant alignment on Store. + OutScalBuf[2] = BadAlnLd; + // For undefined offset on Structured Buffer Store. + OutScalBuf[3] = BadStrOffLd; + // For undefined value Store. + OutScalBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Store. + OutBaBuf.Store(0, BadBabOffLd); + + // For Vector Store to non-UAV. + OutVecBuf[0] = BadRCVcLd; + // For invalid RK on Vector Store. + OutVecBuf[1] = BadRKVcLd; + // For non-constant alignment on Vector Store. + OutVecBuf[2] = BadAlnVcLd; + // For undefined offset on Structured Buffer Vector Store. + OutVecBuf[3] = BadStrOffVcLd; + // For undefined value Vector Store. + OutVecBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Store. 
+ OutBaBuf.Store(4, BadBabOffVcLd); + + return TexVal; +} + diff --git a/tools/clang/test/DXILValidation/vector-validation.hlsl b/tools/clang/test/DXILValidation/vector-validation.hlsl new file mode 100644 index 0000000000..5d6a5cd4a2 --- /dev/null +++ b/tools/clang/test/DXILValidation/vector-validation.hlsl @@ -0,0 +1,14 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of disallowed 6.9 features in previous shader models. +// It is used to generate LitDxilValidation/vector-validation.ll using `dxc -T ps_6_9`. +// Output is modified to have shader model 6.8 instead. + +RWStructuredBuffer VecBuf; + +// some simple ways to generate the vector ops in question. +float4 main(float val : VAL) :SV_Position { + float4 vec = VecBuf[1]; + VecBuf[0] = val; + return vec[2]; +} + diff --git a/tools/clang/test/LitDXILValidation/load-store-validation.ll b/tools/clang/test/LitDXILValidation/load-store-validation.ll new file mode 100644 index 0000000000..34b2f6b602 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/load-store-validation.ll @@ -0,0 +1,229 @@ +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Ensure proper validation errors are produced for invalid parameters to load and store operations. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.Texture1D >" = type { <4 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.StructuredBuffer >" = type { <4 x float> } +%"class.StructuredBuffer" = type { float } +%struct.ByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } +%"class.RWStructuredBuffer" = type { float } +%struct.RWByteAddressBuffer = type { i32 } +%struct.SamplerState = type { i32 } + +; Unfortunately, the validation errors come in weird orders. +; Inlining them isn't helpful, so we'll just dump them all here. +; Inline comments, variable names, and notes should help find the corresponding source. + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at '%badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. 
+; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at '%badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' + +define void @main() { +bb: + %tmp = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + %tmp1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + %tmp2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %tmp3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + %tmp4 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + %tmp5 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + %tmp6 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) + %tmp7 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 3 }, i32 0, i1 false) + %tmp8 = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 0, i1 false) + %ix = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %texIx = sitofp i32 %ix to float + %tex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 1, i32 1033 }) + %samp = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 14, i32 0 }) + %tmp10 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 60, %dx.types.Handle %tex, %dx.types.Handle %samp, float %texIx, float undef, float undef, float undef, i32 0, i32 undef, i32 undef, float undef) + %tmp11 = extractvalue %dx.types.ResRet.f32 %tmp10, 0 + %tmp12 = extractvalue %dx.types.ResRet.f32 %tmp10, 1 + %tmp13 = extractvalue %dx.types.ResRet.f32 %tmp10, 2 + %tmp14 = extractvalue %dx.types.ResRet.f32 %tmp10, 3 + %rwTex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp8, %dx.types.ResourceProperties { i32 4097, i32 1033 }) + call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %rwTex, i32 0, i32 undef, i32 undef, float %tmp11, float %tmp12, float %tmp13, float %tmp14, i8 15) + %scalBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp4, %dx.types.ResourceProperties { i32 12, i32 4 }) + ; Invalid RC on Load (and inevitably invalid RK). + %badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4) + %badRC = extractvalue %dx.types.ResRet.f32 %badRCLd, 0 + ; Invalid RK on Load. + %badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4) + %badRK = extractvalue %dx.types.ResRet.f32 %badRKLd, 0 + ; Non-constant alignment on Load. 
+ %badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix) + %badAln = extractvalue %dx.types.ResRet.f32 %badAlnLd, 0 + ; Undefined offset on Structured Buffer Load. + %badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4) + %badStrOff = extractvalue %dx.types.ResRet.f32 %badStrOffLd, 0 + %baBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp3, %dx.types.ResourceProperties { i32 11, i32 0 }) + ; Defined (and therefore invalid) offset on Byte Address Buffer Load. + %badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4) + %badBabOff = extractvalue %dx.types.ResRet.f32 %badBabOffLd, 0 + + %vecBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp5, %dx.types.ResourceProperties { i32 12, i32 16 }) + ; Invalid RC on Vector Load (and inevitably invalid RK). + %badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4) + %badRCVc = extractvalue %dx.types.ResRet.v4f32 %badRCVcLd, 0 + ; Invalid RK on Vector Load. + %badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4) + %badRKVc = extractvalue %dx.types.ResRet.v4f32 %badRKVcLd, 0 + ; Non-constant alignment on Vector Load. + %badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix) + %badAlnVc = extractvalue %dx.types.ResRet.v4f32 %badAlnVcLd, 0 + ; Undefined offset on Structured Buffer Vector Load. 
+ %badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4) + %badStrOffVc = extractvalue %dx.types.ResRet.v4f32 %badStrOffVcLd, 0 + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + %badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4) + %badBabOffVc = extractvalue %dx.types.ResRet.v4f32 %badBabOffVcLd, 0 + + ; Store to non-UAV. + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4) + ; Invalid RK on Store. + %tmp39 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4) + ; Non-constant alignment on Store. + %tmp40 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix) + ; Undefined offset on Structured Buffer Store. + %tmp41 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) + ; Undefined value Store. 
+ %tmp42 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Store. + %tmp44 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4) + + ; Vector Store to non-UAV. + %tmp45 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %rwTex, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4) + ; Invalid RK on Vector Store. + %tmp46 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4) + ; Non-constant alignment on Vector Store. + %tmp47 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix) + ; Undefined offset on Structured Buffer Vector Store. + %tmp48 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4) + ; Undefined value Vector Store. 
+ %tmp49 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Store. + %tmp51 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4) + + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %tmp11) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %tmp12) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %tmp13) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %tmp14) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #2 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 +declare %dx.types.ResRet.f32 @dx.op.sample.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #1 +declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0 +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #0 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } 
+attributes #2 = { nounwind readnone } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!18} +!dx.entryPoints = !{!19} + +!1 = !{i32 1, i32 9} +!2 = !{!"ps", i32 6, i32 9} +!3 = !{!4, !12, null, !16} +!4 = !{!5, !7, !9, !11} +!5 = !{i32 0, %"class.Texture1D >"* undef, !"", i32 0, i32 0, i32 1, i32 1, i32 0, !6} +!6 = !{i32 0, i32 9} +!7 = !{i32 1, %"class.StructuredBuffer >"* undef, !"", i32 0, i32 1, i32 1, i32 12, i32 0, !8} +!8 = !{i32 1, i32 16} +!9 = !{i32 2, %"class.StructuredBuffer"* undef, !"", i32 0, i32 2, i32 1, i32 12, i32 0, !10} +!10 = !{i32 1, i32 4} +!11 = !{i32 3, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 3, i32 1, i32 11, i32 0, null} +!12 = !{!13, !14, !15} +!13 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !8} +!14 = !{i32 1, %"class.RWStructuredBuffer"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !10} +!15 = !{i32 2, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 2, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!16 = !{!17} +!17 = !{i32 0, %struct.SamplerState* undef, !"", i32 0, i32 0, i32 1, i32 0, null} +!18 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!19 = !{void ()* @main, !"main", !20, !3, !27} +!20 = !{!21, !24, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 4, i8 0, !23, i8 1, i32 1, i8 1, i32 0, i8 0, null} +!23 = !{i32 0} +!24 = !{!25} +!25 = !{i32 0, !"SV_Target", i8 9, i8 16, !23, i8 0, i32 1, i8 4, i32 0, i8 0, !26} +!26 = !{i32 3, i32 15} +!27 = !{i32 0, i64 8589934608} diff --git a/tools/clang/test/LitDXILValidation/vector-validation.ll b/tools/clang/test/LitDXILValidation/vector-validation.ll new file mode 100644 index 0000000000..74e8116e88 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/vector-validation.ll @@ -0,0 +1,78 @@ +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Confirm that 6.9 specific LLVM operations and DXIL intrinsics fail in 6.8 + +target 
datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } + +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%6 = insertelement <4 x float> undef, float %2, i32 0 +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%8 = extractelement <4 x float> %5, i32 2 +; CHECK: Function: main: error: Opcode RawBufferVectorLoad not valid in shader model vs_6_8. +; CHECK: note: at '%4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8)' +; CHECK: Function: main: error: Opcode RawBufferVectorStore not valid in shader model vs_6_8. +; CHECK: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4)' +; CHECK: Function: main: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: main: error: Function uses features incompatible with the shader model. 
+define void @main() { + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4108, i32 16 }) + %4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8) + %5 = extractvalue %dx.types.ResRet.v4f32 %4, 0 + %6 = insertelement <4 x float> undef, float %2, i32 0 + %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4) + %8 = extractelement <4 x float> %5, i32 2 + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %8) + ret void +} + +declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #2 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } +attributes #2 = { nounwind readonly } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!7} +!dx.entryPoints = !{!8} + +!1 = !{i32 1, i32 8} +!2 = !{!"vs", i32 6, i32 8} +!3 = !{null, !4, null, 
null} +!4 = !{!5} +!5 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !6} +!6 = !{i32 1, i32 16} +!7 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!8 = !{void ()* @main, !"main", !9, !3, !17} +!9 = !{!10, !14, null} +!10 = !{!11} +!11 = !{i32 0, !"VAL", i8 9, i8 0, !12, i8 0, i32 1, i8 1, i32 0, i8 0, !13} +!12 = !{i32 0} +!13 = !{i32 3, i32 1} +!14 = !{!15} +!15 = !{i32 0, !"SV_Position", i8 9, i8 3, !12, i8 4, i32 1, i8 4, i32 0, i8 0, !16} +!16 = !{i32 3, i32 15} +!17 = !{i32 0, i64 8590000144} + diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl index 42eb6b077c..54c85191da 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl @@ -3,6 +3,8 @@ #define TYPE float #define NUM 5 +StructuredBuffer > sbuf; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + struct LongVec { float4 f; vector vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index f69b0be204..01f24e0227 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -1506,21 +1506,23 @@ TEST_F(ValidationTest, StructBufStrideOutOfBound) { } TEST_F(ValidationTest, StructBufLoadCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 8)", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 undef)", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 
8)", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 undef)", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, StructBufStoreCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 undef", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 undef", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, TypedBufRetType) { diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 5eb35fb52a..691c3ba58f 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -479,7 +479,7 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Dot" for ( i - ) in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore".split( + ) in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore,RawBufferVectorLoad,RawBufferVectorStore".split( "," ): self.name_idx[i].category = "Resources" @@ -606,6 +606,8 @@ def populate_categories_and_models(self): for i in "RawBufferLoad,RawBufferStore".split(","): self.name_idx[i].shader_model = 6, 2 self.name_idx[i].shader_model_translated = 6, 0 + for i in "RawBufferVectorLoad,RawBufferVectorStore".split(","): + self.name_idx[i].shader_model = 6, 9 for i in 
"DispatchRaysIndex,DispatchRaysDimensions".split(","): self.name_idx[i].category = "Ray Dispatch Arguments" self.name_idx[i].shader_model = 6, 3 @@ -5778,6 +5780,84 @@ def UFI(name, **mappings): # Reserved block C next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) + # Long Vectors + self.add_dxil_op( + "RawBufferVectorLoad", + next_op_idx, + "RawBufferVectorLoad", + "reads from a raw buffer and structured buffer", + "hfwidl<", + "ro", + [ + db_dxil_param(0, "$r", "", "the loaded value"), + db_dxil_param(2, "res", "buf", "handle of Raw Buffer to load from"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param( + 5, + "i32", + "alignment", + "relative load access alignment", + is_const=True, + ), + ], + counters=("tex_load",), + ) + next_op_idx += 1 + + self.add_dxil_op( + "RawBufferVectorStore", + next_op_idx, + "RawBufferVectorStore", + "writes to a RWByteAddressBuffer or RWStructuredBuffer", + "hfwidl<", + "", + [ + db_dxil_param(0, "v", "", ""), + db_dxil_param(2, "res", "uav", "handle of UAV to store to"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param(5, "$o", "value0", "value"), + db_dxil_param( + 6, + "i32", + "alignment", + "relative store access alignment", + is_const=True, + ), + ], + counters=("tex_store",), + ) + next_op_idx += 1 + + # End of DXIL 1.9 opcodes. + # NOTE!! 
Update and uncomment when DXIL 1.9 opcodes are finalized: + # self.set_op_count_for_version(1, 9, next_op_idx) + # assert next_op_idx == NNN, ( + # "NNN is expected next operation index but encountered %d and thus opcodes are broken" + # % next_op_idx + # ) + # Set interesting properties. self.build_indices() for ( @@ -6385,6 +6465,12 @@ def add_pass(name, type_name, doc, opts): "DXIL Lower createHandleForLib", [], ) + add_pass( + "hlsl-dxil-scalarize-vector-load-stores", + "DxilScalarizeVectorLoadStores", + "DXIL scalarize vector load/stores", + [], + ) add_pass( "hlsl-dxil-cleanup-dynamic-resource-handle", "DxilCleanupDynamicResourceHandle", @@ -7607,11 +7693,15 @@ def build_valrules(self): ) self.add_valrule( "Instr.CoordinateCountForRawTypedBuf", - "raw/typed buffer don't need 2 coordinates.", + "raw/typed buffer offset must be undef.", + ) + self.add_valrule( + "Instr.ConstAlignForRawBuf", + "Raw Buffer alignment value must be a constant.", ) self.add_valrule( "Instr.CoordinateCountForStructBuf", - "structured buffer require 2 coordinates.", + "structured buffer requires defined index and offset coordinates.", ) self.add_valrule( "Instr.MipLevelForGetDimension", From 9e9184426c9103a96ec8da2fe4da290f467d4486 Mon Sep 17 00:00:00 2001 From: Chris B Date: Mon, 7 Apr 2025 14:22:34 -0500 Subject: [PATCH 16/19] [NFC] containsLongVector -> ContainsLongVector (#7255) I provided feedback during code review that this function should be named following LLVM conventions. That feedback did not account for the fact that SemaHLSL is otherwise consistent using CamelCase instead of camelCase naming. This corrects my error by renaming to match the consistent style in SemaHLSL.h. I've also updated the parameter naming in the source file to conform to LLVM style since I was in the area anyways. 
--- tools/clang/include/clang/Sema/SemaHLSL.h | 2 +- tools/clang/lib/Sema/SemaDXR.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 24 ++++++++++----------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index d6103b55e6..ac6e08b3fa 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool containsLongVector(clang::QualType qt); +bool ContainsLongVector(clang::QualType); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 0f27de8291..36ab55ea10 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,7 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (containsLongVector(Payload->getType())) { + if (ContainsLongVector(Payload->getType())) { const unsigned PayloadParametersIdx = 10; S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << PayloadParametersIdx; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 027d7d3cbc..6796badcb6 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5529,7 +5529,7 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argSrcLoc, argType, diag::err_typecheck_decl_incomplete_type); - if (containsLongVector(argType)) { + if (ContainsLongVector(argType)) { const unsigned ConstantBuffersOrTextureBuffersIdx = 0; m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << ConstantBuffersOrTextureBuffersIdx; @@ -5637,7 +5637,7 @@ class HLSLExternalSource : public 
ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (containsLongVector(arg.getAsType())) { + if (ContainsLongVector(arg.getAsType())) { const unsigned TessellationPatchesIDx = 1; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -5656,7 +5656,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (containsLongVector(arg.getAsType())) { + if (ContainsLongVector(arg.getAsType())) { const unsigned GeometryStreamsIdx = 2; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -12545,14 +12545,14 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::containsLongVector(QualType qt) { - if (qt.isNull() || qt->isDependentType()) +bool hlsl::ContainsLongVector(QualType QT) { + if (QT.isNull() || QT->isDependentType()) return false; - while (const ArrayType *Arr = qt->getAsArrayTypeUnsafe()) - qt = Arr->getElementType(); + while (const ArrayType *Arr = QT->getAsArrayTypeUnsafe()) + QT = Arr->getElementType(); - if (CXXRecordDecl *Decl = qt->getAsCXXRecordDecl()) { + if (CXXRecordDecl *Decl = QT->getAsCXXRecordDecl()) { if (!Decl->isCompleteDefinition()) return false; return Decl->hasHLSLLongVector(); @@ -15201,7 +15201,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} } SD; RequireCompleteType(D.getLocStart(), qt, SD); - if (containsLongVector(qt)) { + if (ContainsLongVector(qt)) { unsigned CbuffersOrTbuffersIdx = 4; Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << CbuffersOrTbuffersIdx; @@ -16099,7 +16099,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static 
bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (containsLongVector(Arg->getType())) { + if (ContainsLongVector(Arg->getType())) { const unsigned UserDefinedStructParameterIdx = 5; S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) << UserDefinedStructParameterIdx; @@ -16842,14 +16842,14 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. for (const auto *param : FD->params()) { - if (containsLongVector(param->getType())) { + if (ContainsLongVector(param->getType())) { const unsigned EntryFunctionParametersIdx = 6; S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << EntryFunctionParametersIdx; } } - if (containsLongVector(FD->getReturnType())) { + if (ContainsLongVector(FD->getReturnType())) { const unsigned EntryFunctionReturnIdx = 7; S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << EntryFunctionReturnIdx; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index c562ee8d52..abca7cbf86 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -710,14 +710,14 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (containsLongVector(param->getType())) { + if (ContainsLongVector(param->getType())) { const unsigned PatchConstantFunctionParametersIdx = 8; self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << PatchConstantFunctionParametersIdx; } - if (containsLongVector(pPatchFnDecl->getReturnType())) { + if (ContainsLongVector(pPatchFnDecl->getReturnType())) { const unsigned PatchConstantFunctionReturnIdx = 9; self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) From dc4a2b6e910f47ef51cc482c648f105e866f58f7 Mon Sep 17 00:00:00 2001 From: nopandbrk <202358470+nopandbrk@users.noreply.github.com> Date: 
Mon, 7 Apr 2025 15:13:08 -0700 Subject: [PATCH 17/19] [PIX] Add a pass for PIX to log missing NonUniformResourceIndex usage into a UAV (#7272) This is a pass to add instructions to determine missing usage of the NonUniformResourceIndex qualifier when dynamically indexing resources. The instruction numbers will be written out to a UAV for later ingestion by PIX to present a view of the output. --- include/dxc/DxilPIXPasses/DxilPIXPasses.h | 3 + lib/DxilPIXPasses/CMakeLists.txt | 1 + ...NonUniformResourceIndexInstrumentation.cpp | 173 ++++++++++++++ .../DxilShaderAccessTracking.cpp | 89 +------ lib/DxilPIXPasses/PixPassHelpers.cpp | 84 +++++++ lib/DxilPIXPasses/PixPassHelpers.h | 7 +- tools/clang/unittests/HLSL/PixTest.cpp | 219 ++++++++++++++++++ utils/hct/hctdb.py | 6 + 8 files changed, 499 insertions(+), 83 deletions(-) create mode 100644 lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp diff --git a/include/dxc/DxilPIXPasses/DxilPIXPasses.h b/include/dxc/DxilPIXPasses/DxilPIXPasses.h index ad0ddfdfd2..5cc7c4aa50 100644 --- a/include/dxc/DxilPIXPasses/DxilPIXPasses.h +++ b/include/dxc/DxilPIXPasses/DxilPIXPasses.h @@ -27,6 +27,7 @@ ModulePass *createDxilDebugInstrumentationPass(); ModulePass *createDxilShaderAccessTrackingPass(); ModulePass *createDxilPIXAddTidToAmplificationShaderPayloadPass(); ModulePass *createDxilPIXDXRInvocationsLogPass(); +ModulePass *createDxilNonUniformResourceIndexInstrumentationPass(); void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry &); void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry &); @@ -41,5 +42,7 @@ void initializeDxilShaderAccessTrackingPass(llvm::PassRegistry &); void initializeDxilPIXAddTidToAmplificationShaderPayloadPass( llvm::PassRegistry &); void initializeDxilPIXDXRInvocationsLogPass(llvm::PassRegistry &); +void initializeDxilNonUniformResourceIndexInstrumentationPass( + llvm::PassRegistry &); } // namespace llvm diff --git a/lib/DxilPIXPasses/CMakeLists.txt 
b/lib/DxilPIXPasses/CMakeLists.txt index c36d11d559..67e77f17cd 100644 --- a/lib/DxilPIXPasses/CMakeLists.txt +++ b/lib/DxilPIXPasses/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMDxilPIXPasses PixPassHelpers.cpp DxilPIXAddTidToAmplificationShaderPayload.cpp DxilPIXDXRInvocationsLog.cpp + DxilNonUniformResourceIndexInstrumentation.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR diff --git a/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp new file mode 100644 index 0000000000..a442bfabed --- /dev/null +++ b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp @@ -0,0 +1,173 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilNonUniformResourceIndexInstrumentation.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Provides a pass to add instrumentation to determine missing usage of the // +// NonUniformResourceIndex qualifier when dynamically indexing resources. // +// Used by PIX. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "PixPassHelpers.h" +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DxilPIXPasses/DxilPIXPasses.h" +#include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h" +#include "dxc/Support/Global.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +using namespace hlsl; + +class DxilNonUniformResourceIndexInstrumentation : public ModulePass { + +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilNonUniformResourceIndexInstrumentation() : ModulePass(ID) {} + StringRef getPassName() const override { + return "DXIL NonUniformResourceIndex Instrumentation"; + } + bool runOnModule(Module &M) override; +}; + +bool DxilNonUniformResourceIndexInstrumentation::runOnModule(Module &M) { + // This pass adds instrumentation for incorrect NonUniformResourceIndex usage + + DxilModule &DM = M.GetOrCreateDxilModule(); + LLVMContext &Ctx = M.getContext(); + OP *HlslOP = DM.GetOP(); + + hlsl::DxilResource *PixUAVResource = nullptr; + + UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx)); + + // Use WaveActiveAllEqual to check if a dynamic index is uniform + Function *WaveActiveAllEqualFunc = HlslOP->GetOpFunc( + DXIL::OpCode::WaveActiveAllEqual, Type::getInt32Ty(Ctx)); + Constant *WaveActiveAllEqualOpCode = + HlslOP->GetI32Const((int32_t)DXIL::OpCode::WaveActiveAllEqual); + + // Atomic operation to use for writing to the result uav resource + Function *AtomicOpFunc = + HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx)); + Constant *AtomicBinOpcode = + HlslOP->GetU32Const((uint32_t)OP::OpCode::AtomicBinOp); + Constant *AtomicOr = HlslOP->GetU32Const((uint32_t)DXIL::AtomicBinOpCode::Or); + + std::map FunctionToUAVHandle; + + // This is the main pass that will iterate through all of the resources that + // are dynamically indexed. 
If not already marked NonUniformResourceIndex, + // then insert WaveActiveAllEqual to determine if the index is uniform + // and finally write to a UAV resource with the result. + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&](bool IsNonUniformIndex, Instruction *CreateHandle, + Value *IndexOperand) { + if (IsNonUniformIndex) { + // The NonUniformResourceIndex qualifier was used, continue. + return true; + } + + if (!PixUAVResource) { + PixUAVResource = + PIXPassHelpers::CreateGlobalUAVResource(DM, 0, "PixUAVResource"); + } + + CallInst *PixUAVHandle = nullptr; + Function *F = CreateHandle->getParent()->getParent(); + + const auto FunctionToUAVHandleIter = FunctionToUAVHandle.lower_bound(F); + + if ((FunctionToUAVHandleIter != FunctionToUAVHandle.end()) && + (FunctionToUAVHandleIter->first == F)) { + PixUAVHandle = FunctionToUAVHandleIter->second; + } else { + IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt()); + + PixUAVHandle = PIXPassHelpers::CreateHandleForResource( + DM, Builder, PixUAVResource, "PixUAVHandle"); + + FunctionToUAVHandle.insert(FunctionToUAVHandleIter, + {F, PixUAVHandle}); + } + + IRBuilder<> Builder(CreateHandle); + + uint32_t InstructionNumber = 0; + if (!pix_dxil::PixDxilInstNum::FromInst(CreateHandle, + &InstructionNumber)) { + DXASSERT_NOMSG(false); + } + + // The output UAV is treated as a bit array where each bit corresponds + // to an instruction number. This determines what byte offset to write + // our result to based on the instruction number. 
+ const uint32_t InstructionNumByteOffset = + (InstructionNumber / 32u) * sizeof(uint32_t); + const uint32_t InstructionNumBitPosition = (InstructionNumber % 32u); + const uint32_t InstructionNumBitMask = 1u << InstructionNumBitPosition; + + Constant *UAVByteOffsetArg = + HlslOP->GetU32Const(InstructionNumByteOffset); + + CallInst *WaveActiveAllEqualCall = Builder.CreateCall( + WaveActiveAllEqualFunc, {WaveActiveAllEqualOpCode, IndexOperand}); + + // This takes the result of the WaveActiveAllEqual result and shifts + // it into the same bit position as the instruction number, followed + // by an xor to determine what to write to the UAV + Value *IsWaveEqual = + Builder.CreateZExt(WaveActiveAllEqualCall, Builder.getInt32Ty()); + Value *WaveEqualBitMask = + Builder.CreateShl(IsWaveEqual, InstructionNumBitPosition); + Value *FinalResult = + Builder.CreateXor(WaveEqualBitMask, InstructionNumBitMask); + + // Generate instructions to bitwise OR a UAV value corresponding + // to the instruction number and result of WaveActiveAllEqual. + // If WaveActiveAllEqual was false, we write a 1, otherwise a 0. 
+ Builder.CreateCall( + AtomicOpFunc, + { + AtomicBinOpcode, // i32, ; opcode + PixUAVHandle, // %dx.types.Handle, ; resource handle + AtomicOr, // i32, ; binary operation code : + // EXCHANGE, IADD, AND, OR, XOR + // IMIN, IMAX, UMIN, UMAX + UAVByteOffsetArg, // i32, ; coordinate c0: byte offset + UndefArg, // i32, ; coordinate c1 (unused) + UndefArg, // i32, ; coordinate c2 (unused) + FinalResult // i32); value + }, + "UAVInstructionNumberBitSet"); + return true; + }); + + const bool modified = (PixUAVResource != nullptr); + + if (modified) { + DM.ReEmitDxilResources(); + + if (OSOverride != nullptr) { + formatted_raw_ostream FOS(*OSOverride); + FOS << "\nFoundDynamicIndexingNoNuri\n"; + } + } + + return modified; +} + +char DxilNonUniformResourceIndexInstrumentation::ID = 0; + +ModulePass *llvm::createDxilNonUniformResourceIndexInstrumentationPass() { + return new DxilNonUniformResourceIndexInstrumentation(); +} + +INITIALIZE_PASS(DxilNonUniformResourceIndexInstrumentation, + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + false, false) diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index 4f4cc7c620..bd96d83965 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -795,87 +795,6 @@ DxilShaderAccessTracking::GetResourceFromHandle(Value *resHandle, return ret; } -static bool CheckForDynamicIndexing(OP *HlslOP, LLVMContext &Ctx, - DxilModule &DM) { - bool FoundDynamicIndexing = false; - - for (llvm::Function &F : DM.GetModule()->functions()) { - if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { - if (F.hasName()) { - if (F.getName().find("createHandleForLib") != StringRef::npos) { - auto FunctionUses = F.uses(); - for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { - auto &FunctionUse = *FI++; - auto FunctionUser = FunctionUse.getUser(); - 
auto instruction = cast(FunctionUser); - Value *resourceLoad = - instruction->getOperand(kCreateHandleForLibResOpIdx); - if (auto *load = cast(resourceLoad)) { - auto *resOrGep = load->getOperand(0); - if (isa(resOrGep)) { - FoundDynamicIndexing = true; - break; - } - } - } - } - } - } - if (FoundDynamicIndexing) { - break; - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFn = - HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFn->user_begin(); - FI != CreateHandleFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = instruction->getOperand(kCreateHandleResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromBindingFn->user_begin(); - FI != CreateHandleFromBindingFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromBindingResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromHeapFn->user_begin(); - FI != CreateHandleFromHeapFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromHeapHeapIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - return FoundDynamicIndexing; -} - bool DxilShaderAccessTracking::runOnModule(Module &M) { // This pass adds instrumentation for shader access to resources @@ -887,7 +806,13 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { if (m_CheckForDynamicIndexing) { - bool 
FoundDynamicIndexing = CheckForDynamicIndexing(HlslOP, Ctx, DM); + bool FoundDynamicIndexing = false; + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&FoundDynamicIndexing](bool, Instruction *, Value *) { + FoundDynamicIndexing = true; + return false; + }); if (FoundDynamicIndexing) { if (OSOverride != nullptr) { diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index 69385ae048..65d9a660cc 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -512,6 +512,90 @@ unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, } } +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function &Visitor) { + OP *HlslOP = DM.GetOP(); + LLVMContext &Ctx = DM.GetModule()->getContext(); + + for (llvm::Function &F : DM.GetModule()->functions()) { + if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { + if (F.hasName()) { + if (F.getName().find("createHandleForLib") != StringRef::npos) { + auto FunctionUses = F.uses(); + for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { + auto &FunctionUse = *FI++; + auto FunctionUser = FunctionUse.getUser(); + auto instruction = cast(FunctionUser); + Value *resourceLoad = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleForLibResOpIdx); + if (auto *load = cast(resourceLoad)) { + auto *resOrGep = load->getOperand(0); + if (auto *gep = dyn_cast(resOrGep)) { + if (!Visitor(DxilMDHelper::IsMarkedNonUniform(gep), load, + gep->getOperand(2))) { + return; + } + } + } + } + } + } + } + } + + auto CreateHandleFn = + HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFn->user_begin(); + FI != CreateHandleFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = + instruction->getOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandle 
createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromBindingFn->user_begin(); + FI != CreateHandleFromBindingFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromBindingResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromBinding createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromHeapFn->user_begin(); + FI != CreateHandleFromHeapFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromHeapHeapIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromHeap createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } +} + #ifdef PIX_DEBUG_DUMP_HELPER static int g_logIndent = 0; diff --git a/lib/DxilPIXPasses/PixPassHelpers.h b/lib/DxilPIXPasses/PixPassHelpers.h index 4cd0e1a549..d7b0b40af8 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.h +++ b/lib/DxilPIXPasses/PixPassHelpers.h @@ -9,6 +9,7 @@ #pragma once +#include #include #include "dxc/DXIL/DxilModule.h" @@ -16,7 +17,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -//#define PIX_DEBUG_DUMP_HELPER +// #define PIX_DEBUG_DUMP_HELPER #ifdef PIX_DEBUG_DUMP_HELPER #include "dxc/Support/Global.h" #endif @@ -82,4 +83,8 @@ void ReplaceAllUsesOfInstructionWithNewValueAndDeleteInstruction( llvm::Instruction *Instr, llvm::Value *newValue, 
llvm::Type *newType); unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, unsigned UpStreamSVPosRow); +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function + &Visitor); } // namespace PIXPassHelpers diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index b97aa70c05..e337d2951c 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -153,6 +153,10 @@ class PixTest : public ::testing::Test { TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs) + TEST_METHOD(NonUniformResourceIndex_Resource) + TEST_METHOD(NonUniformResourceIndex_DescriptorHeap) + TEST_METHOD(NonUniformResourceIndex_Raytracing) + dxc::DxcDllSupport m_dllSupport; VersionSupportInfo m_ver; @@ -444,6 +448,11 @@ class PixTest : public ::testing::Test { std::string RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob); CComPtr RunDxilPIXMeshShaderOutputPass(IDxcBlob *blob); CComPtr RunDxilPIXDXRInvocationsLog(IDxcBlob *blob); + std::vector + RunDxilNonUniformResourceIndexInstrumentation(IDxcBlob *blob, + std::string &outputText); + void TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult); void TestPixUAVCase(char const *hlsl, wchar_t const *model, wchar_t const *entry); std::string Disassemble(IDxcBlob *pProgram); @@ -671,6 +680,29 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { return pOptimizedModule; } +std::vector PixTest::RunDxilNonUniformResourceIndexInstrumentation( + IDxcBlob *blob, std::string &outputText) { + + CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); + CComPtr pOptimizer; + VERIFY_SUCCEEDED( + m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer)); + std::array Options = { + L"-opt-mod-passes", L"-dxil-dbg-value-to-dbg-declare", + L"-dxil-annotate-with-virtual-regs", + L"-hlsl-dxil-non-uniform-resource-index-instrumentation"}; + + CComPtr pOptimizedModule; + CComPtr pText; + 
VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( + dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + + outputText = BlobToUtf8(pText); + + const std::string disassembly = Disassemble(pOptimizedModule); + return Tokenize(disassembly, "\n"); +} + std::string PixTest::RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob) { CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); @@ -2983,6 +3015,193 @@ void MyMiss(inout MyPayload payload) RunDxilPIXDXRInvocationsLog(compiledLib); } +uint32_t NuriGetWaveInstructionCount(const std::vector &lines) { + // This is the instruction we'll insert into the shader if we detect dynamic + // resource indexing + const char *const waveActiveAllEqual = "call i1 @dx.op.waveActiveAllEqual"; + + uint32_t instCount = 0; + for (const std::string &line : lines) { + instCount += line.find(waveActiveAllEqual) != std::string::npos; + } + return instCount; +} + +void PixTest::TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult) { + + for (const OptimizationChoice &choice : OptimizationChoices) { + const std::vector compilationOptions = {choice.Flag}; + + CComPtr compiledLib = + Compile(m_dllSupport, source, target, compilationOptions); + + std::string outputText; + const std::vector dxilLines = + RunDxilNonUniformResourceIndexInstrumentation(compiledLib, outputText); + + VERIFY_ARE_EQUAL(NuriGetWaveInstructionCount(dxilLines), expectedResult); + + bool foundDynamicIndexingNoNuri = false; + const std::vector outputTextLines = Tokenize(outputText, "\n"); + for (const std::string &line : outputTextLines) { + if (line.find("FoundDynamicIndexingNoNuri") != std::string::npos) { + foundDynamicIndexingNoNuri = true; + break; + } + } + + VERIFY_ARE_EQUAL((expectedResult != 0), foundDynamicIndexingNoNuri); + } +} + +TEST_F(PixTest, NonUniformResourceIndex_Resource) { + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint index = uv.x 
* uv.y; + return tex[index].Load(int3(0, 0, 0)); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x * uv.y; + return tex[NonUniformResourceIndex(i)].Load(int3(0, 0, 0)); +})x"; + + TestNuriCase(source, L"ps_6_0", 1); + TestNuriCase(sourceWithNuri, L"ps_6_0", 0); + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + TestNuriCase(source, L"ps_6_6", 1); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_DescriptorHeap) { + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[i]; + SamplerState dynResSampler = + SamplerDescriptorHeap[i]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[NonUniformResourceIndex(i)]; + SamplerState dynResSampler = + SamplerDescriptorHeap[NonUniformResourceIndex(i)]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + TestNuriCase(source, L"ps_6_6", 2); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_Raytracing) { + + if (m_ver.SkipDxilVersion(1, 5)) { + return; + } + + const char *source = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[i][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[i][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; 
+ + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[i1][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[i2][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + const char *sourceWithNuri = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[NonUniformResourceIndex(i1)][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[NonUniformResourceIndex(i2)][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + TestNuriCase(source, L"lib_6_5", 4); + TestNuriCase(sourceWithNuri, L"lib_6_5", 0); +} + TEST_F(PixTest, DebugInstrumentation_TextOutput) { const char *source = R"x( diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 691c3ba58f..0008b752b1 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -6340,6 +6340,12 @@ def add_pass(name, type_name, doc, opts): "HLSL DXIL Logs all non-RayGen DXR 1.0 invocations into a UAV", [{"n": "maxNumEntriesInLog", "t": "int", "c": 1}], ) + add_pass( + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "DxilNonUniformResourceIndexInstrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + [], + ) category_lib = "dxil_gen" From c940161bb3398ff988fafc343ed1623d4a3fad6c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 
11:19:24 -0700 Subject: [PATCH 18/19] Bump cryptography from 43.0.1 to 44.0.1 in /utils/git (#7220) Bumps [cryptography](https://github.com/pyca/cryptography) from 43.0.1 to 44.0.1.
Changelog

Sourced from cryptography's changelog.

44.0.1 - 2025-02-11


* Updated Windows, macOS, and Linux wheels to be compiled with OpenSSL
3.4.1.
* We now build ``armv7l`` ``manylinux`` wheels and publish them to PyPI.
* We now build ``manylinux_2_34`` wheels and publish them to PyPI.

.. _v44-0-0:

44.0.0 - 2024-11-27

  • BACKWARDS INCOMPATIBLE: Dropped support for LibreSSL < 3.9.
  • Deprecated Python 3.7 support. Python 3.7 is no longer supported by the Python core team. Support for Python 3.7 will be removed in a future cryptography release.
  • Updated Windows, macOS, and Linux wheels to be compiled with OpenSSL 3.4.0.
  • macOS wheels are now built against the macOS 10.13 SDK. Users on older versions of macOS should upgrade, or they will need to build cryptography themselves.
  • Enforce the :rfc:`5280` requirement that extended key usage extensions must not be empty.
  • Added support for timestamp extraction to the :class:`~cryptography.fernet.MultiFernet` class.
  • Relax the Authority Key Identifier requirements on root CA certificates during X.509 verification to allow fields permitted by :rfc:`5280` but forbidden by the CA/Browser BRs.
  • Added support for :class:`~cryptography.hazmat.primitives.kdf.argon2.Argon2id` when using OpenSSL 3.2.0+.
  • Added support for the :class:`~cryptography.x509.Admissions` certificate extension.
  • Added basic support for PKCS7 decryption (including S/MIME 3.2) via :func:`~cryptography.hazmat.primitives.serialization.pkcs7.pkcs7_decrypt_der`, :func:`~cryptography.hazmat.primitives.serialization.pkcs7.pkcs7_decrypt_pem`, and :func:`~cryptography.hazmat.primitives.serialization.pkcs7.pkcs7_decrypt_smime`.

.. _v43-0-3:

43.0.3 - 2024-10-18


* Fixed release metadata for ``cryptography-vectors``

.. _v43-0-2:

43.0.2 - 2024-10-18

  • Fixed compilation when using LibreSSL 4.0.0.

.. _v43-0-1:

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=cryptography&package-manager=pip&previous-version=43.0.1&new-version=44.0.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/microsoft/DirectXShaderCompiler/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- utils/git/requirements_formatting.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/git/requirements_formatting.txt b/utils/git/requirements_formatting.txt index 06db8176c9..6f3e07dcf2 100644 --- a/utils/git/requirements_formatting.txt +++ b/utils/git/requirements_formatting.txt @@ -18,7 +18,7 @@ charset-normalizer==3.2.0 # via requests click==8.1.7 # via black -cryptography==43.0.1 +cryptography==44.0.1 # via pyjwt darker==1.7.2 # via -r llvm/utils/git/requirements_formatting.txt.in From f69f2810e3afe9b54fd6c9fb7aecd5f5fb4634d5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 9 Apr 2025 15:48:17 +0000 Subject: [PATCH 19/19] chore: autopublish 2025-04-09T15:48:17Z --- lib/DxilValidation/DxilValidation.cpp | 3 +- lib/HLSL/DxilLinker.cpp | 2 +- lib/HLSL/HLOperationLower.cpp | 149 ++++++++++++++------------ tools/clang/lib/Sema/SemaHLSL.cpp | 26 +++-- 4 files changed, 100 insertions(+), 80 deletions(-) diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 9e8f8574ac..a788f21d4e 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2723,7 +2723,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } // Instructions must be allowed. 
- if (!IsLLVMInstructionAllowed(I) || !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) { + if (!IsLLVMInstructionAllowed(I) || + !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) { if (!IsLLVMInstructionAllowedForLib(I, ValCtx)) { ValCtx.EmitInstrError(&I, ValidationRule::InstrAllowed); continue; diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index c4dae4b69f..c58a2e909a 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1278,7 +1278,7 @@ void DxilLinkJob::RunPreparePass(Module &M) { PM.add(createScalarizerPass()); // Need dxilelimvector for pre 6.9 - //PM.add(createDxilEliminateVectorPass()); + // PM.add(createDxilEliminateVectorPass()); PM.add(createPromoteMemoryToRegisterPass()); diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 6078455805..a68bddaf32 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -485,11 +485,11 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, } Value *TrivialDxilVectorOperation(Function *dxilFunc, OP::OpCode opcode, - ArrayRef refArgs, Type *Ty, - OP *hlslOP, IRBuilder<> &Builder) { + ArrayRef refArgs, Type *Ty, + OP *hlslOP, IRBuilder<> &Builder) { if (!Ty->isVoidTy()) { Value *retVal = - Builder.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode)); + Builder.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode)); return retVal; } else { // Cannot add name to void. 
@@ -497,20 +497,22 @@ Value *TrivialDxilVectorOperation(Function *dxilFunc, OP::OpCode opcode, } } - -Value *TrivialDxilVectorUnaryOperationRet(OP::OpCode opcode, Value *src, Type *Ty, - OP *hlslOP, IRBuilder<> &Builder) { +Value *TrivialDxilVectorUnaryOperationRet(OP::OpCode opcode, Value *src, + Type *Ty, OP *hlslOP, + IRBuilder<> &Builder) { Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {opArg, src}; Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); - return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, Builder); + return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, + Builder); } -Value *TrivialDxilVectorBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { +Value *TrivialDxilVectorBinaryOperation(OP::OpCode opcode, Value *src0, + Value *src1, hlsl::OP *hlslOP, + IRBuilder<> &Builder) { Type *Ty = src0->getType(); Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); @@ -518,7 +520,8 @@ Value *TrivialDxilVectorBinaryOperation(OP::OpCode opcode, Value *src0, Value *s Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); - return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, Builder); + return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, + Builder); } Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, @@ -547,24 +550,26 @@ Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } -Value *TrivialDxilTrinaryOperationRet(OP::OpCode opcode, Value *src0, Value *src1, - Value *src2, Type *Ty, hlsl::OP *hlslOP, - IRBuilder<> &Builder) { +Value *TrivialDxilTrinaryOperationRet(OP::OpCode opcode, Value *src0, + Value *src1, Value *src2, Type *Ty, + hlsl::OP *hlslOP, IRBuilder<> &Builder) { Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {opArg, src0, src1, src2}; return 
TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } -Value *TrivialDxilVectorTrinaryOperationRet(OP::OpCode opcode, Value *src0, Value *src1, - Value *src2, Type *Ty, hlsl::OP *hlslOP, - IRBuilder<> &Builder) { +Value *TrivialDxilVectorTrinaryOperationRet(OP::OpCode opcode, Value *src0, + Value *src1, Value *src2, Type *Ty, + hlsl::OP *hlslOP, + IRBuilder<> &Builder) { Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {opArg, src0, src1, src2}; Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); - return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, Builder); + return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, + Builder); } Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -579,22 +584,20 @@ Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return retVal; } -Value *TrivialVectorizableUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +Value *TrivialVectorizableUnaryOperation( + CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); Type *Ty = CI->getType(); IRBuilder<> Builder(CI); hlsl::OP *hlslOP = &helper.hlslOP; - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) - return TrivialDxilVectorUnaryOperationRet(opcode, src0, Ty, - hlslOP, Builder); + if (Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialDxilVectorUnaryOperationRet(opcode, src0, Ty, hlslOP, + Builder); else - return TrivialDxilUnaryOperationRet(opcode, src0, Ty, - hlslOP, Builder); + return TrivialDxilUnaryOperationRet(opcode, src0, Ty, hlslOP, Builder); } Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -611,10 +614,11 @@ 
Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return binOp; } -Value *TrivialVectorBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +Value *TrivialVectorBinaryOperation(CallInst *CI, IntrinsicOp IOP, + OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); @@ -626,9 +630,9 @@ Value *TrivialVectorBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode op } Value *TranslateFMA(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; Type *Ty = CI->getType(); Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); @@ -636,11 +640,12 @@ Value *TranslateFMA(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); IRBuilder<> Builder(CI); - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) - return TrivialDxilVectorTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, Builder); + if (Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialDxilVectorTrinaryOperationRet(opcode, src0, src1, src2, Ty, + hlslOP, Builder); else - return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, Builder); + return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, + Builder); } Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -1986,15 +1991,16 @@ Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, 
OP::OpCode opcode, IRBuilder<> Builder(CI); // min(max(x, minVal), maxVal). - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) { + if (Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) { Value *maxXMinVal = - TrivialDxilVectorBinaryOperation(maxOp, x, minVal, hlslOP, Builder); - return TrivialDxilVectorBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); + TrivialDxilVectorBinaryOperation(maxOp, x, minVal, hlslOP, Builder); + return TrivialDxilVectorBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, + Builder); } else { Value *maxXMinVal = - TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder); - return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); + TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder); + return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, + Builder); } } @@ -2308,11 +2314,12 @@ Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst); } val = Builder.CreateFMul(log2eConst, val); - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) - return TrivialDxilVectorUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, Builder); + if (Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialDxilVectorUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, + Builder); else - return TrivialDxilUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, Builder); + return TrivialDxilUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, + Builder); } Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2328,11 +2335,12 @@ Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const); } Value *log = nullptr; - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) - log = TrivialDxilVectorUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, Builder); + if 
(Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) + log = TrivialDxilVectorUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, + Builder); else - log = TrivialDxilUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, Builder); + log = + TrivialDxilUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, Builder); return Builder.CreateFMul(ln2Const, log); } @@ -2392,13 +2400,12 @@ Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, break; } } - if (CI->getType()->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) + if (CI->getType()->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) return TrivialVectorBinaryOperation(CI, IOP, opcode, helper, pObjHelper, - Translated); + Translated); else return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, - Translated); + Translated); } Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2423,7 +2430,8 @@ Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); IRBuilder<> Builder(CI); - return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, Builder); + return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, + Builder); } Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2547,9 +2555,8 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, // Instead of using a DXIL intrinsic, implement a dot product operation using // multiply and add operations. Used for integer dots and long vectors. -Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { +Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, + IRBuilder<> &Builder, bool Unsigned = false) { auto madOpCode = Unsigned ? 
DXIL::OpCode::UMad : DXIL::OpCode::IMad; if (arg0->getType()->getScalarType()->isFloatingPointTy()) madOpCode = DXIL::OpCode::FMad; @@ -2559,8 +2566,8 @@ Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, for (unsigned Elt = 1; Elt < vecSize; ++Elt) { Elt0 = Builder.CreateExtractElement(arg0, Elt); Elt1 = Builder.CreateExtractElement(arg1, Elt); - Result = TrivialDxilTrinaryOperationRet(madOpCode, Elt0, Elt1, Result, Elt0->getType(), hlslOP, - Builder); + Result = TrivialDxilTrinaryOperationRet(madOpCode, Elt0, Elt1, Result, + Elt0->getType(), hlslOP, Builder); } return Result; @@ -2598,11 +2605,12 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, unsigned vecSize = Ty->getVectorNumElements(); Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) { + if (Ty->getScalarType()->isFloatingPointTy() && + Ty->getVectorNumElements() <= 4) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_udot); + IOP == IntrinsicOp::IOP_udot); } } @@ -2785,8 +2793,9 @@ Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3); // Msad on vecref and byteSrc. 
- return TrivialDxilTrinaryOperationRet(DXIL::OpCode::Msad, vecRef, byteSrc, accum, - vecRef->getType(), hlslOP, Builder); + return TrivialDxilTrinaryOperationRet(DXIL::OpCode::Msad, vecRef, byteSrc, + accum, vecRef->getType(), hlslOP, + Builder); } Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -3151,7 +3160,7 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_umul); + IOP == IntrinsicOp::IOP_umul); } } else { // mul(vector, scalar) == vector * scalar-splat @@ -6554,7 +6563,8 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble}, {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_atan, TrivialVectorizableUnaryOperation, DXIL::OpCode::Atan}, + {IntrinsicOp::IOP_atan, TrivialVectorizableUnaryOperation, + DXIL::OpCode::Atan}, {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi}, {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes}, @@ -6645,7 +6655,8 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt}, {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan}, - {IntrinsicOp::IOP_tanh, TrivialVectorizableUnaryOperation, DXIL::OpCode::Htan}, + {IntrinsicOp::IOP_tanh, TrivialVectorizableUnaryOperation, + DXIL::OpCode::Htan}, {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp 
b/tools/clang/lib/Sema/SemaHLSL.cpp index 2ad97dcd9e..1ef555c6df 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -389,7 +389,7 @@ enum ArBasicKind { (IS_BPROP_AINT(_Props) && GET_BPROP_BITS(_Props) != BPROP_BITS12) #define IS_BPROP_ENUM(_Props) (((_Props)&BPROP_ENUM) != 0) -#define IS_BPROP_RAWBUFFER(_Props) (((_Props)&BPROP_RAWBUFFER) != 0) +#define IS_BPROP_RAWBUFFER(_Props) (((_Props) & BPROP_RAWBUFFER) != 0) const UINT g_uBasicKindProps[] = { BPROP_PRIMITIVE | BPROP_BOOLEAN | BPROP_INTEGER | BPROP_NUMERIC | @@ -518,14 +518,22 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT | BPROP_RWBUFFER | BPROP_TEXTURE, // AR_OBJECT_RWTEXTURE3D BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_RWBUFFER - BPROP_OBJECT | BPROP_RBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_BYTEADDRESS_BUFFER - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_RWBYTEADDRESS_BUFFER - BPROP_OBJECT | BPROP_RBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_STRUCTURED_BUFFER - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER_ALLOC - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER_CONSUME - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_APPEND_STRUCTURED_BUFFER - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_CONSUME_STRUCTURED_BUFFER + BPROP_OBJECT | BPROP_RBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_BYTEADDRESS_BUFFER + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_RWBYTEADDRESS_BUFFER + BPROP_OBJECT | BPROP_RBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_STRUCTURED_BUFFER + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER_ALLOC + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER_CONSUME + BPROP_OBJECT | BPROP_RWBUFFER | + 
BPROP_RAWBUFFER, // AR_OBJECT_APPEND_STRUCTURED_BUFFER + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_CONSUME_STRUCTURED_BUFFER BPROP_OBJECT | BPROP_RBUFFER, // AR_OBJECT_CONSTANT_BUFFER BPROP_OBJECT | BPROP_RBUFFER, // AR_OBJECT_TEXTURE_BUFFER