diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 01ad1577b7..6cbdeb6270 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,2 @@ -* @microsoft/hlsl-release +# Uncomment the next line in release branches after ask-mode begins +# * @microsoft/hlsl-release diff --git a/CMakeLists.txt b/CMakeLists.txt index 74244c1d58..0977fa1246 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,15 +17,6 @@ if(POLICY CMP0022) cmake_policy(SET CMP0022 NEW) # automatic when 2.8.12 is required endif() -if (POLICY CMP0051) - # CMake 3.1 and higher include generator expressions of the form - # $ in the SOURCES property. These need to be - # stripped everywhere that access the SOURCES property, so we just - # defer to the OLD behavior of not including generator expressions - # in the output for now. - cmake_policy(SET CMP0051 OLD) -endif() - if(CMAKE_VERSION VERSION_LESS 3.1.20141117) set(cmake_3_2_USES_TERMINAL) else() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 233211f150..840b4f0f17 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,10 +40,32 @@ Before submitting a feature or substantial code contribution please discuss it w ### Coding guidelines -The coding, style, and general engineering guidelines follow those described in the docs/CodingStandards.rst. For additional guidelines in code specific to HLSL, see the docs/HLSLChanges.rst file. +The coding, style, and general engineering guidelines follow those described in the [LLVM Coding Standards](docs/CodingStandards.rst). For additional guidelines in code specific to HLSL, see the [HLSL Changes](docs/HLSLChanges.rst) docs. DXC has adopted a clang-format requirement for all incoming changes to C and C++ files. PRs to DXC should have the *changed code* clang formatted to the LLVM style, and leave the remaining portions of the file unchanged. This can be done using the `git-clang-format` tool or IDE driven workflows. 
A GitHub action will run on all PRs to validate that the change is properly formatted. +#### Applying LLVM Standards + +All new code contributed to DXC should follow the LLVM coding standards. + +Note that the LLVM Coding Standards have a golden rule: + +> **If you are extending, enhancing, or bug fixing already implemented code, use the style that is already being used so that the source is uniform and easy to follow.** + +The golden rule should continue to be applied to places where DXC is self-consistent. A good example is DXC's common use of `PascalCase` instead of `camelCase` for APIs in some parts of the HLSL implementation. In any place where DXC is not self-consistent new code should follow the LLVM Coding Standard. + +A good secondary rule to follow is: + +> **When in doubt, follow LLVM.** + +Adopting LLVM's coding standards provides a consistent set of rules and guidelines to hold all contributions to. This allows patch authors to clearly understand the expectations placed on contributions, and allows reviewers to have a bar to measure contributions against. Aligning with LLVM by default ensures the path of least resistance for everyone. + +Since many of the LLVM Coding Standards are not enforced automatically we rely on code reviews to provide feedback and ensure contributions align with the expected coding standards. Since we rely on reviewers for enforcement and humans make mistakes, please keep in mind: + +> **Code review is a conversation.** + +It is completely reasonable for a patch author to question feedback and provide additional context about why something was done the way it was. Reviewers often see narrow slices in diffs rather than the full context of a file or part of the compiler, so they may not always provide perfect feedback. This is especially true with the application of the "golden rule" since it depends on understanding a wider context. 
+ ### Documenting Pull Requests Pull request descriptions should have the following format: diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 899b587492..b5e9c05079 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -320,6 +320,7 @@ Supported extensions * SPV_KHR_maximal_reconvergence * SPV_KHR_float_controls * SPV_NV_shader_subgroup_partitioned +* SPV_KHR_quad_control Vulkan specific attributes -------------------------- @@ -4008,6 +4009,8 @@ Quad ``QuadReadAcrossX()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossY()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossDiagonal()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadLaneAt()`` ``OpGroupNonUniformQuadBroadcast`` +Quad ``QuadAny()`` ``OpGroupNonUniformQuadAnyKHR`` +Quad ``QuadAll()`` ``OpGroupNonUniformQuadAllKHR`` N/A ``WaveMatch()`` ``OpGroupNonUniformPartitionNV`` Multiprefix ``WaveMultiPrefixSum()`` ``OpGroupNonUniform*Add`` ``PartitionedExclusiveScanNV`` Multiprefix ``WaveMultiPrefixProduct()`` ``OpGroupNonUniform*Mul`` ``PartitionedExclusiveScanNV`` @@ -4016,6 +4019,11 @@ Multiprefix ``WaveMultiPrefixBitOr()`` ``OpGroupNonUniformLogicalOr`` ` Multiprefix ``WaveMultiPrefixBitXor()`` ``OpGroupNonUniformLogicalXor`` ``PartitionedExclusiveScanNV`` ============= ============================ =================================== ============================== +``QuadAny`` and ``QuadAll`` will use the ``OpGroupNonUniformQuadAnyKHR`` and +``OpGroupNonUniformQuadAllKHR`` instructions if the ``SPV_KHR_quad_control`` +extension is enabled. If it is not, they will fall back to constructing the +value using multiple calls to ``OpGroupNonUniformQuadBroadcast``. 
+ The Implicit ``vk`` Namespace ============================= diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index 393d5c7df1..4bd1536ed7 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit 393d5c7df150532045c50affffea2df22e8231b0 +Subproject commit 4bd1536ed79003a5194a4bd8c9aa2fa17a84c15b diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 4bf98e3771..4f8c521851 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -487,9 +487,6 @@ inline bool IsFeedbackTexture(DXIL::ResourceKind ResourceKind) { // Enumeration for operations specified by DXIL enum class OpCode : unsigned { // - RawBufferVectorLoad = 303, // reads from a raw buffer and structured buffer - RawBufferVectorStore = - 304, // writes to a RWByteAddressBuffer or RWStructuredBuffer Reserved0 = 226, // Reserved Reserved1 = 227, // Reserved Reserved10 = 236, // Reserved @@ -901,8 +898,11 @@ enum class OpCode : unsigned { GetDimensions = 72, // gets texture size information RawBufferLoad = 139, // reads from a raw buffer and structured buffer RawBufferStore = 140, // writes to a RWByteAddressBuffer or RWStructuredBuffer - TextureLoad = 66, // reads texel data without any filtering or sampling - TextureStore = 67, // reads texel data without any filtering or sampling + RawBufferVectorLoad = 303, // reads from a raw buffer and structured buffer + RawBufferVectorStore = + 304, // writes to a RWByteAddressBuffer or RWStructuredBuffer + TextureLoad = 66, // reads texel data without any filtering or sampling + TextureStore = 67, // reads texel data without any filtering or sampling TextureStoreSample = 225, // stores texel data at specified sample index // Sampler Feedback @@ -1046,7 +1046,6 @@ enum class OpCode : unsigned { NumOpCodes_Dxil_1_6 = 222, NumOpCodes_Dxil_1_7 = 226, NumOpCodes_Dxil_1_8 = 258, - NumOpCodes_Dxil_1_9 = 305, NumOpCodes = 305 // exclusive last value of enumeration }; @@ 
-1060,8 +1059,6 @@ enum class OpCode : unsigned { // Groups for DXIL operations with equivalent function templates enum class OpCodeClass : unsigned { // - RawBufferVectorLoad, - RawBufferVectorStore, Reserved, // Amplification shader instructions @@ -1284,6 +1281,8 @@ enum class OpCodeClass : unsigned { GetDimensions, RawBufferLoad, RawBufferStore, + RawBufferVectorLoad, + RawBufferVectorStore, TextureLoad, TextureStore, TextureStoreSample, @@ -1361,7 +1360,6 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_6 = 149, NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses_Dxil_1_9 = 179, NumOpClasses = 179 // exclusive last value of enumeration }; @@ -1422,6 +1420,12 @@ const unsigned kRawBufferLoadElementOffsetOpIdx = 3; const unsigned kRawBufferLoadMaskOpIdx = 4; const unsigned kRawBufferLoadAlignmentOpIdx = 5; +// RawBufferVectorLoad. +const unsigned kRawBufferVectorLoadHandleOpIdx = 1; +const unsigned kRawBufferVectorLoadIndexOpIdx = 2; +const unsigned kRawBufferVectorLoadElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorLoadAlignmentOpIdx = 4; + // RawBufferStore const unsigned kRawBufferStoreHandleOpIdx = 1; const unsigned kRawBufferStoreIndexOpIdx = 2; @@ -1433,6 +1437,13 @@ const unsigned kRawBufferStoreVal3OpIdx = 7; const unsigned kRawBufferStoreMaskOpIdx = 8; const unsigned kRawBufferStoreAlignmentOpIdx = 9; +// RawBufferVectorStore +const unsigned kRawBufferVectorStoreHandleOpIdx = 1; +const unsigned kRawBufferVectorStoreIndexOpIdx = 2; +const unsigned kRawBufferVectorStoreElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorStoreValOpIdx = 4; +const unsigned kRawBufferVectorStoreAlignmentOpIdx = 5; + // TextureStore. 
const unsigned kTextureStoreHandleOpIdx = 1; const unsigned kTextureStoreCoord0OpIdx = 2; diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 0bd855ae58..05021ce789 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -162,9 +162,9 @@ class OP { static bool IsDxilOpExtendedOverload(OpCode C); - // Return true if the overload name for this operation may be constructed - // based on a type name that may not represent the same type in different - // modules. + // Return true if the overload name suffix for this operation may be + // constructed based on a user-defined or user-influenced type name + // that may not represent the same type in different linked modules. static bool MayHaveNonCanonicalOverload(OpCode OC); private: diff --git a/include/dxc/DxilPIXPasses/DxilPIXPasses.h b/include/dxc/DxilPIXPasses/DxilPIXPasses.h index ad0ddfdfd2..5cc7c4aa50 100644 --- a/include/dxc/DxilPIXPasses/DxilPIXPasses.h +++ b/include/dxc/DxilPIXPasses/DxilPIXPasses.h @@ -27,6 +27,7 @@ ModulePass *createDxilDebugInstrumentationPass(); ModulePass *createDxilShaderAccessTrackingPass(); ModulePass *createDxilPIXAddTidToAmplificationShaderPayloadPass(); ModulePass *createDxilPIXDXRInvocationsLogPass(); +ModulePass *createDxilNonUniformResourceIndexInstrumentationPass(); void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry &); void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry &); @@ -41,5 +42,7 @@ void initializeDxilShaderAccessTrackingPass(llvm::PassRegistry &); void initializeDxilPIXAddTidToAmplificationShaderPayloadPass( llvm::PassRegistry &); void initializeDxilPIXDXRInvocationsLogPass(llvm::PassRegistry &); +void initializeDxilNonUniformResourceIndexInstrumentationPass( + llvm::PassRegistry &); } // namespace llvm diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 90f3fafd79..68b88822e8 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ 
b/include/dxc/HlslIntrinsicOp.h @@ -231,6 +231,9 @@ enum class IntrinsicOp { IOP_VkReadClock = 223, IOP_Vkext_execution_mode = 224, IOP_Vkext_execution_mode_id = 225, + IOP_Vkreinterpret_pointer_cast = 360, + IOP_Vkstatic_pointer_cast = 361, + MOP_GetBufferContents = 362, MOP_Append = 226, MOP_RestartStrip = 227, MOP_CalculateLevelOfDetail = 228, @@ -366,7 +369,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 360, + Num_Intrinsics = 363, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/include/dxc/WinAdapter.h b/include/dxc/WinAdapter.h index b8c6646871..d02ad1ac38 100644 --- a/include/dxc/WinAdapter.h +++ b/include/dxc/WinAdapter.h @@ -51,7 +51,8 @@ #define _countof(a) (sizeof(a) / sizeof(*(a))) // If it is GCC, there is no UUID support and we must emulate it. -#ifndef __clang__ +// Clang support depends on the -fms-extensions compiler flag. +#if !defined(__clang__) || !defined(_MSC_EXTENSIONS) #define __EMULATE_UUID 1 #endif // __clang__ diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index bf8a040673..d37054194b 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -7,6 +7,9 @@ // // // Provides non-public declarations for the DirectX Compiler component. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// #ifndef __DXC_API_INTERNAL__ @@ -35,6 +38,7 @@ typedef struct ID3D10Blob ID3D10Blob; static const BYTE INTRIN_TEMPLATE_FROM_TYPE = 0xff; static const BYTE INTRIN_TEMPLATE_VARARGS = 0xfe; static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION = 0xfd; +static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION_2 = 0xfc; // Use this enumeration to describe allowed templates (layouts) in intrinsics. 
enum LEGAL_INTRINSIC_TEMPLATES { @@ -128,7 +132,12 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_HIT_OBJECT = 51, +#ifdef ENABLE_SPIRV_CODEGEN + LICOMPTYPE_VK_BUFFER_POINTER = 52, + LICOMPTYPE_COUNT = 53 +#else LICOMPTYPE_COUNT = 52 +#endif }; static const BYTE IA_SPECIAL_BASE = 0xf0; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 5e0757aac1..e058ece8f5 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2633,6 +2633,8 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v + + // Resources {OC::RawBufferVectorLoad, "RawBufferVectorLoad", OCC::RawBufferVectorLoad, @@ -2816,8 +2818,7 @@ bool OP::IsOverloadLegal(OpCode opCode, Type *pType) { if (OpProps.NumOverloadDims > 1) { StructType *ST = dyn_cast(pType); // Make sure multi-overload is well-formed. - if (!ST || ST->hasName() || - ST->getNumContainedTypes() != OpProps.NumOverloadDims) + if (!ST || ST->hasName() || ST->getNumElements() != OpProps.NumOverloadDims) return false; for (unsigned I = 0; I < ST->getNumElements(); ++I) Types[I] = ST->getElementType(I); @@ -5757,6 +5758,8 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; + + // Resources case OpCode::RawBufferVectorLoad: RRT(pETy); A(pI32); @@ -6224,11 +6227,12 @@ bool OP::IsResRetType(llvm::Type *Ty) { if (Ty == ResTy) return true; } + // Check for vector overload which isn't cached in m_pResRetType. 
StructType *ST = cast(Ty); - if (!ST->hasName() || ST->getNumContainedTypes() < 2) + if (!ST->hasName() || ST->getNumElements() < 2 || + !ST->getElementType(0)->isVectorTy()) return false; - return Ty == GetResRetType(ST->getContainedType(0)); - return false; + return Ty == GetResRetType(ST->getElementType(0)); } Type *OP::GetResRetType(Type *pOverloadType) { diff --git a/lib/DxilPIXPasses/CMakeLists.txt b/lib/DxilPIXPasses/CMakeLists.txt index c36d11d559..67e77f17cd 100644 --- a/lib/DxilPIXPasses/CMakeLists.txt +++ b/lib/DxilPIXPasses/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMDxilPIXPasses PixPassHelpers.cpp DxilPIXAddTidToAmplificationShaderPayload.cpp DxilPIXDXRInvocationsLog.cpp + DxilNonUniformResourceIndexInstrumentation.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR diff --git a/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp new file mode 100644 index 0000000000..a442bfabed --- /dev/null +++ b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp @@ -0,0 +1,173 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilNonUniformResourceIndexInstrumentation.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Provides a pass to add instrumentation to determine missing usage of the // +// NonUniformResourceIndex qualifier when dynamically indexing resources. // +// Used by PIX. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "PixPassHelpers.h" +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DxilPIXPasses/DxilPIXPasses.h" +#include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h" +#include "dxc/Support/Global.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +using namespace hlsl; + +class DxilNonUniformResourceIndexInstrumentation : public ModulePass { + +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilNonUniformResourceIndexInstrumentation() : ModulePass(ID) {} + StringRef getPassName() const override { + return "DXIL NonUniformResourceIndex Instrumentation"; + } + bool runOnModule(Module &M) override; +}; + +bool DxilNonUniformResourceIndexInstrumentation::runOnModule(Module &M) { + // This pass adds instrumentation for incorrect NonUniformResourceIndex usage + + DxilModule &DM = M.GetOrCreateDxilModule(); + LLVMContext &Ctx = M.getContext(); + OP *HlslOP = DM.GetOP(); + + hlsl::DxilResource *PixUAVResource = nullptr; + + UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx)); + + // Use WaveActiveAllEqual to check if a dynamic index is uniform + Function *WaveActiveAllEqualFunc = HlslOP->GetOpFunc( + DXIL::OpCode::WaveActiveAllEqual, Type::getInt32Ty(Ctx)); + Constant *WaveActiveAllEqualOpCode = + HlslOP->GetI32Const((int32_t)DXIL::OpCode::WaveActiveAllEqual); + + // Atomic operation to use for writing to the result uav resource + Function *AtomicOpFunc = + HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx)); + Constant *AtomicBinOpcode = + HlslOP->GetU32Const((uint32_t)OP::OpCode::AtomicBinOp); + Constant *AtomicOr = HlslOP->GetU32Const((uint32_t)DXIL::AtomicBinOpCode::Or); + + std::map FunctionToUAVHandle; + + // This is the main pass that will iterate through all of the resources that + // are dynamically indexed. 
If not already marked NonUniformResourceIndex, + // then insert WaveActiveAllEqual to determine if the index is uniform + // and finally write to a UAV resource with the result. + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&](bool IsNonUniformIndex, Instruction *CreateHandle, + Value *IndexOperand) { + if (IsNonUniformIndex) { + // The NonUniformResourceIndex qualifier was used, continue. + return true; + } + + if (!PixUAVResource) { + PixUAVResource = + PIXPassHelpers::CreateGlobalUAVResource(DM, 0, "PixUAVResource"); + } + + CallInst *PixUAVHandle = nullptr; + Function *F = CreateHandle->getParent()->getParent(); + + const auto FunctionToUAVHandleIter = FunctionToUAVHandle.lower_bound(F); + + if ((FunctionToUAVHandleIter != FunctionToUAVHandle.end()) && + (FunctionToUAVHandleIter->first == F)) { + PixUAVHandle = FunctionToUAVHandleIter->second; + } else { + IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt()); + + PixUAVHandle = PIXPassHelpers::CreateHandleForResource( + DM, Builder, PixUAVResource, "PixUAVHandle"); + + FunctionToUAVHandle.insert(FunctionToUAVHandleIter, + {F, PixUAVHandle}); + } + + IRBuilder<> Builder(CreateHandle); + + uint32_t InstructionNumber = 0; + if (!pix_dxil::PixDxilInstNum::FromInst(CreateHandle, + &InstructionNumber)) { + DXASSERT_NOMSG(false); + } + + // The output UAV is treated as a bit array where each bit corresponds + // to an instruction number. This determines what byte offset to write + // our result to based on the instruction number. 
+ const uint32_t InstructionNumByteOffset = + (InstructionNumber / 32u) * sizeof(uint32_t); + const uint32_t InstructionNumBitPosition = (InstructionNumber % 32u); + const uint32_t InstructionNumBitMask = 1u << InstructionNumBitPosition; + + Constant *UAVByteOffsetArg = + HlslOP->GetU32Const(InstructionNumByteOffset); + + CallInst *WaveActiveAllEqualCall = Builder.CreateCall( + WaveActiveAllEqualFunc, {WaveActiveAllEqualOpCode, IndexOperand}); + + // This takes the result of the WaveActiveAllEqual result and shifts + // it into the same bit position as the instruction number, followed + // by an xor to determine what to write to the UAV + Value *IsWaveEqual = + Builder.CreateZExt(WaveActiveAllEqualCall, Builder.getInt32Ty()); + Value *WaveEqualBitMask = + Builder.CreateShl(IsWaveEqual, InstructionNumBitPosition); + Value *FinalResult = + Builder.CreateXor(WaveEqualBitMask, InstructionNumBitMask); + + // Generate instructions to bitwise OR a UAV value corresponding + // to the instruction number and result of WaveActiveAllEqual. + // If WaveActiveAllEqual was false, we write a 1, otherwise a 0. 
+ Builder.CreateCall( + AtomicOpFunc, + { + AtomicBinOpcode, // i32, ; opcode + PixUAVHandle, // %dx.types.Handle, ; resource handle + AtomicOr, // i32, ; binary operation code : + // EXCHANGE, IADD, AND, OR, XOR + // IMIN, IMAX, UMIN, UMAX + UAVByteOffsetArg, // i32, ; coordinate c0: byte offset + UndefArg, // i32, ; coordinate c1 (unused) + UndefArg, // i32, ; coordinate c2 (unused) + FinalResult // i32); value + }, + "UAVInstructionNumberBitSet"); + return true; + }); + + const bool modified = (PixUAVResource != nullptr); + + if (modified) { + DM.ReEmitDxilResources(); + + if (OSOverride != nullptr) { + formatted_raw_ostream FOS(*OSOverride); + FOS << "\nFoundDynamicIndexingNoNuri\n"; + } + } + + return modified; +} + +char DxilNonUniformResourceIndexInstrumentation::ID = 0; + +ModulePass *llvm::createDxilNonUniformResourceIndexInstrumentationPass() { + return new DxilNonUniformResourceIndexInstrumentation(); +} + +INITIALIZE_PASS(DxilNonUniformResourceIndexInstrumentation, + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + false, false) diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index 4f4cc7c620..bd96d83965 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -795,87 +795,6 @@ DxilShaderAccessTracking::GetResourceFromHandle(Value *resHandle, return ret; } -static bool CheckForDynamicIndexing(OP *HlslOP, LLVMContext &Ctx, - DxilModule &DM) { - bool FoundDynamicIndexing = false; - - for (llvm::Function &F : DM.GetModule()->functions()) { - if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { - if (F.hasName()) { - if (F.getName().find("createHandleForLib") != StringRef::npos) { - auto FunctionUses = F.uses(); - for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { - auto &FunctionUse = *FI++; - auto FunctionUser = FunctionUse.getUser(); - 
auto instruction = cast(FunctionUser); - Value *resourceLoad = - instruction->getOperand(kCreateHandleForLibResOpIdx); - if (auto *load = cast(resourceLoad)) { - auto *resOrGep = load->getOperand(0); - if (isa(resOrGep)) { - FoundDynamicIndexing = true; - break; - } - } - } - } - } - } - if (FoundDynamicIndexing) { - break; - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFn = - HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFn->user_begin(); - FI != CreateHandleFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = instruction->getOperand(kCreateHandleResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromBindingFn->user_begin(); - FI != CreateHandleFromBindingFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromBindingResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromHeapFn->user_begin(); - FI != CreateHandleFromHeapFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromHeapHeapIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - return FoundDynamicIndexing; -} - bool DxilShaderAccessTracking::runOnModule(Module &M) { // This pass adds instrumentation for shader access to resources @@ -887,7 +806,13 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { if (m_CheckForDynamicIndexing) { - bool 
FoundDynamicIndexing = CheckForDynamicIndexing(HlslOP, Ctx, DM); + bool FoundDynamicIndexing = false; + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&FoundDynamicIndexing](bool, Instruction *, Value *) { + FoundDynamicIndexing = true; + return false; + }); if (FoundDynamicIndexing) { if (OSOverride != nullptr) { diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index dfb4b3aa83..65d9a660cc 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -199,6 +199,18 @@ constexpr uint32_t toolsUAVRegister = 0; template void ExtendRootSig(RootSigDesc &rootSigDesc) { auto *existingParams = rootSigDesc.pParameters; + for (uint32_t i = 0; i < rootSigDesc.NumParameters; ++i) { + if (rootSigDesc.pParameters[i].ParameterType == + DxilRootParameterType::UAV) { + if (rootSigDesc.pParameters[i].Descriptor.RegisterSpace == + toolsRegisterSpace && + rootSigDesc.pParameters[i].Descriptor.ShaderRegister == + toolsUAVRegister) { + // Already added + return; + } + } + } auto *newParams = new RootParameterDesc[rootSigDesc.NumParameters + 1]; if (existingParams != nullptr) { memcpy(newParams, existingParams, @@ -500,6 +512,90 @@ unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, } } +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function &Visitor) { + OP *HlslOP = DM.GetOP(); + LLVMContext &Ctx = DM.GetModule()->getContext(); + + for (llvm::Function &F : DM.GetModule()->functions()) { + if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { + if (F.hasName()) { + if (F.getName().find("createHandleForLib") != StringRef::npos) { + auto FunctionUses = F.uses(); + for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { + auto &FunctionUse = *FI++; + auto FunctionUser = FunctionUse.getUser(); + auto instruction = cast(FunctionUser); + Value *resourceLoad = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleForLibResOpIdx); + if (auto 
*load = cast(resourceLoad)) { + auto *resOrGep = load->getOperand(0); + if (auto *gep = dyn_cast(resOrGep)) { + if (!Visitor(DxilMDHelper::IsMarkedNonUniform(gep), load, + gep->getOperand(2))) { + return; + } + } + } + } + } + } + } + } + + auto CreateHandleFn = + HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFn->user_begin(); + FI != CreateHandleFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = + instruction->getOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandle createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromBindingFn->user_begin(); + FI != CreateHandleFromBindingFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromBindingResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromBinding createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromHeapFn->user_begin(); + FI != CreateHandleFromHeapFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromHeapHeapIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromHeap createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } +} + #ifdef PIX_DEBUG_DUMP_HELPER static int g_logIndent = 0; 
diff --git a/lib/DxilPIXPasses/PixPassHelpers.h b/lib/DxilPIXPasses/PixPassHelpers.h index 4cd0e1a549..d7b0b40af8 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.h +++ b/lib/DxilPIXPasses/PixPassHelpers.h @@ -9,6 +9,7 @@ #pragma once +#include #include #include "dxc/DXIL/DxilModule.h" @@ -16,7 +17,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -//#define PIX_DEBUG_DUMP_HELPER +// #define PIX_DEBUG_DUMP_HELPER #ifdef PIX_DEBUG_DUMP_HELPER #include "dxc/Support/Global.h" #endif @@ -82,4 +83,8 @@ void ReplaceAllUsesOfInstructionWithNewValueAndDeleteInstruction( llvm::Instruction *Instr, llvm::Value *newValue, llvm::Type *newType); unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, unsigned UpStreamSVPosRow); +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function + &Visitor); } // namespace PIXPassHelpers diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index fd05495e86..a788f21d4e 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -65,8 +65,8 @@ using std::vector; namespace hlsl { // PrintDiagnosticContext methods. 
-PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &printer) - : m_Printer(printer), m_errorsFound(false), m_warningsFound(false) {} +PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &Printer) + : m_Printer(Printer), m_errorsFound(false), m_warningsFound(false) {} bool PrintDiagnosticContext::HasErrors() const { return m_errorsFound; } bool PrintDiagnosticContext::HasWarnings() const { return m_warningsFound; } @@ -97,68 +97,68 @@ struct PSExecutionInfo { }; static unsigned ValidateSignatureRowCol(Instruction *I, - DxilSignatureElement &SE, Value *rowVal, - Value *colVal, EntryStatus &Status, + DxilSignatureElement &SE, Value *RowVal, + Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (ConstantInt *constRow = dyn_cast(rowVal)) { - unsigned row = constRow->getLimitedValue(); - if (row >= SE.GetRows()) { - std::string range = std::string("0~") + std::to_string(SE.GetRows()); + if (ConstantInt *ConstRow = dyn_cast(RowVal)) { + unsigned Row = ConstRow->getLimitedValue(); + if (Row >= SE.GetRows()) { + std::string Range = std::string("0~") + std::to_string(SE.GetRows()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Row", range, std::to_string(row)}); + {"Row", Range, std::to_string(Row)}); } } - if (!isa(colVal)) { - // col must be const + if (!isa(ColVal)) { + // Col must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"Col", "LoadInput/StoreOutput"}); return 0; } - unsigned col = cast(colVal)->getLimitedValue(); + unsigned Col = cast(ColVal)->getLimitedValue(); - if (col > SE.GetCols()) { - std::string range = std::string("0~") + std::to_string(SE.GetCols()); + if (Col > SE.GetCols()) { + std::string Range = std::string("0~") + std::to_string(SE.GetCols()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Col", range, std::to_string(col)}); + {"Col", Range, std::to_string(Col)}); } else { if (SE.IsOutput()) - Status.outputCols[SE.GetID()] |= 1 << 
col; + Status.outputCols[SE.GetID()] |= 1 << Col; if (SE.IsPatchConstOrPrim()) - Status.patchConstOrPrimCols[SE.GetID()] |= 1 << col; + Status.patchConstOrPrimCols[SE.GetID()] |= 1 << Col; } - return col; + return Col; } static DxilSignatureElement * -ValidateSignatureAccess(Instruction *I, DxilSignature &sig, Value *sigID, - Value *rowVal, Value *colVal, EntryStatus &Status, +ValidateSignatureAccess(Instruction *I, DxilSignature &Sig, Value *SigId, + Value *RowVal, Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (!isa(sigID)) { + if (!isa(SigId)) { // inputID must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"SignatureID", "LoadInput/StoreOutput"}); return nullptr; } - unsigned SEIdx = cast(sigID)->getLimitedValue(); - if (sig.GetElements().size() <= SEIdx) { + unsigned SEIdx = cast(SigId)->getLimitedValue(); + if (Sig.GetElements().size() <= SEIdx) { ValCtx.EmitInstrError(I, ValidationRule::InstrOpConstRange); return nullptr; } - DxilSignatureElement &SE = sig.GetElement(SEIdx); - bool isOutput = sig.IsOutput(); + DxilSignatureElement &SE = Sig.GetElement(SEIdx); + bool IsOutput = Sig.IsOutput(); - unsigned col = ValidateSignatureRowCol(I, SE, rowVal, colVal, Status, ValCtx); + unsigned Col = ValidateSignatureRowCol(I, SE, RowVal, ColVal, Status, ValCtx); - if (isOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { - unsigned mask = Status.OutputPositionMask[SE.GetOutputStream()]; - mask |= 1 << col; + if (IsOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { + unsigned Mask = Status.OutputPositionMask[SE.GetOutputStream()]; + Mask |= 1 << Col; if (SE.GetOutputStream() < DXIL::kNumOutputStreams) - Status.OutputPositionMask[SE.GetOutputStream()] = mask; + Status.OutputPositionMask[SE.GetOutputStream()] = Mask; } return &SE; } @@ -183,9 +183,9 @@ static DxilResourceProperties GetResourceFromHandle(Value *Handle, return RP; } -static DXIL::SamplerKind GetSamplerKind(Value 
*samplerHandle, +static DXIL::SamplerKind GetSamplerKind(Value *SamplerHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(samplerHandle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(SamplerHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::Sampler) { // must be sampler. @@ -200,14 +200,14 @@ static DXIL::SamplerKind GetSamplerKind(Value *samplerHandle, } static DXIL::ResourceKind -GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, +GetResourceKindAndCompTy(Value *Handle, DXIL::ComponentType &CompTy, DXIL::ResourceClass &ResClass, ValidationContext &ValCtx) { CompTy = DXIL::ComponentType::Invalid; ResClass = DXIL::ResourceClass::Invalid; // TODO: validate ROV is used only in PS. - DxilResourceProperties RP = GetResourceFromHandle(handle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(Handle, ValCtx); ResClass = RP.getResourceClass(); switch (ResClass) { @@ -230,19 +230,19 @@ GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, return RP.getResourceKind(); } -DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, - std::deque &offsets) { +DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &TypeSys, + std::deque &Offsets) { unsigned CurIdx = 1; - unsigned LastIdx = offsets.size() - 1; + unsigned LastIdx = Offsets.size() - 1; DxilStructAnnotation *StructAnnot = nullptr; - for (; CurIdx < offsets.size(); ++CurIdx) { + for (; CurIdx < Offsets.size(); ++CurIdx) { if (const StructType *EltST = dyn_cast(Ty)) { - if (DxilStructAnnotation *EltAnnot = typeSys.GetStructAnnotation(EltST)) { + if (DxilStructAnnotation *EltAnnot = TypeSys.GetStructAnnotation(EltST)) { StructAnnot = EltAnnot; - Ty = EltST->getElementType(offsets[CurIdx]); + Ty = EltST->getElementType(Offsets[CurIdx]); if (CurIdx == LastIdx) { - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } 
else { return nullptr; @@ -252,16 +252,16 @@ DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, StructAnnot = nullptr; } else { if (StructAnnot) - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } return nullptr; } -DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { - auto it = ResPropMap.find(resVal); - if (it != ResPropMap.end()) { - return it->second; +DxilResourceProperties ValidationContext::GetResourceFromVal(Value *ResVal) { + auto It = ResPropMap.find(ResVal); + if (It != ResPropMap.end()) { + return It->second; } else { DxilResourceProperties RP; return RP; @@ -269,34 +269,34 @@ DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { } struct ResRetUsage { - bool x; - bool y; - bool z; - bool w; - bool status; - ResRetUsage() : x(false), y(false), z(false), w(false), status(false) {} + bool X; + bool Y; + bool Z; + bool W; + bool Status; + ResRetUsage() : X(false), Y(false), Z(false), W(false), Status(false) {} }; -static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, +static void CollectGetDimResRetUsage(ResRetUsage &Usage, Instruction *ResRet, ValidationContext &ValCtx) { for (User *U : ResRet->users()) { if (ExtractValueInst *EVI = dyn_cast(U)) { - for (unsigned idx : EVI->getIndices()) { - switch (idx) { + for (unsigned Idx : EVI->getIndices()) { + switch (Idx) { case 0: - usage.x = true; + Usage.X = true; break; case 1: - usage.y = true; + Usage.Y = true; break; case 2: - usage.z = true; + Usage.Z = true; break; case 3: - usage.w = true; + Usage.W = true; break; case DXIL::kResRetStatusIndex: - usage.status = true; + Usage.Status = true; break; default: // Emit index out of bound. 
@@ -306,7 +306,7 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } } else if (PHINode *PHI = dyn_cast(U)) { - CollectGetDimResRetUsage(usage, PHI, ValCtx); + CollectGetDimResRetUsage(Usage, PHI, ValCtx); } else { Instruction *User = cast(U); ValCtx.EmitInstrError(User, ValidationRule::InstrDxilStructUser); @@ -314,18 +314,18 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } -static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef coords, +static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { - const unsigned kMaxNumCoords = 4; - unsigned numCoords = DxilResource::GetNumCoords(resKind); - for (unsigned i = 0; i < kMaxNumCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + const unsigned kMaxNumCoords = 4; + unsigned NumCoords = DxilResource::GetNumCoords(ResKind); + for (unsigned I = 0; I < kMaxNumCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -334,18 +334,18 @@ static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, } } static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, - DXIL::ResourceKind resKind, - ArrayRef coords, + DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { const unsigned kMaxNumDimCoords = 3; - unsigned numCoords = DxilResource::GetNumDimensionsForCalcLOD(resKind); - for (unsigned i = 0; i < kMaxNumDimCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + unsigned NumCoords = DxilResource::GetNumDimensionsForCalcLOD(ResKind); + for (unsigned I = 0; I < kMaxNumDimCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI,
ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -353,21 +353,21 @@ static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, } } -static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef offsets, +static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Offsets, ValidationContext &ValCtx) { const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - unsigned numOffsets = DxilResource::GetNumOffsets(resKind); - bool hasOffset = !isa(offsets[0]); + unsigned NumOffsets = DxilResource::GetNumOffsets(ResKind); + bool HasOffset = !isa(Offsets[0]); - auto validateOffset = [&](Value *offset) { + auto ValidateOffset = [&](Value *Offset) { // 6.7 Advanced Textures allow programmable offsets if (pSM->IsSM67Plus()) return; - if (ConstantInt *cOffset = dyn_cast(offset)) { - int offset = cOffset->getValue().getSExtValue(); - if (offset > 7 || offset < -8) { + if (ConstantInt *cOffset = dyn_cast(Offset)) { + int Offset = cOffset->getValue().getSExtValue(); + if (Offset > 7 || Offset < -8) { ValCtx.EmitInstrError(CI, ValidationRule::InstrTextureOffset); } } else { @@ -375,20 +375,20 @@ static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, } }; - if (hasOffset) { - validateOffset(offsets[0]); + if (HasOffset) { + ValidateOffset(Offsets[0]); } - for (unsigned i = 1; i < offsets.size(); i++) { - if (i < numOffsets) { - if (hasOffset) { - if (isa(offsets[i])) + for (unsigned I = 1; I < Offsets.size(); I++) { + if (I < NumOffsets) { + if (HasOffset) { + if (isa(Offsets[I])) ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); else - validateOffset(offsets[i]); + ValidateOffset(Offsets[I]); } } else { - if (!isa(offsets[i])) { + if (!isa(Offsets[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } @@ -405,53 +405,53 
@@ static void ValidateDerivativeOp(CallInst *CI, ValidationContext &ValCtx) { {"Derivatives in CS/MS/AS", "Shader Model 6.6+"}); } -static void ValidateSampleInst(CallInst *CI, Value *srvHandle, - Value *samplerHandle, ArrayRef coords, - ArrayRef offsets, bool IsSampleC, +static void ValidateSampleInst(CallInst *CI, Value *SrvHandle, + Value *SamplerHandle, ArrayRef Coords, + ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); - bool isSampleCompTy = compTy == DXIL::ComponentType::F32; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::F16; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF16; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF16; + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); + bool IsSampleCompTy = CompTy == DXIL::ComponentType::F32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::F16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF16; const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); if 
(pSM->IsSM67Plus() && !IsSampleC) { - isSampleCompTy |= compTy == DXIL::ComponentType::I16; - isSampleCompTy |= compTy == DXIL::ComponentType::U16; - isSampleCompTy |= compTy == DXIL::ComponentType::I32; - isSampleCompTy |= compTy == DXIL::ComponentType::U32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U32; } - if (!isSampleCompTy) { + if (!IsSampleCompTy) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleCompType); } - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); } - ValidationRule rule = ValidationRule::InstrResourceKindForSample; + ValidationRule Rule = ValidationRule::InstrResourceKindForSample; if (IsSampleC) { - rule = ValidationRule::InstrResourceKindForSampleC; + Rule = ValidationRule::InstrResourceKindForSampleC; } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -461,64 +461,64 @@ static void ValidateSampleInst(CallInst *CI, Value *srvHandle, break; case DXIL::ResourceKind::Texture3D: if (IsSampleC) { - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); } break; default: - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- ValidateResourceOffset(CI, resKind, offsets, ValCtx); + ValidateResourceOffset(CI, ResKind, Offsets, ValCtx); } -static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, - ArrayRef coords, ArrayRef offsets, +static void ValidateGather(CallInst *CI, Value *SrvHandle, Value *SamplerHandle, + ArrayRef Coords, ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture2D: case DXIL::ResourceKind::Texture2DArray: { - bool hasOffset = !isa(offsets[0]); - if (hasOffset) { - if (isa(offsets[1])) { + bool HasOffset = !isa(Offsets[0]); + if (HasOffset) { + if (isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); } } } break; case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: { - if (!isa(offsets[0])) { + if (!isa(Offsets[0])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } - if (!isa(offsets[1])) { + if (!isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } break; @@ -529,21 +529,21 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, } } -static unsigned StoreValueToMask(ArrayRef vals) { - unsigned mask = 0; - for (unsigned i = 0; i < 4; i++) { - if (!isa(vals[i])) { - mask |= 1 << i; +static unsigned StoreValueToMask(ArrayRef Vals) { + unsigned Mask = 0; + for (unsigned I = 0; I < 4; I++) { + if (!isa(Vals[I])) { + Mask |= 1 << I; } } - return mask; + return Mask; } -static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(cbHandle, ValCtx); +static int GetCBufSize(Value *CbHandle, ValidationContext &ValCtx) { + DxilResourceProperties RP = GetResourceFromHandle(CbHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::CBuffer) { - ValCtx.EmitInstrError(cast(cbHandle), + ValCtx.EmitInstrError(cast(CbHandle), ValidationRule::InstrCBufferClassForCBufferHandle); return -1; } @@ -554,7 +554,7 @@ static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { // Make sure none of the handle arguments are undef / zero-initializer, // Also, do not accept any resource handles with invalid dxil resource // properties -void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, +void 
ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { for (Value *op : CI->operands()) { @@ -563,13 +563,13 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, const Type *pNodeRecordHandleTy = ValCtx.DxilMod.GetOP()->GetNodeRecordHandleType(); - const Type *argTy = op->getType(); - if (argTy == pNodeHandleTy || argTy == pNodeRecordHandleTy || - argTy == pHandleTy) { + const Type *ArgTy = op->getType(); + if (ArgTy == pNodeHandleTy || ArgTy == pNodeRecordHandleTy || + ArgTy == pHandleTy) { if (isa(op) || isa(op)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); - } else if (argTy == pHandleTy) { + } else if (ArgTy == pHandleTy) { // GetResourceFromHandle will emit an error on an invalid handle GetResourceFromHandle(op, ValCtx); } @@ -577,10 +577,10 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, } } -void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, +void ValidateHandleArgs(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // TODO: add case DXIL::OpCode::IndexNodeRecordHandle: case DXIL::OpCode::AnnotateHandle: @@ -591,12 +591,12 @@ void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, break; default: - ValidateHandleArgsForInstruction(CI, opcode, ValCtx); + ValidateHandleArgsForInstruction(CI, Opcode, ValCtx); break; } } -static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { +static unsigned GetNumVertices(DXIL::InputPrimitive InputPrimitive) { const unsigned InputPrimitiveVertexTab[] = { 0, // Undefined = 0, 1, // Point = 1, @@ -641,26 +641,26 @@ static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { 0, // LastEntry, }; - unsigned primitiveIdx = static_cast(inputPrimitive); - return InputPrimitiveVertexTab[primitiveIdx]; + unsigned PrimitiveIdx = static_cast(InputPrimitive); + return InputPrimitiveVertexTab[PrimitiveIdx]; } -static void 
ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { Function *F = CI->getParent()->getParent(); DxilModule &DM = ValCtx.DxilMod; - bool bIsPatchConstantFunc = false; + bool IsPatchConstantFunc = false; if (!DM.HasDxilEntryProps(F)) { - auto it = ValCtx.PatchConstantFuncMap.find(F); - if (it == ValCtx.PatchConstantFuncMap.end()) { + auto It = ValCtx.PatchConstantFuncMap.find(F); + if (It == ValCtx.PatchConstantFuncMap.end()) { // Missing entry props. ValCtx.EmitInstrError(CI, ValidationRule::InstrSignatureOperationNotInEntry); return; } // Use hull entry instead of patch constant function. - F = it->second.front(); - bIsPatchConstantFunc = true; + F = It->second.front(); + IsPatchConstantFunc = true; } if (!ValCtx.HasEntryStatus(F)) { return; @@ -668,67 +668,67 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); - DxilFunctionProps &props = EntryProps.props; + DxilFunctionProps &Props = EntryProps.props; DxilEntrySignature &S = EntryProps.sig; - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::LoadInput: { - Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); - ValidateSignatureAccess(CI, inputSig, inputID, row, col, Status, ValCtx); - - // Check vertexID in ps/vs. and none array input. 
- Value *vertexID = + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + ValidateSignatureAccess(CI, InputSig, InputId, Row, Col, Status, ValCtx); + + // Check VertexId in ps/vs. and none array input. + Value *VertexId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputVertexIDOpIdx); - bool usedVertexID = vertexID && !isa(vertexID); - if (props.IsVS() || props.IsPS()) { - if (usedVertexID) { - // use vertexID in VS/PS input. + bool UsedVertexId = VertexId && !isa(VertexId); + if (Props.IsVS() || Props.IsPS()) { + if (UsedVertexId) { + // Use VertexId in VS/PS input. ValCtx.EmitInstrError(CI, ValidationRule::SmOperand); return; } } else { - if (ConstantInt *cVertexID = dyn_cast(vertexID)) { - int immVertexID = cVertexID->getValue().getLimitedValue(); - if (cVertexID->getValue().isNegative()) { - immVertexID = cVertexID->getValue().getSExtValue(); + if (ConstantInt *cVertexId = dyn_cast(VertexId)) { + int ImmVertexId = cVertexId->getValue().getLimitedValue(); + if (cVertexId->getValue().isNegative()) { + ImmVertexId = cVertexId->getValue().getSExtValue(); } - const int low = 0; - int high = 0; - if (props.IsGS()) { - DXIL::InputPrimitive inputPrimitive = - props.ShaderProps.GS.inputPrimitive; - high = GetNumVertices(inputPrimitive); - } else if (props.IsDS()) { - high = props.ShaderProps.DS.inputControlPoints; - } else if (props.IsHS()) { - high = props.ShaderProps.HS.inputControlPoints; + const int Low = 0; + int High = 0; + if (Props.IsGS()) { + DXIL::InputPrimitive InputPrimitive = + Props.ShaderProps.GS.inputPrimitive; + High = GetNumVertices(InputPrimitive); + } else if (Props.IsDS()) { + High = Props.ShaderProps.DS.inputControlPoints; + } else if (Props.IsHS()) { + High = Props.ShaderProps.HS.inputControlPoints; } else { 
ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadInput", "VS/HS/DS/GS/PS"}); } - if (immVertexID < low || immVertexID >= high) { - std::string range = std::to_string(low) + "~" + std::to_string(high); + if (ImmVertexId < Low || ImmVertexId >= High) { + std::string Range = std::to_string(Low) + "~" + std::to_string(High); ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"VertexID", range, std::to_string(immVertexID)}); + {"VertexID", Range, std::to_string(ImmVertexId)}); } } } } break; case DXIL::OpCode::DomainLocation: { - Value *colValue = + Value *ColValue = CI->getArgOperand(DXIL::OperandIndex::kDomainLocationColOpIdx); - if (!isa(colValue)) { - // col must be const + if (!isa(ColValue)) { + // Col must be const ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Col", "DomainLocation"}); } else { - unsigned col = cast(colValue)->getLimitedValue(); - if (col >= Status.domainLocSize) { + unsigned Col = cast(ColValue)->getLimitedValue(); + if (Col >= Status.domainLocSize) { ValCtx.EmitInstrError(CI, ValidationRule::SmDomainLocationIdxOOB); } } @@ -736,60 +736,60 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::StoreOutput: case DXIL::OpCode::StoreVertexOutput: case DXIL::OpCode::StorePrimitiveOutput: { - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = opcode == DXIL::OpCode::StorePrimitiveOutput + DxilSignature &OutputSig = Opcode == DXIL::OpCode::StorePrimitiveOutput ? 
S.PatchConstOrPrimSignature : S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::OutputControlPointID: { // Only used in hull shader. - Function *func = CI->getParent()->getParent(); + Function *Func = CI->getParent()->getParent(); // Make sure this is inside hs shader entry function. - if (!(props.IsHS() && F == func)) { + if (!(Props.IsHS() && F == Func)) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"OutputControlPointID", "hull function"}); } } break; case DXIL::OpCode::LoadOutputControlPoint: { // Only used in patch constant function. 
- Function *func = CI->getParent()->getParent(); - if (ValCtx.entryFuncCallSet.count(func) > 0) { + Function *Func = CI->getParent()->getParent(); + if (ValCtx.entryFuncCallSet.count(Func) > 0) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadOutputControlPoint", "PatchConstant function"}); } - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + DxilSignature &OutputSig = S.OutputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::StorePatchConstant: { // Only used in patch constant function. 
- Function *func = CI->getParent()->getParent(); - if (!bIsPatchConstantFunc) { + Function *Func = CI->getParent()->getParent(); + if (!IsPatchConstantFunc) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"StorePatchConstant", "PatchConstant function"}); } else { - auto &hullShaders = ValCtx.PatchConstantFuncMap[func]; - for (Function *F : hullShaders) { + auto &HullShaders = ValCtx.PatchConstantFuncMap[Func]; + for (Function *F : HullShaders) { EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); DxilEntrySignature &S = EntryProps.sig; - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.PatchConstOrPrimSignature; - Value *row = + DxilSignature &OutputSig = S.PatchConstOrPrimSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } } @@ -807,12 +807,12 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::EvalSampleIndex: case DXIL::OpCode::EvalSnapped: { // Eval* share same operand index with load input. 
- Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, inputID, row, col, Status, ValCtx); + CI, InputSig, InputId, Row, Col, Status, ValCtx); if (pSE) { switch (pSE->GetInterpolationMode()->GetKind()) { case DXIL::InterpolationMode::Linear: @@ -836,11 +836,11 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::AttributeAtVertex: { Value *Attribute = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, Attribute, row, col, Status, ValCtx); + CI, InputSig, Attribute, Row, Col, Status, ValCtx); if (pSE && pSE->GetInterpolationMode()->GetKind() != hlsl::InterpolationMode::Kind::Constant) { ValCtx.EmitInstrFormatError( @@ -851,35 +851,35 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::CutStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::EmitStream: { - if (props.IsGS()) { - auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < 
_countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + if (Props.IsGS()) { + auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - Value *streamID = + Value *StreamId = CI->getArgOperand(DXIL::OperandIndex::kStreamEmitCutIDOpIdx); - if (ConstantInt *cStreamID = dyn_cast(streamID)) { - int immStreamID = cStreamID->getValue().getLimitedValue(); - if (cStreamID->getValue().isNegative() || immStreamID >= 4) { + if (ConstantInt *cStreamId = dyn_cast(StreamId)) { + int ImmStreamId = cStreamId->getValue().getLimitedValue(); + if (cStreamId->getValue().isNegative() || ImmStreamId >= 4) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", "0~4", std::to_string(immStreamID)}); + {"StreamID", "0~4", std::to_string(ImmStreamId)}); } else { - unsigned immMask = 1 << immStreamID; - if ((streamMask & immMask) == 0) { - std::string range; - for (unsigned i = 0; i < 4; i++) { - if (streamMask & (1 << i)) { - range += std::to_string(i) + " "; + unsigned ImmMask = 1 << ImmStreamId; + if ((StreamMask & ImmMask) == 0) { + std::string Range; + for (unsigned I = 0; I < 4; I++) { + if (StreamMask & (1 << I)) { + Range += std::to_string(I) + " "; } } ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", range, std::to_string(immStreamID)}); + {"StreamID", Range, std::to_string(ImmStreamId)}); } } @@ -893,25 +893,25 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::EmitIndices: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"EmitIndices", "Mesh shader"}); } } break; case DXIL::OpCode::SetMeshOutputCounts: { - if (!props.IsMS()) { + if (!Props.IsMS()) { 
ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"SetMeshOutputCounts", "Mesh shader"}); } } break; case DXIL::OpCode::GetMeshPayload: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"GetMeshPayload", "Mesh shader"}); } } break; case DXIL::OpCode::DispatchMesh: { - if (!props.IsAS()) { + if (!Props.IsAS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"DispatchMesh", "Amplification shader"}); } @@ -925,9 +925,9 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } -static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // Imm input value validation. case DXIL::OpCode::Asin: { DxilInst_Asin I(CI); @@ -973,77 +973,77 @@ static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, // Validate the type-defined mask compared to the store value mask which // indicates which parts were defined returns true if caller should continue // validation -static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode opcode, - ConstantInt *mask, unsigned stValMask, - bool isTyped, ValidationContext &ValCtx) { - if (!mask) { +static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode Opcode, + ConstantInt *Mask, unsigned StValMask, + bool IsTyped, ValidationContext &ValCtx) { + if (!Mask) { // Mask for buffer store should be immediate. 
ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, - {"Mask", hlsl::OP::GetOpCodeName(opcode)}); + {"Mask", hlsl::OP::GetOpCodeName(Opcode)}); return false; } - unsigned uMask = mask->getLimitedValue(); - if (isTyped && uMask != 0xf) { + unsigned UMask = Mask->getLimitedValue(); + if (IsTyped && UMask != 0xf) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskForTypedUAVStore); } // write mask must be contiguous (.x .xy .xyz or .xyzw) - if (!((uMask == 0xf) || (uMask == 0x7) || (uMask == 0x3) || (uMask == 0x1))) { + if (!((UMask == 0xf) || (UMask == 0x7) || (UMask == 0x3) || (UMask == 0x1))) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskGapForUAV); } - // If a bit is set in the uMask (expected values) that isn't set in stValMask + // If a bit is set in the UMask (expected values) that isn't set in StValMask // (user provided values) then the user failed to define some of the output // values. - if (uMask & ~stValMask) + if (UMask & ~StValMask) ValCtx.EmitInstrError(I, ValidationRule::InstrUndefinedValueForUAVStore); - else if (uMask != stValMask) + else if (UMask != StValMask) ValCtx.EmitInstrFormatError( I, ValidationRule::InstrWriteMaskMatchValueForUAVStore, - {std::to_string(uMask), std::to_string(stValMask)}); + {std::to_string(UMask), std::to_string(StValMask)}); return true; } -static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::GetDimensions: { - DxilInst_GetDimensions getDim(CI); - Value *handle = getDim.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); + DxilInst_GetDimensions GetDim(CI); + Value *Handle = GetDim.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + 
GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); // Check the result component use. - ResRetUsage usage; - CollectGetDimResRetUsage(usage, CI, ValCtx); + ResRetUsage Usage; + CollectGetDimResRetUsage(Usage, CI, ValCtx); // Mip level only for texture. - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: - if (usage.y) { + if (Usage.Y) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"y", "Texture1D"}); } - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1D"}); } break; case DXIL::ResourceKind::Texture1DArray: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1DArray"}); } break; case DXIL::ResourceKind::Texture2D: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2D"}); @@ -1052,7 +1052,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture2DArray: break; case DXIL::ResourceKind::Texture2DMS: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2DMS"}); @@ -1063,7 +1063,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture3D: break; case DXIL::ResourceKind::TextureCube: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "TextureCube"}); @@ -1075,12 +1075,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: { - Value *mip = getDim.get_mipLevel(); - if (!isa(mip)) { + Value *Mip = GetDim.get_mipLevel(); + if (!isa(Mip)) { ValCtx.EmitInstrError(CI, 
ValidationRule::InstrMipLevelForGetDimension); } - if (resKind != DXIL::ResourceKind::Invalid) { - if (usage.y || usage.z || usage.w) { + if (ResKind != DXIL::ResourceKind::Invalid) { + if (Usage.Y || Usage.Z || Usage.W) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); @@ -1092,38 +1092,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; } - if (usage.status) { + if (Usage.Status) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); } } break; case DXIL::OpCode::CalculateLOD: { - DxilInst_CalculateLOD lod(CI); - Value *samplerHandle = lod.get_sampler(); - DXIL::SamplerKind samplerKind = GetSamplerKind(samplerHandle, ValCtx); - if (samplerKind != DXIL::SamplerKind::Default) { + DxilInst_CalculateLOD LOD(CI); + Value *SamplerHandle = LOD.get_sampler(); + DXIL::SamplerKind SamplerKind = GetSamplerKind(SamplerHandle, ValCtx); + if (SamplerKind != DXIL::SamplerKind::Default) { // After SM68, Comparison is supported. if (!ValCtx.DxilMod.GetShaderModel()->IsSM68Plus() || - samplerKind != DXIL::SamplerKind::Comparison) + SamplerKind != DXIL::SamplerKind::Comparison) ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForLOD); } - Value *handle = lod.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + Value *Handle = LOD.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource. 
ValidateCalcLODResourceDimensionCoord( - CI, resKind, {lod.get_coord0(), lod.get_coord1(), lod.get_coord2()}, + CI, ResKind, {LOD.get_coord0(), LOD.get_coord1(), LOD.get_coord2()}, ValCtx); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1140,67 +1140,67 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::TextureGather: { - DxilInst_TextureGather gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGather Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::TextureGatherCmp: { - DxilInst_TextureGatherCmp gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGatherCmp Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::Sample: { - DxilInst_Sample sample(CI); + DxilInst_Sample Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), 
Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmp: { - DxilInst_SampleCmp sample(CI); + DxilInst_SampleCmp Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpLevel: { // sampler must be comparison mode. - DxilInst_SampleCmpLevel sample(CI); + DxilInst_SampleCmpLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleCmpLevelZero: { // sampler must be comparison mode. 
- DxilInst_SampleCmpLevelZero sample(CI); + DxilInst_SampleCmpLevelZero Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleBias: { - DxilInst_SampleBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1210,19 +1210,19 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpBias: { - DxilInst_SampleCmpBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < 
DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleCmpBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1232,38 +1232,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleGrad: { - DxilInst_SampleGrad sample(CI); + DxilInst_SampleGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::SampleCmpGrad: { - DxilInst_SampleCmpGrad sample(CI); + DxilInst_SampleCmpGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), 
Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleLevel: { - DxilInst_SampleLevel sample(CI); + DxilInst_SampleLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::CheckAccessFullyMapped: { @@ -1273,53 +1273,53 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); - bool isLegal = EVI->getNumIndices() == 1 && + bool IsLegal = EVI->getNumIndices() == 1 && EVI->getIndices()[0] == DXIL::kResRetStatusIndex && ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); - if (!isLegal) { + if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } } } break; case DXIL::OpCode::BufferStore: { - DxilInst_BufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); + DxilInst_BufferStore BufSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = - 
StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), - bufSt.get_value2(), bufSt.get_value3()}); + ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + unsigned StValMask = + StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), + BufSt.get_value2(), BufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, - resKind == DXIL::ResourceKind::TypedBuffer || - resKind == DXIL::ResourceKind::TBuffer, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, + ResKind == DXIL::ResourceKind::TypedBuffer || + ResKind == DXIL::ResourceKind::TBuffer, ValCtx)) return; - Value *offset = bufSt.get_coord1(); + Value *Offset = BufSt.get_coord1(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1332,26 +1332,26 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureStore: { - DxilInst_TextureStore texSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texSt.get_srv(), compTy, resClass, ValCtx); + DxilInst_TextureStore TexSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexSt.get_srv(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(texSt.get_mask()); 
- unsigned stValMask = - StoreValueToMask({texSt.get_value0(), texSt.get_value1(), - texSt.get_value2(), texSt.get_value3()}); + ConstantInt *Mask = dyn_cast(TexSt.get_mask()); + unsigned StValMask = + StoreValueToMask({TexSt.get_value0(), TexSt.get_value1(), + TexSt.get_value2(), TexSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, true /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, true /*IsTyped*/, ValCtx)) return; - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1367,30 +1367,30 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::BufferLoad: { - DxilInst_BufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); - - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { + DxilInst_BufferLoad BufLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } - Value *offset = bufLd.get_wot(); + Value *Offset = BufLd.get_wot(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1403,33 +1403,33 @@ static void ValidateResourceDxilOp(CallInst *CI, 
DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureLoad: { - DxilInst_TextureLoad texLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texLd.get_srv(), compTy, resClass, ValCtx); - - Value *mipLevel = texLd.get_mipLevelOrSampleCount(); - - if (resClass == DXIL::ResourceClass::UAV) { - bool noOffset = isa(texLd.get_offset0()); - noOffset &= isa(texLd.get_offset1()); - noOffset &= isa(texLd.get_offset2()); - if (!noOffset) { + DxilInst_TextureLoad TexLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexLd.get_srv(), CompTy, ResClass, ValCtx); + + Value *MipLevel = TexLd.get_mipLevelOrSampleCount(); + + if (ResClass == DXIL::ResourceClass::UAV) { + bool NoOffset = isa(TexLd.get_offset0()); + NoOffset &= isa(TexLd.get_offset1()); + NoOffset &= isa(TexLd.get_offset2()); + if (!NoOffset) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOffsetOnUAVLoad); } - if (!isa(mipLevel)) { - if (resKind != DXIL::ResourceKind::Texture2DMS && - resKind != DXIL::ResourceKind::Texture2DMSArray) + if (!isa(MipLevel)) { + if (ResKind != DXIL::ResourceKind::Texture2DMS && + ResKind != DXIL::ResourceKind::Texture2DMSArray) ValCtx.EmitInstrError(CI, ValidationRule::InstrMipOnUAVLoad); } } else { - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1438,7 +1438,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, break; case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: { - if (isa(mipLevel)) { + if (isa(MipLevel)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleIndexForLoad2DMS); } } break; @@ -1449,66 
+1449,70 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateResourceOffset( - CI, resKind, - {texLd.get_offset0(), texLd.get_offset1(), texLd.get_offset2()}, + CI, ResKind, + {TexLd.get_offset0(), TexLd.get_offset1(), TexLd.get_offset2()}, ValCtx); } break; case DXIL::OpCode::CBufferLoad: { DxilInst_CBufferLoad CBLoad(CI); - Value *regIndex = CBLoad.get_byteOffset(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue(); - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_byteOffset(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue(); + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } } break; case DXIL::OpCode::CBufferLoadLegacy: { DxilInst_CBufferLoadLegacy CBLoad(CI); - Value *regIndex = CBLoad.get_regIndex(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue() * 16; // 16 bytes align - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_regIndex(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue() * 16; // 16 bytes align + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } } break; - case DXIL::OpCode::RawBufferLoad: { + case DXIL::OpCode::RawBufferLoad: if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferLoad, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } - DxilInst_RawBufferLoad bufLd(CI); - DXIL::ComponentType 
compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_buf(), compTy, resClass, ValCtx); + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorLoad: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadHandleOpIdx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); - } - Value *offset = bufLd.get_elementOffset(); - Value *align = bufLd.get_alignment(); - if (!isa(align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } - switch (resKind) { + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferLoadAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorLoad == Opcode) + AlignIdx = DXIL::OperandIndex::kRawBufferVectorLoadAlignmentOpIdx; + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx); + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1523,44 +1527,53 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferStore, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) 
ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } DxilInst_RawBufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); - - if (resClass != DXIL::ResourceClass::UAV) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); - } - - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = + ConstantInt *Mask = dyn_cast(bufSt.get_mask()); + unsigned StValMask = StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), bufSt.get_value2(), bufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, false /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, false /*IsTyped*/, ValCtx)) return; + } + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorStore: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreHandleOpIdx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::UAV) + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); - Value *offset = bufSt.get_elementOffset(); - Value *align = bufSt.get_alignment(); - if (!isa(align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferStoreAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorStore == Opcode) { + AlignIdx = DXIL::OperandIndex::kRawBufferVectorStoreAlignmentOpIdx; + unsigned ValueIx = DXIL::OperandIndex::kRawBufferVectorStoreValOpIdx; + if (isa(CI->getOperand(ValueIx))) + ValCtx.EmitInstrError(CI, + ValidationRule::InstrUndefinedValueForUAVStore); } - switch (resKind) { + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + 
CI->getOperand(DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx); + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1572,9 +1585,9 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::TraceRay: { - DxilInst_TraceRay traceRay(CI); - Value *hdl = traceRay.get_AccelerationStructure(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(hdl); + DxilInst_TraceRay TraceRay(CI); + Value *Hdl = TraceRay.get_AccelerationStructure(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); return; @@ -1589,12 +1602,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } static void ValidateBarrierFlagArg(ValidationContext &ValCtx, CallInst *CI, - Value *Arg, unsigned validMask, - StringRef flagName, StringRef opName) { + Value *Arg, unsigned ValidMask, + StringRef FlagName, StringRef OpName) { if (ConstantInt *CArg = dyn_cast(Arg)) { - if ((CArg->getLimitedValue() & (uint32_t)(~validMask)) != 0) { + if ((CArg->getLimitedValue() & (uint32_t)(~ValidMask)) != 0) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrBarrierFlagInvalid, - {flagName, opName}); + {FlagName, OpName}); } } else { ValCtx.EmitInstrError(CI, @@ -1616,35 +1629,35 @@ std::string GetLaunchTypeStr(DXIL::NodeLaunchType LT) { } static void ValidateDxilOperationCallInProfile(CallInst *CI, - DXIL::OpCode opcode, + DXIL::OpCode Opcode, const ShaderModel *pSM, ValidationContext &ValCtx) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = pSM ? 
pSM->GetKind() : DXIL::ShaderKind::Invalid; llvm::Function *F = CI->getParent()->getParent(); - DXIL::NodeLaunchType nodeLaunchType = DXIL::NodeLaunchType::Invalid; - if (DXIL::ShaderKind::Library == shaderKind) { + DXIL::NodeLaunchType NodeLaunchType = DXIL::NodeLaunchType::Invalid; + if (DXIL::ShaderKind::Library == ShaderKind) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DxilEntryProps &entryProps = ValCtx.DxilMod.GetDxilEntryProps(F); - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind == DXIL::ShaderKind::Node) - nodeLaunchType = entryProps.props.Node.LaunchType; + DxilEntryProps &EntryProps = ValCtx.DxilMod.GetDxilEntryProps(F); + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; + if (ShaderKind == DXIL::ShaderKind::Node) + NodeLaunchType = EntryProps.props.Node.LaunchType; } else if (ValCtx.DxilMod.IsPatchConstantShader(F)) - shaderKind = DXIL::ShaderKind::Hull; + ShaderKind = DXIL::ShaderKind::Hull; } // These shader models are treted like compute - bool isCSLike = shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Node; + bool IsCSLike = ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Node; // Is called from a library function - bool isLibFunc = shaderKind == DXIL::ShaderKind::Library; + bool IsLibFunc = ShaderKind == DXIL::ShaderKind::Library; - ValidateHandleArgs(CI, opcode, ValCtx); + ValidateHandleArgs(CI, Opcode, ValCtx); - switch (opcode) { + switch (Opcode) { // Imm input value validation. 
case DXIL::OpCode::Asin: case DXIL::OpCode::Acos: @@ -1653,7 +1666,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::DerivFineY: case DXIL::OpCode::DerivCoarseX: case DXIL::OpCode::DerivCoarseY: - ValidateImmOperandForMathDxilOp(CI, opcode, ValCtx); + ValidateImmOperandForMathDxilOp(CI, Opcode, ValCtx); break; // Resource validation. case DXIL::OpCode::GetDimensions: @@ -1677,10 +1690,10 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::CBufferLoad: case DXIL::OpCode::CBufferLoadLegacy: case DXIL::OpCode::RawBufferLoad: - case DXIL::OpCode::RawBufferVectorLoad: case DXIL::OpCode::RawBufferStore: + case DXIL::OpCode::RawBufferVectorLoad: case DXIL::OpCode::RawBufferVectorStore: - ValidateResourceDxilOp(CI, opcode, ValCtx); + ValidateResourceDxilOp(CI, Opcode, ValCtx); break; // Input output. case DXIL::OpCode::LoadInput: @@ -1701,13 +1714,13 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::EmitStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::CutStream: - ValidateSignatureDxilOp(CI, opcode, ValCtx); + ValidateSignatureDxilOp(CI, Opcode, ValCtx); break; // Special. 
case DXIL::OpCode::AllocateRayQuery: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); - if (!llvm::isa(constRayFlag)) { + llvm::Value *ConstRayFlag = CI->getOperand(1); + if (!llvm::isa(ConstRayFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQueryFlagsAreConst); } @@ -1715,9 +1728,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::AllocateRayQuery2: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); + llvm::Value *ConstRayFlag = CI->getOperand(1); llvm::Value *RayQueryFlag = CI->getOperand(2); - if (!llvm::isa(constRayFlag) || + if (!llvm::isa(ConstRayFlag) || !llvm::isa(RayQueryFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQuery2FlagsAreConst); @@ -1726,7 +1739,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, // When the ForceOMM2State ConstRayFlag is given as an argument to // a RayQuery object, AllowOpacityMicromaps is expected // as a RayQueryFlag argument - llvm::ConstantInt *Arg1 = llvm::cast(constRayFlag); + llvm::ConstantInt *Arg1 = llvm::cast(ConstRayFlag); llvm::ConstantInt *Arg2 = llvm::cast(RayQueryFlag); if ((Arg1->getValue().getSExtValue() & (unsigned)DXIL::RayFlag::ForceOMM2State) && @@ -1740,9 +1753,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::BufferUpdateCounter: { - DxilInst_BufferUpdateCounter updateCounter(CI); - Value *handle = updateCounter.get_uav(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(handle); + DxilInst_BufferUpdateCounter UpdateCounter(CI); + Value *Handle = UpdateCounter.get_uav(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Handle); if (!RP.isUAV()) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBufferUpdateCounterOnUAV); @@ -1757,20 +1770,20 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, CI, ValidationRule::InstrBufferUpdateCounterOnResHasCounter); } - Value 
*inc = updateCounter.get_inc(); - if (ConstantInt *cInc = dyn_cast(inc)) { - bool isInc = cInc->getLimitedValue() == 1; + Value *Inc = UpdateCounter.get_inc(); + if (ConstantInt *cInc = dyn_cast(Inc)) { + bool IsInc = cInc->getLimitedValue() == 1; if (!ValCtx.isLibProfile) { - auto it = ValCtx.HandleResIndexMap.find(handle); - if (it != ValCtx.HandleResIndexMap.end()) { - unsigned resIndex = it->second; - if (ValCtx.UavCounterIncMap.count(resIndex)) { - if (isInc != ValCtx.UavCounterIncMap[resIndex]) { + auto It = ValCtx.HandleResIndexMap.find(Handle); + if (It != ValCtx.HandleResIndexMap.end()) { + unsigned ResIndex = It->second; + if (ValCtx.UavCounterIncMap.count(ResIndex)) { + if (IsInc != ValCtx.UavCounterIncMap[ResIndex]) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOnlyOneAllocConsume); } } else { - ValCtx.UavCounterIncMap[resIndex] = isInc; + ValCtx.UavCounterIncMap[ResIndex] = IsInc; } } @@ -1785,35 +1798,35 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } break; case DXIL::OpCode::Barrier: { - DxilInst_Barrier barrier(CI); - Value *mode = barrier.get_barrierMode(); - ConstantInt *cMode = dyn_cast(mode); - if (!cMode) { + DxilInst_Barrier Barrier(CI); + Value *Mode = Barrier.get_barrierMode(); + ConstantInt *CMode = dyn_cast(Mode); + if (!CMode) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Mode", "Barrier"}); return; } - const unsigned uglobal = + const unsigned Uglobal = static_cast(DXIL::BarrierMode::UAVFenceGlobal); - const unsigned g = static_cast(DXIL::BarrierMode::TGSMFence); - const unsigned ut = + const unsigned G = static_cast(DXIL::BarrierMode::TGSMFence); + const unsigned Ut = static_cast(DXIL::BarrierMode::UAVFenceThreadGroup); - unsigned barrierMode = cMode->getLimitedValue(); + unsigned BarrierMode = CMode->getLimitedValue(); - if (isCSLike || isLibFunc) { - bool bHasUGlobal = barrierMode & uglobal; - bool bHasGroup = barrierMode & g; - bool bHasUGroup = barrierMode & ut; - if (bHasUGlobal && 
bHasUGroup) { + if (IsCSLike || IsLibFunc) { + bool HasUGlobal = BarrierMode & Uglobal; + bool HasGroup = BarrierMode & G; + bool HasUGroup = BarrierMode & Ut; + if (HasUGlobal && HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeUselessUGroup); } - if (!bHasUGlobal && !bHasGroup && !bHasUGroup) { + if (!HasUGlobal && !HasGroup && !HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeNoMemory); } } else { - if (uglobal != barrierMode) { + if (Uglobal != BarrierMode) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } @@ -1827,28 +1840,28 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, ValidateBarrierFlagArg(ValCtx, CI, DI.get_SemanticFlags(), (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, "semantic", "BarrierByMemoryType"); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; case DXIL::OpCode::BarrierByNodeRecordHandle: case DXIL::OpCode::BarrierByMemoryHandle: { - std::string opName = opcode == DXIL::OpCode::BarrierByNodeRecordHandle + std::string OpName = Opcode == DXIL::OpCode::BarrierByNodeRecordHandle ? 
"barrierByNodeRecordHandle" : "barrierByMemoryHandle"; DxilInst_BarrierByMemoryHandle DIMH(CI); ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_SemanticFlags(), (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", opName); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + "semantic", OpName); + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; @@ -1860,7 +1873,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { - Type *pOverloadType = OP::GetOverloadType(opcode, CI->getCalledFunction()); + Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); if ((pOverloadType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, @@ -1886,73 +1899,73 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::ThreadId: // SV_DispatchThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(nodeLaunchType)}); + {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::GroupId: // SV_GroupId - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == 
DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"GroupId", "SV_GroupId", GetLaunchTypeStr(nodeLaunchType)}); + {"GroupId", "SV_GroupId", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::ThreadIdInGroup: // SV_GroupThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"ThreadIdInGroup", "SV_GroupThreadID", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::FlattenedThreadIdInGroup: // SV_GroupIndex - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"FlattenedThreadIdInGroup", "SV_GroupIndex", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; default: - // TODO: make sure every opcode is checked. + // TODO: make sure every Opcode is checked. // Skip opcodes don't need special check. break; } } static bool IsDxilFunction(llvm::Function *F) { - unsigned argSize = F->arg_size(); - if (argSize < 1) { + unsigned ArgSize = F->arg_size(); + if (ArgSize < 1) { // Cannot be a DXIL operation. 
return false; } @@ -1987,9 +2000,9 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - OP *hlslOP = ValCtx.DxilMod.GetOP(); - bool isDxilOp = OP::IsDxilOpFunc(F); - Type *voidTy = Type::getVoidTy(F->getContext()); + OP *HlslOP = ValCtx.DxilMod.GetOP(); + bool IsDxilOp = OP::IsDxilOpFunc(F); + Type *VoidTy = Type::getVoidTy(F->getContext()); for (User *user : F->users()) { CallInst *CI = dyn_cast(user); @@ -2000,32 +2013,32 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } // Skip call to external user defined function - if (!isDxilOp) + if (!IsDxilOp) continue; - Value *argOpcode = CI->getArgOperand(0); - ConstantInt *constOpcode = dyn_cast(argOpcode); - if (!constOpcode) { - // opcode not immediate; function body will validate this error. + Value *ArgOpcode = CI->getArgOperand(0); + ConstantInt *ConstOpcode = dyn_cast(ArgOpcode); + if (!ConstOpcode) { + // Opcode not immediate; function body will validate this error. continue; } - unsigned opcode = constOpcode->getLimitedValue(); - if (opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { - // invalid opcode; function body will validate this error. + unsigned Opcode = ConstOpcode->getLimitedValue(); + if (Opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { + // invalid Opcode; function body will validate this error. 
continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; // In some cases, no overloads are provided (void is exclusive to others) - Function *dxilFunc; - if (hlslOP->IsOverloadLegal(dxilOpcode, voidTy)) { - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, voidTy); + Function *DxilFunc; + if (HlslOP->IsOverloadLegal(DxilOpcode, VoidTy)) { + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, VoidTy); } else { - Type *Ty = OP::GetOverloadType(dxilOpcode, CI->getCalledFunction()); + Type *Ty = OP::GetOverloadType(DxilOpcode, CI->getCalledFunction()); try { - if (!hlslOP->IsOverloadLegal(dxilOpcode, Ty)) { + if (!HlslOP->IsOverloadLegal(DxilOpcode, Ty)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } @@ -2033,75 +2046,75 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty); + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, Ty); } - if (!dxilFunc) { - // Cannot find dxilFunction based on opcode and type. + if (!DxilFunc) { + // Cannot find DxilFunction based on Opcode and type. 
ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - if (dxilFunc->getFunctionType() != F->getFunctionType()) { + if (DxilFunc->getFunctionType() != F->getFunctionType()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrCallOload, - {dxilFunc->getName()}); + {DxilFunc->getName()}); continue; } unsigned major = pSM->GetMajor(); unsigned minor = pSM->GetMinor(); if (ValCtx.isLibProfile) { - Function *callingFunction = CI->getParent()->getParent(); + Function *CallingFunction = CI->getParent()->getParent(); DXIL::ShaderKind SK = DXIL::ShaderKind::Library; - if (ValCtx.DxilMod.HasDxilFunctionProps(callingFunction)) - SK = ValCtx.DxilMod.GetDxilFunctionProps(callingFunction).shaderKind; - else if (ValCtx.DxilMod.IsPatchConstantShader(callingFunction)) + if (ValCtx.DxilMod.HasDxilFunctionProps(CallingFunction)) + SK = ValCtx.DxilMod.GetDxilFunctionProps(CallingFunction).shaderKind; + else if (ValCtx.DxilMod.IsPatchConstantShader(CallingFunction)) SK = DXIL::ShaderKind::Hull; - if (!ValidateOpcodeInProfile(dxilOpcode, SK, major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, SK, major, minor)) { // Opcode not available in profile. // produces: "lib_6_3(ps)", or "lib_6_3(anyhit)" for shader types // Or: "lib_6_3(lib)" for library function - std::string shaderModel = pSM->GetName(); - shaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; + std::string ShaderModel = pSM->GetName(); + ShaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), shaderModel}); + {HlslOP->GetOpCodeName(DxilOpcode), ShaderModel}); continue; } } else { - if (!ValidateOpcodeInProfile(dxilOpcode, pSM->GetKind(), major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, pSM->GetKind(), major, minor)) { // Opcode not available in profile. 
ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), pSM->GetName()}); + {HlslOP->GetOpCodeName(DxilOpcode), pSM->GetName()}); continue; } } // Check more detail. - ValidateDxilOperationCallInProfile(CI, dxilOpcode, pSM, ValCtx); + ValidateDxilOperationCallInProfile(CI, DxilOpcode, pSM, ValCtx); } } /////////////////////////////////////////////////////////////////////////////// // Instruction validation functions. // -static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { - if (ST == hlslOP->GetBinaryWithCarryType()) +static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *HlslOP) { + if (ST == HlslOP->GetBinaryWithCarryType()) return true; - if (ST == hlslOP->GetBinaryWithTwoOutputsType()) + if (ST == HlslOP->GetBinaryWithTwoOutputsType()) return true; - if (ST == hlslOP->GetFourI32Type()) + if (ST == HlslOP->GetFourI32Type()) return true; - if (ST == hlslOP->GetFourI16Type()) + if (ST == HlslOP->GetFourI16Type()) return true; - if (ST == hlslOP->GetDimensionsType()) + if (ST == HlslOP->GetDimensionsType()) return true; - if (ST == hlslOP->GetHandleType()) + if (ST == HlslOP->GetHandleType()) return true; - if (ST == hlslOP->GetSamplePosType()) + if (ST == HlslOP->GetSamplePosType()) return true; - if (ST == hlslOP->GetSplitDoubleType()) + if (ST == HlslOP->GetSplitDoubleType()) return true; unsigned EltNum = ST->getNumElements(); @@ -2110,14 +2123,14 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { case 2: // Check if it's a native vector resret. if (EltTy->isVectorTy()) - return ST == hlslOP->GetResRetType(EltTy); + return ST == HlslOP->GetResRetType(EltTy); LLVM_FALLTHROUGH; case 4: case 8: // 2 for doubles, 8 for halfs. 
- return ST == hlslOP->GetCBufferRetType(EltTy); + return ST == HlslOP->GetCBufferRetType(EltTy); break; case 5: - return ST == hlslOP->GetResRetType(EltTy); + return ST == HlslOP->GetResRetType(EltTy); break; default: return false; @@ -2128,11 +2141,11 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { // inner type (UDT struct member) may be: [N dim array of]( UDT struct | scalar // ) scalar type may be: ( float(16|32|64) | int(16|32|64) ) static bool ValidateType(Type *Ty, ValidationContext &ValCtx, - bool bInner = false) { + bool IsInner = false) { DXASSERT_NOMSG(Ty != nullptr); if (Ty->isPointerTy()) { Type *EltTy = Ty->getPointerElementType(); - if (bInner || EltTy->isPointerTy()) { + if (IsInner || EltTy->isPointerTy()) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoPtrToPtr); return false; } @@ -2140,7 +2153,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } if (Ty->isArrayTy()) { Type *EltTy = Ty->getArrayElementType(); - if (!bInner && isa(EltTy)) { + if (!IsInner && isa(EltTy)) { // Outermost array should be converted to single-dim, // but arrays inside struct are allowed to be multi-dim ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoMultiDim); @@ -2151,7 +2164,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, Ty = EltTy; } if (Ty->isStructTy()) { - bool result = true; + bool Result = true; StructType *ST = cast(Ty); StringRef Name = ST->getName(); @@ -2159,28 +2172,28 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, // Allow handle type. 
if (ValCtx.HandleTy == Ty) return true; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); - if (IsDxilBuiltinStructType(ST, hlslOP)) { + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); + if (IsDxilBuiltinStructType(ST, HlslOP)) { ValCtx.EmitTypeError(Ty, ValidationRule::InstrDxilStructUser); - result = false; + Result = false; } ValCtx.EmitTypeError(Ty, ValidationRule::DeclDxilNsReserved); - result = false; + Result = false; } for (auto e : ST->elements()) { - if (!ValidateType(e, ValCtx, /*bInner*/ true)) { - result = false; + if (!ValidateType(e, ValCtx, /*IsInner*/ true)) { + Result = false; } } - return result; + return Result; } if (Ty->isFloatTy() || Ty->isHalfTy() || Ty->isDoubleTy()) { return true; } if (Ty->isIntegerTy()) { - unsigned width = Ty->getIntegerBitWidth(); - if (width != 1 && width != 8 && width != 16 && width != 32 && width != 64) { + unsigned Width = Ty->getIntegerBitWidth(); + if (Width != 1 && Width != 8 && Width != 16 && Width != 32 && Width != 64) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesIntWidth); return false; } @@ -2203,13 +2216,13 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, - unsigned index, uint64_t *pValue) { - *pValue = 0; - if (pMD->getNumOperands() < index) { + unsigned Index, uint64_t *PValue) { + *PValue = 0; + if (pMD->getNumOperands() < Index) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - ConstantAsMetadata *C = dyn_cast(pMD->getOperand(index)); + ConstantAsMetadata *C = dyn_cast(pMD->getOperand(Index)); if (C == nullptr) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; @@ -2219,7 +2232,7 @@ static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - *pValue = CI->getValue().getZExtValue(); + *PValue = CI->getValue().getZExtValue(); return true; } @@ -2233,14 +2246,14 @@ static bool 
IsPrecise(Instruction &I, ValidationContext &ValCtx) { return false; } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { return false; } - if (val == 1) { + if (Val == 1) { return true; } - if (val != 0) { + if (Val != 0) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } return false; @@ -2259,12 +2272,12 @@ static bool IsValueMinPrec(DxilModule &DxilMod, Value *V) { } static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *setMeshOutputCounts, - CallInst *getMeshPayload) { + CallInst *SetMeshOutputCounts, + CallInst *GetMeshPayload) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Mesh) + if (ShaderKind != DXIL::ShaderKind::Mesh) return; } else { return; @@ -2273,10 +2286,10 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, DominatorTreeAnalysis DTA; DominatorTree DT = DTA.run(*F); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - bool foundSetMeshOutputCountsInCurrentBB = false; - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + bool FoundSetMeshOutputCountsInCurrentBb = false; + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; // Calls to external functions. 
CallInst *CI = dyn_cast(&I); @@ -2292,22 +2305,22 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, continue; } - if (CI == setMeshOutputCounts) { - foundSetMeshOutputCountsInCurrentBB = true; + if (CI == SetMeshOutputCounts) { + FoundSetMeshOutputCountsInCurrentBb = true; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); - unsigned opcode = OpcodeConst->getLimitedValue(); - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; - - if (dxilOpcode == DXIL::OpCode::StoreVertexOutput || - dxilOpcode == DXIL::OpCode::StorePrimitiveOutput || - dxilOpcode == DXIL::OpCode::EmitIndices) { - if (setMeshOutputCounts == nullptr) { + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); + unsigned Opcode = OpcodeConst->getLimitedValue(); + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; + + if (DxilOpcode == DXIL::OpCode::StoreVertexOutput || + DxilOpcode == DXIL::OpCode::StorePrimitiveOutput || + DxilOpcode == DXIL::OpCode::EmitIndices) { + if (SetMeshOutputCounts == nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMissingSetMeshOutputCounts); - } else if (!foundSetMeshOutputCountsInCurrentBB && - !DT.dominates(setMeshOutputCounts->getParent(), + } else if (!FoundSetMeshOutputCountsInCurrentBb && + !DT.dominates(SetMeshOutputCounts->getParent(), I.getParent())) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNonDominatingSetMeshOutputCounts); @@ -2318,61 +2331,61 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, } } - if (getMeshPayload) { - PointerType *payloadPTy = cast(getMeshPayload->getType()); - StructType *payloadTy = - cast(payloadPTy->getPointerElementType()); + if (GetMeshPayload) { + PointerType *PayloadPTy = cast(GetMeshPayload->getType()); + StructType *PayloadTy = + cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned 
PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.MS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.MS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.MS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes)}); } - if (prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSize, - {F->getName(), std::to_string(prop.ShaderProps.MS.payloadSizeInBytes), + {F->getName(), std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } } static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *dispatchMesh) { + CallInst *DispatchMesh) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Amplification) + if (ShaderKind != DXIL::ShaderKind::Amplification) return; - if (dispatchMesh) { - DxilInst_DispatchMesh dispatchMeshCall(dispatchMesh); - Value *operandVal = dispatchMeshCall.get_payload(); - Type *payloadTy = operandVal->getType(); + if (DispatchMesh) { + DxilInst_DispatchMesh DispatchMeshCall(DispatchMesh); + Value *OperandVal = DispatchMeshCall.get_payload(); + Type *PayloadTy = OperandVal->getType(); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = 
ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.AS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.AS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitInstrFormatError( - dispatchMesh, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes)}); } - if (prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, {F->getName(), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } @@ -2381,7 +2394,7 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, return; } - if (dispatchMesh == nullptr) { + if (DispatchMesh == nullptr) { ValCtx.EmitFnError(F, ValidationRule::InstrNotOnceDispatchMesh); return; } @@ -2389,30 +2402,30 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, PostDominatorTree PDT; PDT.runOnFunction(*F); - if (!PDT.dominates(dispatchMesh->getParent(), &F->getEntryBlock())) { - ValCtx.EmitInstrError(dispatchMesh, + if (!PDT.dominates(DispatchMesh->getParent(), &F->getEntryBlock())) { + ValCtx.EmitInstrError(DispatchMesh, ValidationRule::InstrNonDominatingDispatchMesh); } - Function *dispatchMeshFunc = dispatchMesh->getCalledFunction(); - FunctionType *dispatchMeshFuncTy = dispatchMeshFunc->getFunctionType(); - PointerType *payloadPTy = - cast(dispatchMeshFuncTy->getParamType(4)); - StructType *payloadTy = 
cast(payloadPTy->getPointerElementType()); + Function *DispatchMeshFunc = DispatchMesh->getCalledFunction(); + FunctionType *DispatchMeshFuncTy = DispatchMeshFunc->getFunctionType(); + PointerType *PayloadPTy = + cast(DispatchMeshFuncTy->getParamType(4)); + StructType *PayloadTy = cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - if (payloadSize > DXIL::kMaxMSASPayloadBytes) { + if (PayloadSize > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, - {F->getName(), std::to_string(payloadSize), + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + {F->getName(), std::to_string(PayloadSize), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } -static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { +static void ValidateControlFlowHint(BasicBlock &BB, ValidationContext &ValCtx) { // Validate controlflow hint. 
- TerminatorInst *TI = bb.getTerminator(); + TerminatorInst *TI = BB.getTerminator(); if (!TI) return; @@ -2423,33 +2436,33 @@ static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { if (pNode->getNumOperands() < 3) return; - bool bHasBranch = false; - bool bHasFlatten = false; - bool bForceCase = false; + bool HasBranch = false; + bool HasFlatten = false; + bool ForceCase = false; - for (unsigned i = 2; i < pNode->getNumOperands(); i++) { - uint64_t value = 0; - if (GetNodeOperandAsInt(ValCtx, pNode, i, &value)) { - DXIL::ControlFlowHint hint = static_cast(value); - switch (hint) { + for (unsigned I = 2; I < pNode->getNumOperands(); I++) { + uint64_t Value = 0; + if (GetNodeOperandAsInt(ValCtx, pNode, I, &Value)) { + DXIL::ControlFlowHint Hint = static_cast(Value); + switch (Hint) { case DXIL::ControlFlowHint::Flatten: - bHasFlatten = true; + HasFlatten = true; break; case DXIL::ControlFlowHint::Branch: - bHasBranch = true; + HasBranch = true; break; case DXIL::ControlFlowHint::ForceCase: - bForceCase = true; + ForceCase = true; break; default: ValCtx.EmitMetaError(pNode, ValidationRule::MetaInvalidControlFlowHint); } } } - if (bHasBranch && bHasFlatten) { + if (HasBranch && HasFlatten) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaBranchFlatten); } - if (bForceCase && !isa(TI)) { + if (ForceCase && !isa(TI)) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaForceCaseOnSwitch); } } @@ -2462,30 +2475,30 @@ static void ValidateTBAAMetadata(MDNode *Node, ValidationContext &ValCtx) { } } break; case 2: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if (!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } } break; case 3: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if 
(!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } - ConstantAsMetadata *pointsToConstMem = + ConstantAsMetadata *PointsToConstMem = dyn_cast(Node->getOperand(2)); - if (!pointsToConstMem) { + if (!PointsToConstMem) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ConstantInt *isConst = - dyn_cast(pointsToConstMem->getValue()); - if (!isConst) { + ConstantInt *IsConst = + dyn_cast(PointsToConstMem->getValue()); + if (!IsConst) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); - } else if (isConst->getValue().getLimitedValue() > 1) { + } else if (IsConst->getValue().getLimitedValue() > 1) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } } @@ -2566,11 +2579,11 @@ static void ValidateNonUniformMetadata(Instruction &I, MDNode *pMD, if (pMD->getNumOperands() != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - if (val != 1) { + if (Val != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } } @@ -2605,31 +2618,31 @@ static void ValidateInstructionMetadata(Instruction *I, } static void ValidateFunctionAttribute(Function *F, ValidationContext &ValCtx) { - AttributeSet attrSet = F->getAttributes().getFnAttributes(); + AttributeSet AttrSet = F->getAttributes().getFnAttributes(); // fp32-denorm-mode - if (attrSet.hasAttribute(AttributeSet::FunctionIndex, + if (AttrSet.hasAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString)) { - Attribute attr = attrSet.getAttribute(AttributeSet::FunctionIndex, + Attribute Attr = AttrSet.getAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString); - StringRef value = attr.getValueAsString(); - if 
(!value.equals(DXIL::kFP32DenormValueAnyString) && - !value.equals(DXIL::kFP32DenormValueFtzString) && - !value.equals(DXIL::kFP32DenormValuePreserveString)) { - ValCtx.EmitFnAttributeError(F, attr.getKindAsString(), - attr.getValueAsString()); + StringRef StrValue = Attr.getValueAsString(); + if (!StrValue.equals(DXIL::kFP32DenormValueAnyString) && + !StrValue.equals(DXIL::kFP32DenormValueFtzString) && + !StrValue.equals(DXIL::kFP32DenormValuePreserveString)) { + ValCtx.EmitFnAttributeError(F, Attr.getKindAsString(), + Attr.getValueAsString()); } } // TODO: If validating libraries, we should remove all unknown function // attributes. For each attribute, check if it is a known attribute - for (unsigned I = 0, E = attrSet.getNumSlots(); I != E; ++I) { - for (auto AttrIter = attrSet.begin(I), AttrEnd = attrSet.end(I); + for (unsigned I = 0, E = AttrSet.getNumSlots(); I != E; ++I) { + for (auto AttrIter = AttrSet.begin(I), AttrEnd = AttrSet.end(I); AttrIter != AttrEnd; ++AttrIter) { if (!AttrIter->isStringAttribute()) { continue; } - StringRef kind = AttrIter->getKindAsString(); - if (!kind.equals(DXIL::kFP32DenormKindString) && - !kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { + StringRef Kind = AttrIter->getKindAsString(); + if (!Kind.equals(DXIL::kFP32DenormKindString) && + !Kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { ValCtx.EmitFnAttributeError(F, AttrIter->getKindAsString(), AttrIter->getValueAsString()); } @@ -2679,10 +2692,10 @@ static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I, ValidationContext &ValCtx) { if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) return true; - unsigned OpCode = I.getOpcode(); - if (OpCode == Instruction::InsertElement || - OpCode == Instruction::ExtractElement || - OpCode == Instruction::ShuffleVector) + unsigned Opcode = I.getOpcode(); + if (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement || + Opcode == Instruction::ShuffleVector) return false; return true; @@ 
-2693,16 +2706,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision; bool SupportsLifetimeIntrinsics = ValCtx.DxilMod.GetShaderModel()->IsSM66Plus(); - SmallVector gradientOps; - SmallVector barriers; - CallInst *setMeshOutputCounts = nullptr; - CallInst *getMeshPayload = nullptr; - CallInst *dispatchMesh = nullptr; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + SmallVector GradientOps; + SmallVector Barriers; + CallInst *SetMeshOutputCounts = nullptr; + CallInst *GetMeshPayload = nullptr; + CallInst *DispatchMesh = nullptr; + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; if (I.hasMetadata()) { @@ -2710,7 +2723,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } // Instructions must be allowed. 
- if (!IsLLVMInstructionAllowed(I) || !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) { + if (!IsLLVMInstructionAllowed(I) || + !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) { if (!IsLLVMInstructionAllowedForLib(I, ValCtx)) { ValCtx.EmitInstrError(&I, ValidationRule::InstrAllowed); continue; @@ -2740,27 +2754,27 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); if (OpcodeConst == nullptr) { ValCtx.EmitInstrFormatError(&I, ValidationRule::InstrOpConst, {"Opcode", "DXIL operation"}); continue; } - unsigned opcode = OpcodeConst->getLimitedValue(); - if (opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { + unsigned Opcode = OpcodeConst->getLimitedValue(); + if (Opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpCode, {std::to_string((unsigned)DXIL::OpCode::NumOpCodes), - std::to_string(opcode)}); + std::to_string(Opcode)}); continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; bool IllegalOpFunc = true; - for (auto &it : hlslOP->GetOpFuncList(dxilOpcode)) { - if (it.second == FCalled) { + for (auto &It : HlslOP->GetOpFuncList(DxilOpcode)) { + if (It.second == FCalled) { IllegalOpFunc = false; break; } @@ -2769,46 +2783,46 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { if (IllegalOpFunc) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpFunction, - {FCalled->getName(), OP::GetOpCodeName(dxilOpcode)}); + {FCalled->getName(), OP::GetOpCodeName(DxilOpcode)}); continue; } - if (OP::IsDxilOpGradient(dxilOpcode)) { - gradientOps.push_back(CI); + if (OP::IsDxilOpGradient(DxilOpcode)) { + GradientOps.push_back(CI); } - if (dxilOpcode == DXIL::OpCode::Barrier) { - 
barriers.push_back(CI); + if (DxilOpcode == DXIL::OpCode::Barrier) { + Barriers.push_back(CI); } // External function validation will check the parameter // list. This function will check that the call does not // violate any rules. - if (dxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { + if (DxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { // validate the call count of SetMeshOutputCounts - if (setMeshOutputCounts != nullptr) { + if (SetMeshOutputCounts != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleSetMeshOutputCounts); } - setMeshOutputCounts = CI; + SetMeshOutputCounts = CI; } - if (dxilOpcode == DXIL::OpCode::GetMeshPayload) { + if (DxilOpcode == DXIL::OpCode::GetMeshPayload) { // validate the call count of GetMeshPayload - if (getMeshPayload != nullptr) { + if (GetMeshPayload != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleGetMeshPayload); } - getMeshPayload = CI; + GetMeshPayload = CI; } - if (dxilOpcode == DXIL::OpCode::DispatchMesh) { + if (DxilOpcode == DXIL::OpCode::DispatchMesh) { // validate the call count of DispatchMesh - if (dispatchMesh != nullptr) { + if (DispatchMesh != nullptr) { ValCtx.EmitInstrError(&I, ValidationRule::InstrNotOnceDispatchMesh); } - dispatchMesh = CI; + DispatchMesh = CI; } } continue; @@ -2816,23 +2830,23 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { for (Value *op : I.operands()) { if (isa(op)) { - bool legalUndef = isa(&I); + bool LegalUndef = isa(&I); if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } if (isa(&I)) { - legalUndef = op == I.getOperand(1); + LegalUndef = op == I.getOperand(1); } if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } - if (!legalUndef) + if (!LegalUndef) ValCtx.EmitInstrError(&I, ValidationRule::InstrNoReadingUninitialized); } else if (ConstantExpr *CE = dyn_cast(op)) { - for (Value *opCE : CE->operands()) { - if (isa(opCE)) { + for 
(Value *OpCE : CE->operands()) { + if (isa(OpCE)) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNoReadingUninitialized); } @@ -2862,8 +2876,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } - unsigned opcode = I.getOpcode(); - switch (opcode) { + unsigned Opcode = I.getOpcode(); + switch (Opcode) { case Instruction::Alloca: { AllocaInst *AI = cast(&I); // TODO: validate address space and alignment @@ -2904,26 +2918,26 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } GetElementPtrInst *GEP = cast(&I); - bool allImmIndex = true; + bool AllImmIndex = true; for (auto Idx = GEP->idx_begin(), E = GEP->idx_end(); Idx != E; Idx++) { if (!isa(Idx)) { - allImmIndex = false; + AllImmIndex = false; break; } } - if (allImmIndex) { + if (AllImmIndex) { const DataLayout &DL = ValCtx.DL; Value *Ptr = GEP->getPointerOperand(); - unsigned size = + unsigned Size = DL.getTypeAllocSize(Ptr->getType()->getPointerElementType()); - unsigned valSize = + unsigned ValSize = DL.getTypeAllocSize(GEP->getType()->getPointerElementType()); SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - unsigned offset = + unsigned Offset = DL.getIndexedOffset(GEP->getPointerOperandType(), Indices); - if ((offset + valSize) > size) { + if ((Offset + ValSize) > Size) { ValCtx.EmitInstrError(GEP, ValidationRule::InstrInBoundsAccess); } } @@ -2997,16 +3011,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: { Value *Ptr = I.getOperand(AtomicRMWInst::getPointerOperandIndex()); - PointerType *ptrType = cast(Ptr->getType()); - Type *elType = ptrType->getElementType(); + PointerType *PtrType = cast(Ptr->getType()); + Type *ElType = PtrType->getElementType(); const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - if ((elType->isIntegerTy(64)) && !pSM->IsSM66Plus()) + if ((ElType->isIntegerTy(64)) && !pSM->IsSM66Plus()) 
ValCtx.EmitInstrFormatError( &I, ValidationRule::SmOpcodeInInvalidFunction, {"64-bit atomic operations", "Shader Model 6.6+"}); - if (ptrType->getAddressSpace() != DXIL::kTGSMAddrSpace && - ptrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) + if (PtrType->getAddressSpace() != DXIL::kTGSMAddrSpace && + PtrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) ValCtx.EmitInstrError( &I, ValidationRule::InstrAtomicOpNonGroupsharedOrRecord); @@ -3057,12 +3071,12 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } } - ValidateControlFlowHint(*b, ValCtx); + ValidateControlFlowHint(*B, ValCtx); } - ValidateMsIntrinsics(F, ValCtx, setMeshOutputCounts, getMeshPayload); + ValidateMsIntrinsics(F, ValCtx, SetMeshOutputCounts, GetMeshPayload); - ValidateAsIntrinsics(F, ValCtx, dispatchMesh); + ValidateAsIntrinsics(F, ValCtx, DispatchMesh); } static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { @@ -3070,39 +3084,39 @@ static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { // to do here if (!ValCtx.DxilMod.HasDxilFunctionProps(F)) return; - auto &props = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (!props.IsNode()) + auto &Props = ValCtx.DxilMod.GetDxilFunctionProps(F); + if (!Props.IsNode()) return; - if (props.InputNodes.size() > 1) { + if (Props.InputNodes.size() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::DeclMultipleNodeInputs, - {F->getName(), std::to_string(props.InputNodes.size())}); + {F->getName(), std::to_string(Props.InputNodes.size())}); } - for (auto &input : props.InputNodes) { - if (!input.Flags.RecordTypeMatchesLaunchType(props.Node.LaunchType)) { + for (auto &input : Props.InputNodes) { + if (!input.Flags.RecordTypeMatchesLaunchType(Props.Node.LaunchType)) { // We allow EmptyNodeInput here, as that may have been added implicitly // if there was no input specified if (input.Flags.IsEmptyInput()) continue; - llvm::StringRef validInputs = ""; - switch 
(props.Node.LaunchType) { + llvm::StringRef ValidInputs = ""; + switch (Props.Node.LaunchType) { case DXIL::NodeLaunchType::Broadcasting: - validInputs = "{RW}DispatchNodeInputRecord"; + ValidInputs = "{RW}DispatchNodeInputRecord"; break; case DXIL::NodeLaunchType::Coalescing: - validInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; + ValidInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; break; case DXIL::NodeLaunchType::Thread: - validInputs = "{RW}ThreadNodeInputRecord"; + ValidInputs = "{RW}ThreadNodeInputRecord"; break; default: llvm_unreachable("invalid launch type"); } ValCtx.EmitFnFormatError( F, ValidationRule::DeclNodeLaunchInputType, - {ShaderModel::GetNodeLaunchTypeName(props.Node.LaunchType), - F->getName(), validInputs}); + {ShaderModel::GetNodeLaunchTypeName(Props.Node.LaunchType), + F->getName(), ValidInputs}); } } } @@ -3113,26 +3127,26 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { if (F.isIntrinsic() || IsDxilFunction(&F)) return; } else { - DXIL::ShaderKind shaderKind = DXIL::ShaderKind::Library; - bool isShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); - unsigned numUDTShaderArgs = 0; - if (isShader) { - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; - switch (shaderKind) { + DXIL::ShaderKind ShaderKind = DXIL::ShaderKind::Library; + bool IsShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); + unsigned NumUDTShaderArgs = 0; + if (IsShader) { + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; + switch (ShaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - numUDTShaderArgs = 2; + NumUDTShaderArgs = 2; break; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - numUDTShaderArgs = 1; + NumUDTShaderArgs = 1; break; case DXIL::ShaderKind::Compute: { DxilModule &DM = ValCtx.DxilMod; if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); // Check that 
compute has no node metadata - if (entryProps.props.IsNode()) { + if (EntryProps.props.IsNode()) { ValCtx.EmitFnFormatError(&F, ValidationRule::MetaComputeWithNode, {F.getName()}); } @@ -3143,45 +3157,45 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { break; } } else { - isShader = ValCtx.DxilMod.IsPatchConstantShader(&F); + IsShader = ValCtx.DxilMod.IsPatchConstantShader(&F); } // Entry function should not have parameter. - if (isShader && 0 == numUDTShaderArgs && !F.arg_empty()) + if (IsShader && 0 == NumUDTShaderArgs && !F.arg_empty()) ValCtx.EmitFnFormatError(&F, ValidationRule::FlowFunctionCall, {F.getName()}); // Shader functions should return void. - if (isShader && !F.getReturnType()->isVoidTy()) + if (IsShader && !F.getReturnType()->isVoidTy()) ValCtx.EmitFnFormatError(&F, ValidationRule::DeclShaderReturnVoid, {F.getName()}); - auto ArgFormatError = [&](Function &F, Argument &arg, ValidationRule rule) { - if (arg.hasName()) - ValCtx.EmitFnFormatError(&F, rule, {arg.getName().str(), F.getName()}); + auto ArgFormatError = [&](Function &F, Argument &Arg, ValidationRule Rule) { + if (Arg.hasName()) + ValCtx.EmitFnFormatError(&F, Rule, {Arg.getName().str(), F.getName()}); else - ValCtx.EmitFnFormatError(&F, rule, - {std::to_string(arg.getArgNo()), F.getName()}); + ValCtx.EmitFnFormatError(&F, Rule, + {std::to_string(Arg.getArgNo()), F.getName()}); }; - unsigned numArgs = 0; - for (auto &arg : F.args()) { - Type *argTy = arg.getType(); - if (argTy->isPointerTy()) - argTy = argTy->getPointerElementType(); - - numArgs++; - if (numUDTShaderArgs) { - if (arg.getArgNo() >= numUDTShaderArgs) { - ArgFormatError(F, arg, ValidationRule::DeclExtraArgs); - } else if (!argTy->isStructTy()) { - switch (shaderKind) { + unsigned NumArgs = 0; + for (auto &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + if (ArgTy->isPointerTy()) + ArgTy = ArgTy->getPointerElementType(); + + NumArgs++; + if (NumUDTShaderArgs) { + if (Arg.getArgNo() >= 
NumUDTShaderArgs) { + ArgFormatError(F, Arg, ValidationRule::DeclExtraArgs); + } else if (!ArgTy->isStructTy()) { + switch (ShaderKind) { case DXIL::ShaderKind::Callable: - ArgFormatError(F, arg, ValidationRule::DeclParamStruct); + ArgFormatError(F, Arg, ValidationRule::DeclParamStruct); break; default: - ArgFormatError(F, arg, - arg.getArgNo() == 0 + ArgFormatError(F, Arg, + Arg.getArgNo() == 0 ? ValidationRule::DeclPayloadStruct : ValidationRule::DeclAttrStruct); } @@ -3189,24 +3203,24 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { continue; } - while (argTy->isArrayTy()) { - argTy = argTy->getArrayElementType(); + while (ArgTy->isArrayTy()) { + ArgTy = ArgTy->getArrayElementType(); } - if (argTy->isStructTy() && !ValCtx.isLibProfile) { - ArgFormatError(F, arg, ValidationRule::DeclFnFlattenParam); + if (ArgTy->isStructTy() && !ValCtx.isLibProfile) { + ArgFormatError(F, Arg, ValidationRule::DeclFnFlattenParam); break; } } - if (numArgs < numUDTShaderArgs && shaderKind != DXIL::ShaderKind::Node) { - StringRef argType[2] = { - shaderKind == DXIL::ShaderKind::Callable ? "params" : "payload", + if (NumArgs < NumUDTShaderArgs && ShaderKind != DXIL::ShaderKind::Node) { + StringRef ArgType[2] = { + ShaderKind == DXIL::ShaderKind::Callable ? 
"params" : "payload", "attributes"}; - for (unsigned i = numArgs; i < numUDTShaderArgs; i++) { + for (unsigned I = NumArgs; I < NumUDTShaderArgs; I++) { ValCtx.EmitFnFormatError( &F, ValidationRule::DeclShaderMissingArg, - {ShaderModel::GetKindName(shaderKind), F.getName(), argType[i]}); + {ShaderModel::GetKindName(ShaderKind), F.getName(), ArgType[I]}); } } @@ -3243,25 +3257,25 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { static void ValidateGlobalVariable(GlobalVariable &GV, ValidationContext &ValCtx) { - bool isInternalGV = + bool IsInternalGv = dxilutil::IsStaticGlobal(&GV) || dxilutil::IsSharedMemoryGlobal(&GV); if (ValCtx.isLibProfile) { - auto isCBufferGlobal = + auto IsCBufferGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isResourceGlobal = + auto IsResourceGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isSamplerGlobal = + auto IsSamplerGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) @@ -3269,32 +3283,32 @@ static void ValidateGlobalVariable(GlobalVariable &GV, return false; }; - bool isRes = isCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetUAVs()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetSRVs()); - isRes |= isSamplerGlobal(ValCtx.DxilMod.GetSamplers()); - isInternalGV |= isRes; + bool IsRes = IsCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetUAVs()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetSRVs()); + IsRes |= IsSamplerGlobal(ValCtx.DxilMod.GetSamplers()); + IsInternalGv |= IsRes; // Allow special dx.ishelper for library target if (GV.getName().compare(DXIL::kDxIsHelperGlobalName) == 0) { Type *Ty = GV.getType()->getPointerElementType(); if (Ty->isIntegerTy() && 
Ty->getScalarSizeInBits() == 32) { - isInternalGV = true; + IsInternalGv = true; } } } - if (!isInternalGV) { + if (!IsInternalGv) { if (!GV.user_empty()) { - bool hasInstructionUser = false; + bool HasInstructionUser = false; for (User *U : GV.users()) { if (isa(U)) { - hasInstructionUser = true; + HasInstructionUser = true; break; } } // External GV should not have instruction user. - if (hasInstructionUser) { + if (HasInstructionUser) { ValCtx.EmitGlobalVariableFormatError( &GV, ValidationRule::DeclNotUsedExternal, {GV.getName()}); } @@ -3317,14 +3331,14 @@ static void ValidateGlobalVariable(GlobalVariable &GV, } static void CollectFixAddressAccess(Value *V, - std::vector &fixAddrTGSMList) { + std::vector &FixAddrTGSMList) { for (User *U : V->users()) { if (GEPOperator *GEP = dyn_cast(U)) { if (isa(GEP) || GEP->hasAllConstantIndices()) { - CollectFixAddressAccess(GEP, fixAddrTGSMList); + CollectFixAddressAccess(GEP, FixAddrTGSMList); } } else if (StoreInst *SI = dyn_cast(U)) { - fixAddrTGSMList.emplace_back(SI); + FixAddrTGSMList.emplace_back(SI); } } } @@ -3334,16 +3348,16 @@ static bool IsDivergent(Value *V) { return false; } -static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, +static void ValidateTGSMRaceCondition(std::vector &FixAddrTGSMList, ValidationContext &ValCtx) { - std::unordered_set fixAddrTGSMFuncSet; - for (StoreInst *I : fixAddrTGSMList) { + std::unordered_set FixAddrTGSMFuncSet; + for (StoreInst *I : FixAddrTGSMList) { BasicBlock *BB = I->getParent(); - fixAddrTGSMFuncSet.insert(BB->getParent()); + FixAddrTGSMFuncSet.insert(BB->getParent()); } for (auto &F : ValCtx.DxilMod.GetModule()->functions()) { - if (F.isDeclaration() || !fixAddrTGSMFuncSet.count(&F)) + if (F.isDeclaration() || !FixAddrTGSMFuncSet.count(&F)) continue; PostDominatorTree PDT; @@ -3351,7 +3365,7 @@ static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, BasicBlock *Entry = &F.getEntryBlock(); - for (StoreInst *SI : fixAddrTGSMList) { + for 
(StoreInst *SI : FixAddrTGSMList) { BasicBlock *BB = SI->getParent(); if (BB->getParent() == &F) { if (PDT.dominates(BB, Entry)) { @@ -3370,7 +3384,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib(); unsigned TGSMSize = 0; - std::vector fixAddrTGSMList; + std::vector FixAddrTGSMList; const DataLayout &DL = M.GetModule()->getDataLayout(); for (GlobalVariable &GV : M.GetModule()->globals()) { ValidateGlobalVariable(GV, ValCtx); @@ -3385,9 +3399,9 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { if (Instruction *I = dyn_cast(U)) { llvm::Function *F = I->getParent()->getParent(); if (M.HasDxilEntryProps(F)) { - DxilFunctionProps &props = M.GetDxilEntryProps(F).props; - if (!props.IsCS() && !props.IsAS() && !props.IsMS() && - !props.IsNode()) { + DxilFunctionProps &Props = M.GetDxilEntryProps(F).props; + if (!Props.IsCS() && !Props.IsAS() && !Props.IsMS() && + !Props.IsNode()) { ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported, {"from non-compute entry points"}); @@ -3397,7 +3411,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { } } TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType()); - CollectFixAddressAccess(&GV, fixAddrTGSMList); + CollectFixAddressAccess(&GV, FixAddrTGSMList); } } @@ -3421,8 +3435,8 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { GV, Rule, {std::to_string(TGSMSize), std::to_string(MaxSize)}); } - if (!fixAddrTGSMList.empty()) { - ValidateTGSMRaceCondition(fixAddrTGSMList, ValCtx); + if (!FixAddrTGSMList.empty()) { + ValidateTGSMRaceCondition(FixAddrTGSMList, ValCtx); } } @@ -3435,20 +3449,20 @@ static void ValidateValidatorVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if 
(GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { - unsigned curMajor, curMinor; - GetValidationVersion(&curMajor, &curMinor); + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { + unsigned CurMajor, CurMinor; + GetValidationVersion(&CurMajor, &CurMinor); // This will need to be updated as major/minor versions evolve, // depending on the degree of compat across versions. - if (majorVer == curMajor && minorVer <= curMinor) { + if (MajorVer == CurMajor && MinorVer <= CurMinor) { return; } else { ValCtx.EmitFormatError( ValidationRule::MetaVersionSupported, - {"Validator", std::to_string(majorVer), std::to_string(minorVer), - std::to_string(curMajor), std::to_string(curMinor)}); + {"Validator", std::to_string(MajorVer), std::to_string(MinorVer), + std::to_string(CurMajor), std::to_string(CurMinor)}); return; } } @@ -3466,19 +3480,19 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { // This will need to be updated as dxil major/minor versions evolve, // depending on the degree of compat across versions. 
- if ((majorVer == DXIL::kDxilMajor && minorVer <= DXIL::kDxilMinor) && - (majorVer == ValCtx.m_DxilMajor && - minorVer == ValCtx.m_DxilMinor)) { + if ((MajorVer == DXIL::kDxilMajor && MinorVer <= DXIL::kDxilMinor) && + (MajorVer == ValCtx.m_DxilMajor && + MinorVer == ValCtx.m_DxilMinor)) { return; } else { ValCtx.EmitFormatError(ValidationRule::MetaVersionSupported, - {"Dxil", std::to_string(majorVer), - std::to_string(minorVer), + {"Dxil", std::to_string(MajorVer), + std::to_string(MinorVer), std::to_string(DXIL::kDxilMajor), std::to_string(DXIL::kDxilMinor)}); return; @@ -3496,16 +3510,16 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { NamedMDNode *TA = pModule->getNamedMetadata("dx.typeAnnotations"); if (TA == nullptr) return; - for (unsigned i = 0, end = TA->getNumOperands(); i < end; ++i) { - MDTuple *TANode = dyn_cast(TA->getOperand(i)); + for (unsigned I = 0, End = TA->getNumOperands(); I < End; ++I) { + MDTuple *TANode = dyn_cast(TA->getOperand(I)); if (TANode->getNumOperands() < 3) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } - ConstantInt *tag = mdconst::extract(TANode->getOperand(0)); - uint64_t tagValue = tag->getZExtValue(); - if (tagValue != DxilMDHelper::kDxilTypeSystemStructTag && - tagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { + ConstantInt *Tag = mdconst::extract(TANode->getOperand(0)); + uint64_t TagValue = Tag->getZExtValue(); + if (TagValue != DxilMDHelper::kDxilTypeSystemStructTag && + TagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } @@ -3514,11 +3528,11 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { } static void ValidateBitcode(ValidationContext &ValCtx) { - std::string diagStr; - raw_string_ostream diagStream(diagStr); - if (llvm::verifyModule(ValCtx.M, &diagStream)) { + std::string DiagStr; + raw_string_ostream DiagStream(DiagStr); + if (llvm::verifyModule(ValCtx.M, &DiagStream)) { 
ValCtx.EmitError(ValidationRule::BitcodeValid); - dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), diagStream.str()); + dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), DiagStream.str()); } } @@ -3532,18 +3546,18 @@ static void ValidateWaveSize(ValidationContext &ValCtx, if (!EPs) return; - for (unsigned i = 0, end = EPs->getNumOperands(); i < end; ++i) { - MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(i)); + for (unsigned I = 0, End = EPs->getNumOperands(); I < End; ++I) { + MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(I)); if (EPNodeRef->getNumOperands() < 5) { ValCtx.EmitMetaError(EPNodeRef, ValidationRule::MetaWellFormed); return; } // get access to the digit that represents the metadata number that // would store entry properties - const llvm::MDOperand &mOp = + const llvm::MDOperand &MOp = EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1); // the final operand to the entry points tuple should be a tuple. - if (mOp == nullptr || (mOp.get())->getMetadataID() != Metadata::MDTupleKind) + if (MOp == nullptr || (MOp.get())->getMetadataID() != Metadata::MDTupleKind) continue; // get access to the node that stores entry properties @@ -3551,29 +3565,29 @@ static void ValidateWaveSize(ValidationContext &ValCtx, EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1)); // find any incompatible tags inside the entry properties // increment j by 2 to only analyze tags, not values - bool foundTag = false; - for (unsigned j = 0, end2 = EPropNode->getNumOperands(); j < end2; j += 2) { - const MDOperand &propertyTagOp = EPropNode->getOperand(j); + bool FoundTag = false; + for (unsigned J = 0, End2 = EPropNode->getNumOperands(); J < End2; J += 2) { + const MDOperand &PropertyTagOp = EPropNode->getOperand(J); // note, we are only looking for tags, which will be a constant // integer - DXASSERT(!(propertyTagOp == nullptr || - (propertyTagOp.get())->getMetadataID() != + DXASSERT(!(PropertyTagOp == nullptr || + (PropertyTagOp.get())->getMetadataID() != 
Metadata::ConstantAsMetadataKind), "tag operand should be a constant integer."); - ConstantInt *tag = mdconst::extract(propertyTagOp); - uint64_t tagValue = tag->getZExtValue(); + ConstantInt *Tag = mdconst::extract(PropertyTagOp); + uint64_t TagValue = Tag->getZExtValue(); // legacy wavesize is only supported between 6.6 and 6.7, so we // should fail if we find the ranged wave size metadata tag - if (tagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { + if (TagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; + FoundTag = true; if (SM->IsSM66Plus() && !SM->IsSM68Plus()) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeRangeNeedsSM68Plus, @@ -3582,36 +3596,36 @@ static void ValidateWaveSize(ValidationContext &ValCtx, } // get the metadata that contains the // parameters to the wavesize attribute - MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 3) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeRangeExpectsThreeParams, {}); return; } - for (int k = 0; k < 3; k++) { - const MDOperand ¶m = WaveTuple->getOperand(k); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + for (int K = 0; K < 3; K++) { + const MDOperand &Param = WaveTuple->getOperand(K); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; } } - } else if (tagValue == DxilMDHelper::kDxilWaveSizeTag) { + } else if (TagValue == DxilMDHelper::kDxilWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; - 
MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + FoundTag = true; + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 1) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeExpectsOneParam, {}); return; } - const MDOperand ¶m = WaveTuple->getOperand(0); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + const MDOperand &Param = WaveTuple->getOperand(0); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; @@ -3632,9 +3646,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { ValidateDxilVersion(ValCtx); Module *pModule = &ValCtx.M; - const std::string &target = pModule->getTargetTriple(); - if (target != "dxil-ms-dx") { - ValCtx.EmitFormatError(ValidationRule::MetaTarget, {target}); + const std::string &Target = pModule->getTargetTriple(); + if (Target != "dxil-ms-dx") { + ValCtx.EmitFormatError(ValidationRule::MetaTarget, {Target}); } // The llvm.dbg.(cu/contents/defines/mainFileName/arg) named metadata nodes @@ -3642,9 +3656,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { // llvm.bitsets is also disallowed. // // These are verified in lib/IR/Verifier.cpp. 
- StringMap llvmNamedMeta; - llvmNamedMeta["llvm.ident"]; - llvmNamedMeta["llvm.module.flags"]; + StringMap LlvmNamedMeta; + LlvmNamedMeta["llvm.ident"]; + LlvmNamedMeta["llvm.module.flags"]; for (auto &NamedMetaNode : pModule->named_metadata()) { if (!DxilModule::IsKnownNamedMetaData(NamedMetaNode)) { @@ -3652,7 +3666,7 @@ static void ValidateMetadata(ValidationContext &ValCtx) { if (!name.startswith_lower("llvm.")) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } else { - if (llvmNamedMeta.count(name) == 0) { + if (LlvmNamedMeta.count(name) == 0) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } } @@ -3685,35 +3699,35 @@ static void ValidateMetadata(ValidationContext &ValCtx) { } static void ValidateResourceOverlap( - hlsl::DxilResourceBase &res, - SpacesAllocator &spaceAllocator, + hlsl::DxilResourceBase &Res, + SpacesAllocator &SpaceAllocator, ValidationContext &ValCtx) { - unsigned base = res.GetLowerBound(); - if (ValCtx.isLibProfile && !res.IsAllocated()) { + unsigned Base = Res.GetLowerBound(); + if (ValCtx.isLibProfile && !Res.IsAllocated()) { // Skip unallocated resource for library. 
return; } - unsigned size = res.GetRangeSize(); - unsigned space = res.GetSpaceID(); + unsigned Size = Res.GetRangeSize(); + unsigned Space = Res.GetSpaceID(); - auto &allocator = spaceAllocator.Get(space); - unsigned end = base + size - 1; + auto &Allocator = SpaceAllocator.Get(Space); + unsigned End = Base + Size - 1; // unbounded - if (end < base) - end = size; - const DxilResourceBase *conflictRes = allocator.Insert(&res, base, end); - if (conflictRes) { + if (End < Base) + End = Size; + const DxilResourceBase *ConflictRes = Allocator.Insert(&Res, Base, End); + if (ConflictRes) { ValCtx.EmitFormatError( ValidationRule::SmResourceRangeOverlap, - {ValCtx.GetResourceName(&res), std::to_string(base), - std::to_string(size), std::to_string(conflictRes->GetLowerBound()), - std::to_string(conflictRes->GetRangeSize()), std::to_string(space)}); + {ValCtx.GetResourceName(&Res), std::to_string(Base), + std::to_string(Size), std::to_string(ConflictRes->GetLowerBound()), + std::to_string(ConflictRes->GetRangeSize()), std::to_string(Space)}); } } -static void ValidateResource(hlsl::DxilResource &res, +static void ValidateResource(hlsl::DxilResource &Res, ValidationContext &ValCtx) { - switch (res.GetKind()) { + switch (Res.GetKind()) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: @@ -3725,8 +3739,8 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ResourceKind::Texture3D: case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - if (res.GetSampleCount() > 0) { - ValCtx.EmitResourceError(&res, ValidationRule::SmSampleCountOnlyOn2DMS); + if (Res.GetSampleCount() > 0) { + ValCtx.EmitResourceError(&Res, ValidationRule::SmSampleCountOnlyOn2DMS); } break; case DXIL::ResourceKind::Texture2DMS: @@ -3737,16 +3751,16 @@ static void ValidateResource(hlsl::DxilResource &res, break; case DXIL::ResourceKind::FeedbackTexture2D: case DXIL::ResourceKind::FeedbackTexture2DArray: - if 
(res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) - ValCtx.EmitResourceError(&res, + if (Res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidSamplerFeedbackType); break; default: - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceKind); + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceKind); break; } - switch (res.GetCompType().GetKind()) { + switch (Res.GetCompType().GetKind()) { case DXIL::ComponentType::F32: case DXIL::ComponentType::SNormF32: case DXIL::ComponentType::UNormF32: @@ -3760,266 +3774,266 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ComponentType::U16: break; default: - if (!res.IsStructuredBuffer() && !res.IsRawBuffer() && - !res.IsFeedbackTexture()) - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceCompType); + if (!Res.IsStructuredBuffer() && !Res.IsRawBuffer() && + !Res.IsFeedbackTexture()) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceCompType); break; } - if (res.IsStructuredBuffer()) { - unsigned stride = res.GetElementStride(); - bool alignedTo4Bytes = (stride & 3) == 0; - if (!alignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { + if (Res.IsStructuredBuffer()) { + unsigned Stride = Res.GetElementStride(); + bool AlignedTo4Bytes = (Stride & 3) == 0; + if (!AlignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignment, - {std::to_string(4), std::to_string(stride)}); + &Res, ValidationRule::MetaStructBufAlignment, + {std::to_string(4), std::to_string(Stride)}); } - if (stride > DXIL::kMaxStructBufferStride) { + if (Stride > DXIL::kMaxStructBufferStride) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignmentOutOfBound, + &Res, ValidationRule::MetaStructBufAlignmentOutOfBound, {std::to_string(DXIL::kMaxStructBufferStride), - 
std::to_string(stride)}); + std::to_string(Stride)}); } } - if (res.IsAnyTexture() || res.IsTypedBuffer()) { - Type *RetTy = res.GetRetType(); - unsigned size = + if (Res.IsAnyTexture() || Res.IsTypedBuffer()) { + Type *RetTy = Res.GetRetType(); + unsigned Size = ValCtx.DxilMod.GetModule()->getDataLayout().getTypeAllocSize(RetTy); - if (size > 4 * 4) { - ValCtx.EmitResourceError(&res, ValidationRule::MetaTextureType); + if (Size > 4 * 4) { + ValCtx.EmitResourceError(&Res, ValidationRule::MetaTextureType); } } } static void CollectCBufferRanges( - DxilStructAnnotation *annotation, - SpanAllocator &constAllocator, unsigned base, - DxilTypeSystem &typeSys, StringRef cbName, ValidationContext &ValCtx) { - DXASSERT(((base + 15) & ~(0xf)) == base, + DxilStructAnnotation *Annotation, + SpanAllocator &ConstAllocator, unsigned Base, + DxilTypeSystem &TypeSys, StringRef CbName, ValidationContext &ValCtx) { + DXASSERT(((Base + 15) & ~(0xf)) == Base, "otherwise, base for struct is not aligned"); - unsigned cbSize = annotation->GetCBufferSize(); + unsigned CbSize = Annotation->GetCBufferSize(); - const StructType *ST = annotation->GetStructType(); + const StructType *ST = Annotation->GetStructType(); - for (int i = annotation->GetNumFields() - 1; i >= 0; i--) { - DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(i); - Type *EltTy = ST->getElementType(i); + for (int I = Annotation->GetNumFields() - 1; I >= 0; I--) { + DxilFieldAnnotation &FieldAnnotation = Annotation->GetFieldAnnotation(I); + Type *EltTy = ST->getElementType(I); - unsigned offset = fieldAnnotation.GetCBufferOffset(); + unsigned Offset = FieldAnnotation.GetCBufferOffset(); unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( - fieldAnnotation, EltTy, typeSys); + FieldAnnotation, EltTy, TypeSys); - bool bOutOfBound = false; + bool IsOutOfBound = false; if (!EltTy->isAggregateType()) { - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { - if 
(constAllocator.Insert(&fieldAnnotation, base + offset, - base + offset + EltSize - 1)) { + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { + if (ConstAllocator.Insert(&FieldAnnotation, Base + Offset, + Base + Offset + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } else if (isa(EltTy)) { - if (((offset + 15) & ~(0xf)) != offset) { + if (((Offset + 15) & ~(0xf)) != Offset) { ValCtx.EmitFormatError(ValidationRule::SmCBufferArrayOffsetAlignment, - {cbName, std::to_string(offset)}); + {CbName, std::to_string(Offset)}); continue; } - unsigned arrayCount = 1; + unsigned ArrayCount = 1; while (isa(EltTy)) { - arrayCount *= EltTy->getArrayNumElements(); + ArrayCount *= EltTy->getArrayNumElements(); EltTy = EltTy->getArrayElementType(); } DxilStructAnnotation *EltAnnotation = nullptr; if (StructType *EltST = dyn_cast(EltTy)) - EltAnnotation = typeSys.GetStructAnnotation(EltST); + EltAnnotation = TypeSys.GetStructAnnotation(EltST); - unsigned alignedEltSize = ((EltSize + 15) & ~(0xf)); - unsigned arraySize = ((arrayCount - 1) * alignedEltSize) + EltSize; - bOutOfBound = (offset + arraySize) > cbSize; + unsigned AlignedEltSize = ((EltSize + 15) & ~(0xf)); + unsigned ArraySize = ((ArrayCount - 1) * AlignedEltSize) + EltSize; + IsOutOfBound = (Offset + ArraySize) > CbSize; - if (!bOutOfBound) { + if (!IsOutOfBound) { // If we didn't care about gaps where elements could be placed with user // offsets, we could: recurse once if EltAnnotation, then allocate the - // rest if arrayCount > 1 + // rest if ArrayCount > 1 - unsigned arrayBase = base + offset; + unsigned ArrayBase = Base + Offset; if (!EltAnnotation) { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, arrayBase, - arrayBase + arraySize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, ArrayBase, + ArrayBase + ArraySize - 1)) { 
ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(arrayBase)}); + {CbName, std::to_string(ArrayBase)}); } } else { - for (unsigned idx = 0; idx < arrayCount; idx++) { - CollectCBufferRanges(EltAnnotation, constAllocator, arrayBase, - typeSys, cbName, ValCtx); - arrayBase += alignedEltSize; + for (unsigned Idx = 0; Idx < ArrayCount; Idx++) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, ArrayBase, + TypeSys, CbName, ValCtx); + ArrayBase += AlignedEltSize; } } } } else { StructType *EltST = cast(EltTy); - unsigned structBase = base + offset; - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { + unsigned StructBase = Base + Offset; + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { if (DxilStructAnnotation *EltAnnotation = - typeSys.GetStructAnnotation(EltST)) { - CollectCBufferRanges(EltAnnotation, constAllocator, structBase, - typeSys, cbName, ValCtx); + TypeSys.GetStructAnnotation(EltST)) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, StructBase, + TypeSys, CbName, ValCtx); } else { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, structBase, - structBase + EltSize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, StructBase, + StructBase + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(structBase)}); + {CbName, std::to_string(StructBase)}); } } } } - if (bOutOfBound) { + if (IsOutOfBound) { ValCtx.EmitFormatError(ValidationRule::SmCBufferElementOverflow, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } -static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) { - Type *Ty = cb.GetHLSLType()->getPointerElementType(); - if (cb.GetRangeSize() != 1 || Ty->isArrayTy()) { +static void ValidateCBuffer(DxilCBuffer &Cb, ValidationContext &ValCtx) { + Type *Ty = Cb.GetHLSLType()->getPointerElementType(); + if (Cb.GetRangeSize() != 1 || 
Ty->isArrayTy()) { Ty = Ty->getArrayElementType(); } if (!isa(Ty)) { - ValCtx.EmitResourceError(&cb, + ValCtx.EmitResourceError(&Cb, ValidationRule::SmCBufferTemplateTypeMustBeStruct); return; } - if (cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { - ValCtx.EmitResourceFormatError(&cb, ValidationRule::SmCBufferSize, - {std::to_string(cb.GetSize())}); + if (Cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { + ValCtx.EmitResourceFormatError(&Cb, ValidationRule::SmCBufferSize, + {std::to_string(Cb.GetSize())}); return; } StructType *ST = cast(Ty); - DxilTypeSystem &typeSys = ValCtx.DxilMod.GetTypeSystem(); - DxilStructAnnotation *annotation = typeSys.GetStructAnnotation(ST); - if (!annotation) + DxilTypeSystem &TypeSys = ValCtx.DxilMod.GetTypeSystem(); + DxilStructAnnotation *Annotation = TypeSys.GetStructAnnotation(ST); + if (!Annotation) return; // Collect constant ranges. - std::vector> constRanges; - SpanAllocator constAllocator( + std::vector> ConstRanges; + SpanAllocator ConstAllocator( 0, // 4096 * 16 bytes. 
DXIL::kMaxCBufferSize << 4); - CollectCBufferRanges(annotation, constAllocator, 0, typeSys, - ValCtx.GetResourceName(&cb), ValCtx); + CollectCBufferRanges(Annotation, ConstAllocator, 0, TypeSys, + ValCtx.GetResourceName(&Cb), ValCtx); } static void ValidateResources(ValidationContext &ValCtx) { - const vector> &uavs = ValCtx.DxilMod.GetUAVs(); - SpacesAllocator uavAllocator; + const vector> &Uavs = ValCtx.DxilMod.GetUAVs(); + SpacesAllocator UavAllocator; - for (auto &uav : uavs) { - if (uav->IsROV()) { + for (auto &Uav : Uavs) { + if (Uav->IsROV()) { if (!ValCtx.DxilMod.GetShaderModel()->IsPS() && !ValCtx.isLibProfile) { - ValCtx.EmitResourceError(uav.get(), ValidationRule::SmROVOnlyInPS); + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmROVOnlyInPS); } } - switch (uav->GetKind()) { + switch (Uav->GetKind()) { case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - ValCtx.EmitResourceError(uav.get(), + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmInvalidTextureKindOnUAV); break; default: break; } - if (uav->HasCounter() && !uav->IsStructuredBuffer()) { - ValCtx.EmitResourceError(uav.get(), + if (Uav->HasCounter() && !Uav->IsStructuredBuffer()) { + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmCounterOnlyOnStructBuf); } - if (uav->HasCounter() && uav->IsGloballyCoherent()) - ValCtx.EmitResourceFormatError(uav.get(), + if (Uav->HasCounter() && Uav->IsGloballyCoherent()) + ValCtx.EmitResourceFormatError(Uav.get(), ValidationRule::MetaGlcNotOnAppendConsume, - {ValCtx.GetResourceName(uav.get())}); + {ValCtx.GetResourceName(Uav.get())}); - ValidateResource(*uav, ValCtx); - ValidateResourceOverlap(*uav, uavAllocator, ValCtx); + ValidateResource(*Uav, ValCtx); + ValidateResourceOverlap(*Uav, UavAllocator, ValCtx); } - SpacesAllocator srvAllocator; - const vector> &srvs = ValCtx.DxilMod.GetSRVs(); - for (auto &srv : srvs) { + SpacesAllocator SrvAllocator; + const vector> &Srvs = ValCtx.DxilMod.GetSRVs(); + for (auto &srv : 
Srvs) { ValidateResource(*srv, ValCtx); - ValidateResourceOverlap(*srv, srvAllocator, ValCtx); + ValidateResourceOverlap(*srv, SrvAllocator, ValCtx); } - hlsl::DxilResourceBase *pNonDense; - if (!AreDxilResourcesDense(&ValCtx.M, &pNonDense)) { - ValCtx.EmitResourceError(pNonDense, ValidationRule::MetaDenseResIDs); + hlsl::DxilResourceBase *NonDenseRes; + if (!AreDxilResourcesDense(&ValCtx.M, &NonDenseRes)) { + ValCtx.EmitResourceError(NonDenseRes, ValidationRule::MetaDenseResIDs); } - SpacesAllocator samplerAllocator; + SpacesAllocator SamplerAllocator; for (auto &sampler : ValCtx.DxilMod.GetSamplers()) { if (sampler->GetSamplerKind() == DXIL::SamplerKind::Invalid) { ValCtx.EmitResourceError(sampler.get(), ValidationRule::MetaValidSamplerMode); } - ValidateResourceOverlap(*sampler, samplerAllocator, ValCtx); + ValidateResourceOverlap(*sampler, SamplerAllocator, ValCtx); } - SpacesAllocator cbufferAllocator; + SpacesAllocator CbufferAllocator; for (auto &cbuffer : ValCtx.DxilMod.GetCBuffers()) { ValidateCBuffer(*cbuffer, ValCtx); - ValidateResourceOverlap(*cbuffer, cbufferAllocator, ValCtx); + ValidateResourceOverlap(*cbuffer, CbufferAllocator, ValCtx); } } static void ValidateShaderFlags(ValidationContext &ValCtx) { - ShaderFlags calcFlags; - ValCtx.DxilMod.CollectShaderFlagsForModule(calcFlags); + ShaderFlags CalcFlags; + ValCtx.DxilMod.CollectShaderFlagsForModule(CalcFlags); // Special case for validator version prior to 1.8. // If DXR 1.1 flag is set, but our computed flags do not have this set, then // this is due to prior versions setting the flag based on DXR 1.1 subobjects, // which are gone by this point. Set the flag and the rest should match. 
- unsigned valMajor, valMinor; - ValCtx.DxilMod.GetValidatorVersion(valMajor, valMinor); - if (DXIL::CompareVersions(valMajor, valMinor, 1, 5) >= 0 && - DXIL::CompareVersions(valMajor, valMinor, 1, 8) < 0 && + unsigned ValMajor, ValMinor; + ValCtx.DxilMod.GetValidatorVersion(ValMajor, ValMinor); + if (DXIL::CompareVersions(ValMajor, ValMinor, 1, 5) >= 0 && + DXIL::CompareVersions(ValMajor, ValMinor, 1, 8) < 0 && ValCtx.DxilMod.m_ShaderFlags.GetRaytracingTier1_1() && - !calcFlags.GetRaytracingTier1_1()) { - calcFlags.SetRaytracingTier1_1(true); + !CalcFlags.GetRaytracingTier1_1()) { + CalcFlags.SetRaytracingTier1_1(true); } - const uint64_t mask = ShaderFlags::GetShaderFlagsRawForCollection(); - uint64_t declaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); - uint64_t calcFlagsRaw = calcFlags.GetShaderFlagsRaw(); + const uint64_t Mask = ShaderFlags::GetShaderFlagsRawForCollection(); + uint64_t DeclaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); + uint64_t CalcFlagsRaw = CalcFlags.GetShaderFlagsRaw(); - declaredFlagsRaw &= mask; - calcFlagsRaw &= mask; + DeclaredFlagsRaw &= Mask; + CalcFlagsRaw &= Mask; - if (declaredFlagsRaw == calcFlagsRaw) { + if (DeclaredFlagsRaw == CalcFlagsRaw) { return; } ValCtx.EmitError(ValidationRule::MetaFlagsUsage); dxilutil::EmitNoteOnContext(ValCtx.M.getContext(), Twine("Flags declared=") + - Twine(declaredFlagsRaw) + Twine(", actual=") + - Twine(calcFlagsRaw)); + Twine(DeclaredFlagsRaw) + Twine(", actual=") + + Twine(CalcFlagsRaw)); } static void ValidateSignatureElement(DxilSignatureElement &SE, ValidationContext &ValCtx) { - DXIL::SemanticKind semanticKind = SE.GetSemantic()->GetKind(); - CompType::Kind compKind = SE.GetCompType().GetKind(); + DXIL::SemanticKind SemanticKind = SE.GetSemantic()->GetKind(); + CompType::Kind CompKind = SE.GetCompType().GetKind(); DXIL::InterpolationMode Mode = SE.GetInterpolationMode()->GetKind(); StringRef Name = SE.GetName(); @@ -4027,86 +4041,86 @@ static void 
ValidateSignatureElement(DxilSignatureElement &SE, ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSemanticLen); } - if (semanticKind > DXIL::SemanticKind::Arbitrary && - semanticKind < DXIL::SemanticKind::Invalid) { - if (semanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { + if (SemanticKind > DXIL::SemanticKind::Arbitrary && + SemanticKind < DXIL::SemanticKind::Invalid) { + if (SemanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { ValCtx.EmitFormatError(ValidationRule::MetaSemaKindMatchesName, {SE.GetName(), SE.GetSemantic()->GetName()}); } } - unsigned compWidth = 0; - bool compFloat = false; - bool compInt = false; - bool compBool = false; + unsigned CompWidth = 0; + bool CompFloat = false; + bool CompInt = false; + bool CompBool = false; - switch (compKind) { + switch (CompKind) { case CompType::Kind::U64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; case CompType::Kind::I64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; // These should be translated for signatures: // case CompType::Kind::PackedS8x32: // case CompType::Kind::PackedU8x32: case CompType::Kind::U32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::I32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::U16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I1: - compWidth = 1; - compBool = true; + CompWidth = 1; + CompBool = true; break; case CompType::Kind::F64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::F32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::F16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; 
break; case CompType::Kind::SNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::SNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::SNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::UNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::UNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::UNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::Invalid: default: @@ -4115,7 +4129,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; } - if (compInt || compBool) { + if (CompInt || CompBool) { switch (Mode) { case DXIL::InterpolationMode::Linear: case DXIL::InterpolationMode::LinearCentroid: @@ -4132,91 +4146,91 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } // Elements that should not appear in the Dxil signature: - bool bAllowedInSig = true; - bool bShouldBeAllocated = true; + bool AllowedInSig = true; + bool ShouldBeAllocated = true; switch (SE.GetInterpretation()) { case DXIL::SemanticInterpretationKind::NA: case DXIL::SemanticInterpretationKind::NotInSig: case DXIL::SemanticInterpretationKind::Invalid: - bAllowedInSig = false; + AllowedInSig = false; LLVM_FALLTHROUGH; case DXIL::SemanticInterpretationKind::NotPacked: case DXIL::SemanticInterpretationKind::Shadow: - bShouldBeAllocated = false; + ShouldBeAllocated = false; break; default: break; } - const char *inputOutput = nullptr; + const char *InputOutput = nullptr; if (SE.IsInput()) - inputOutput = "Input"; + InputOutput = "Input"; else if (SE.IsOutput()) - inputOutput = "Output"; + InputOutput = "Output"; else - inputOutput = "PatchConstant"; + InputOutput = "PatchConstant"; - if (!bAllowedInSig) { + if 
(!AllowedInSig) { ValCtx.EmitFormatError(ValidationRule::SmSemantic, {SE.GetName(), ValCtx.DxilMod.GetShaderModel()->GetKindName(), - inputOutput}); - } else if (bShouldBeAllocated && !SE.IsAllocated()) { + InputOutput}); + } else if (ShouldBeAllocated && !SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldBeAllocated, - {inputOutput, SE.GetName()}); - } else if (!bShouldBeAllocated && SE.IsAllocated()) { + {InputOutput, SE.GetName()}); + } else if (!ShouldBeAllocated && SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldNotBeAllocated, - {inputOutput, SE.GetName()}); + {InputOutput, SE.GetName()}); } - bool bIsClipCull = false; - bool bIsTessfactor = false; - bool bIsBarycentric = false; + bool IsClipCull = false; + bool IsTessfactor = false; + bool IsBarycentric = false; - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Depth: case DXIL::SemanticKind::DepthGreaterEqual: case DXIL::SemanticKind::DepthLessEqual: - if (!compFloat || compWidth > 32 || SE.GetCols() != 1) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } break; case DXIL::SemanticKind::Coverage: - DXASSERT(!SE.IsInput() || !bAllowedInSig, + DXASSERT(!SE.IsInput() || !AllowedInSig, "else internal inconsistency between semantic interpretation " "table and validation code"); LLVM_FALLTHROUGH; case DXIL::SemanticKind::InnerCoverage: case DXIL::SemanticKind::OutputControlPointID: - if (compKind != CompType::Kind::U32 || SE.GetCols() != 1) { + if (CompKind != CompType::Kind::U32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::Position: - if (!compFloat || compWidth > 32 || SE.GetCols() != 4) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 4) { 
ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float4"}); } break; case DXIL::SemanticKind::Target: - if (compWidth > 32) { + if (CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float/int/uint"}); } break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: - bIsClipCull = true; - if (!compFloat || compWidth > 32) { + IsClipCull = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } // NOTE: clip cull distance size is checked at ValidateSignature. break; case DXIL::SemanticKind::IsFrontFace: { - if (!(compInt && compWidth == 32) || SE.GetCols() != 1) { + if (!(CompInt && CompWidth == 32) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } @@ -4230,14 +4244,14 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::SampleIndex: case DXIL::SemanticKind::StencilRef: case DXIL::SemanticKind::ShadingRate: - if ((compKind != CompType::Kind::U32 && compKind != CompType::Kind::U16) || + if ((CompKind != CompType::Kind::U32 && CompKind != CompType::Kind::U16) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::CullPrimitive: { - if (!(compBool && compWidth == 1) || SE.GetCols() != 1) { + if (!(CompBool && CompWidth == 1) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "bool"}); } @@ -4245,8 +4259,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::TessFactor: case DXIL::SemanticKind::InsideTessFactor: // NOTE: the size check is at CheckPatchConstantSemantic. 
- bIsTessfactor = true; - if (!compFloat || compWidth > 32) { + IsTessfactor = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4255,12 +4269,12 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; case DXIL::SemanticKind::DomainLocation: case DXIL::SemanticKind::Invalid: - DXASSERT(!bAllowedInSig, "else internal inconsistency between semantic " - "interpretation table and validation code"); + DXASSERT(!AllowedInSig, "else internal inconsistency between semantic " + "interpretation table and validation code"); break; case DXIL::SemanticKind::Barycentrics: - bIsBarycentric = true; - if (!compFloat || compWidth > 32) { + IsBarycentric = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4305,32 +4319,32 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } } - if (semanticKind == DXIL::SemanticKind::Target) { - // Verify packed row == semantic index - unsigned row = SE.GetStartRow(); + if (SemanticKind == DXIL::SemanticKind::Target) { + // Verify packed Row == semantic index + unsigned Row = SE.GetStartRow(); for (unsigned i : SE.GetSemanticIndexVec()) { - if (row != i) { + if (Row != i) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetIndexMatchesRow); } - ++row; + ++Row; } - // Verify packed col is 0 + // Verify packed Col is 0 if (SE.GetStartCol() != 0) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetCol0); } - // Verify max row used < 8 + // Verify max Row used < 8 if (SE.GetStartRow() + SE.GetRows() > 8) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {"SV_Target", "7"}); } - } else if (bAllowedInSig && semanticKind != DXIL::SemanticKind::Arbitrary) { - if (bIsBarycentric) { + } else if (AllowedInSig && SemanticKind != DXIL::SemanticKind::Arbitrary) { + if (IsBarycentric) { if 
(SE.GetSemanticStartIndex() > 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "1"}); } - } else if (!bIsClipCull && SE.GetSemanticStartIndex() > 0) { + } else if (!IsClipCull && SE.GetSemanticStartIndex() > 0) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "0"}); } @@ -4338,17 +4352,17 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, // with the exception of tessfactors, which are validated in // CheckPatchConstantSemantic and ClipDistance/CullDistance, which have // other custom constraints. - if (!bIsTessfactor && !bIsClipCull && SE.GetRows() > 1) { + if (!IsTessfactor && !IsClipCull && SE.GetRows() > 1) { ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSystemValueRows); } } if (SE.GetCols() + (SE.IsAllocated() ? SE.GetStartCol() : 0) > 4) { - unsigned size = (SE.GetRows() - 1) * 4 + SE.GetCols(); + unsigned Size = (SE.GetRows() - 1) * 4 + SE.GetCols(); ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange, {SE.GetName(), std::to_string(SE.GetStartRow()), std::to_string(SE.GetStartCol()), - std::to_string(size)}); + std::to_string(Size)}); } if (!SE.GetInterpolationMode()->IsValid()) { @@ -4357,8 +4371,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } static void ValidateSignatureOverlap(DxilSignatureElement &E, - unsigned maxScalars, - DxilSignatureAllocator &allocator, + unsigned MaxScalars, + DxilSignatureAllocator &Allocator, ValidationContext &ValCtx) { // Skip entries that are not or should not be allocated. 
Validation occurs in @@ -4376,16 +4390,16 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, break; } - DxilPackElement PE(&E, allocator.UseMinPrecision()); - DxilSignatureAllocator::ConflictType conflict = - allocator.DetectRowConflict(&PE, E.GetStartRow()); - if (conflict == DxilSignatureAllocator::kNoConflict || - conflict == DxilSignatureAllocator::kInsufficientFreeComponents) - conflict = - allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); - switch (conflict) { + DxilPackElement PE(&E, Allocator.UseMinPrecision()); + DxilSignatureAllocator::ConflictType Conflict = + Allocator.DetectRowConflict(&PE, E.GetStartRow()); + if (Conflict == DxilSignatureAllocator::kNoConflict || + Conflict == DxilSignatureAllocator::kInsufficientFreeComponents) + Conflict = + Allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); + switch (Conflict) { case DxilSignatureAllocator::kNoConflict: - allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); + Allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); break; case DxilSignatureAllocator::kConflictsWithIndexed: ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict, @@ -4447,59 +4461,59 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, } static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, - EntryStatus &Status, unsigned maxScalars) { - DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = { + EntryStatus &Status, unsigned MaxScalars) { + DxilSignatureAllocator Allocator[DXIL::kNumOutputStreams] = { {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}}; - unordered_set semanticUsageSet[DXIL::kNumOutputStreams]; - StringMap> semanticIndexMap[DXIL::kNumOutputStreams]; - unordered_set clipcullRowSet[DXIL::kNumOutputStreams]; - unsigned clipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; + 
unordered_set SemanticUsageSet[DXIL::kNumOutputStreams]; + StringMap> SemanticIndexMap[DXIL::kNumOutputStreams]; + unordered_set ClipcullRowSet[DXIL::kNumOutputStreams]; + unsigned ClipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; - bool isOutput = S.IsOutput(); + bool IsOutput = S.IsOutput(); unsigned TargetMask = 0; DXIL::SemanticKind DepthKind = DXIL::SemanticKind::Invalid; - const InterpolationMode *prevBaryInterpMode = nullptr; - unsigned numBarycentrics = 0; + const InterpolationMode *PrevBaryInterpMode = nullptr; + unsigned NumBarycentrics = 0; for (auto &E : S.GetElements()) { - DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind(); + DXIL::SemanticKind SemanticKind = E->GetSemantic()->GetKind(); ValidateSignatureElement(*E, ValCtx); - // Avoid OOB indexing on streamId. - unsigned streamId = E->GetOutputStream(); - if (streamId >= DXIL::kNumOutputStreams || !isOutput || + // Avoid OOB indexing on StreamId. + unsigned StreamId = E->GetOutputStream(); + if (StreamId >= DXIL::kNumOutputStreams || !IsOutput || !ValCtx.DxilMod.GetShaderModel()->IsGS()) { - streamId = 0; + StreamId = 0; } // Semantic index overlap check, keyed by name. 
- std::string nameUpper(E->GetName()); - std::transform(nameUpper.begin(), nameUpper.end(), nameUpper.begin(), + std::string NameUpper(E->GetName()); + std::transform(NameUpper.begin(), NameUpper.end(), NameUpper.begin(), ::toupper); - unordered_set &semIdxSet = semanticIndexMap[streamId][nameUpper]; - for (unsigned semIdx : E->GetSemanticIndexVec()) { - if (semIdxSet.count(semIdx) > 0) { + unordered_set &SemIdxSet = SemanticIndexMap[StreamId][NameUpper]; + for (unsigned SemIdx : E->GetSemanticIndexVec()) { + if (SemIdxSet.count(SemIdx) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap, - {E->GetName(), std::to_string(semIdx)}); + {E->GetName(), std::to_string(SemIdx)}); return; } else - semIdxSet.insert(semIdx); + SemIdxSet.insert(SemIdx); } // SV_Target has special rules - if (semanticKind == DXIL::SemanticKind::Target) { + if (SemanticKind == DXIL::SemanticKind::Target) { // Validate target overlap if (E->GetStartRow() + E->GetRows() <= 8) { - unsigned mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); - if (TargetMask & mask) { + unsigned Mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); + if (TargetMask & Mask) { ValCtx.EmitFormatError( ValidationRule::MetaNoSemanticOverlap, {"SV_Target", std::to_string(E->GetStartRow())}); } - TargetMask = TargetMask | mask; + TargetMask = TargetMask | Mask; } if (E->GetRows() > 1) { ValCtx.EmitSignatureError(E.get(), ValidationRule::SmNoPSOutputIdx); @@ -4511,19 +4525,19 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, continue; // validate system value semantic rules - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Arbitrary: break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: // Validate max 8 components across 2 rows (registers) - for (unsigned rowIdx = 0; rowIdx < E->GetRows(); rowIdx++) - clipcullRowSet[streamId].insert(E->GetStartRow() + rowIdx); - if (clipcullRowSet[streamId].size() > 2) { + for 
(unsigned RowIdx = 0; RowIdx < E->GetRows(); RowIdx++) + ClipcullRowSet[StreamId].insert(E->GetStartRow() + RowIdx); + if (ClipcullRowSet[StreamId].size() > 2) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxRows); } - clipcullComponents[streamId] += E->GetCols(); - if (clipcullComponents[streamId] > 8) { + ClipcullComponents[StreamId] += E->GetCols(); + if (ClipcullComponents[StreamId] > 8) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxComponents); } @@ -4535,58 +4549,58 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, ValCtx.EmitSignatureError(E.get(), ValidationRule::SmPSMultipleDepthSemantic); } - DepthKind = semanticKind; + DepthKind = SemanticKind; break; case DXIL::SemanticKind::Barycentrics: { // There can only be up to two SV_Barycentrics // with differeent perspective interpolation modes. - if (numBarycentrics++ > 1) { + if (NumBarycentrics++ > 1) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); break; } - const InterpolationMode *mode = E->GetInterpolationMode(); - if (prevBaryInterpMode) { - if ((mode->IsAnyNoPerspective() && - prevBaryInterpMode->IsAnyNoPerspective()) || - (!mode->IsAnyNoPerspective() && - !prevBaryInterpMode->IsAnyNoPerspective())) { + const InterpolationMode *Mode = E->GetInterpolationMode(); + if (PrevBaryInterpMode) { + if ((Mode->IsAnyNoPerspective() && + PrevBaryInterpMode->IsAnyNoPerspective()) || + (!Mode->IsAnyNoPerspective() && + !PrevBaryInterpMode->IsAnyNoPerspective())) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); } } - prevBaryInterpMode = mode; + PrevBaryInterpMode = Mode; break; } default: - if (semanticUsageSet[streamId].count( - static_cast(semanticKind)) > 0) { + if (SemanticUsageSet[StreamId].count( + static_cast(SemanticKind)) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaDuplicateSysValue, {E->GetSemantic()->GetName()}); } - 
semanticUsageSet[streamId].insert(static_cast(semanticKind)); + SemanticUsageSet[StreamId].insert(static_cast(SemanticKind)); break; } // Packed element overlap check. - ValidateSignatureOverlap(*E.get(), maxScalars, allocator[streamId], ValCtx); + ValidateSignatureOverlap(*E.get(), MaxScalars, Allocator[StreamId], ValCtx); - if (isOutput && semanticKind == DXIL::SemanticKind::Position) { + if (IsOutput && SemanticKind == DXIL::SemanticKind::Position) { Status.hasOutputPosition[E->GetOutputStream()] = true; } } if (Status.hasViewID && S.IsInput() && ValCtx.DxilMod.GetShaderModel()->GetKind() == DXIL::ShaderKind::Pixel) { - // Ensure sufficient space for ViewID: - DxilSignatureAllocator::DummyElement viewID; - viewID.rows = 1; - viewID.cols = 1; - viewID.kind = DXIL::SemanticKind::Arbitrary; - viewID.interpolation = DXIL::InterpolationMode::Constant; - viewID.interpretation = DXIL::SemanticInterpretationKind::SGV; - allocator[0].PackNext(&viewID, 0, 32); - if (!viewID.IsAllocated()) { + // Ensure sufficient space for ViewId: + DxilSignatureAllocator::DummyElement ViewId; + ViewId.rows = 1; + ViewId.cols = 1; + ViewId.kind = DXIL::SemanticKind::Arbitrary; + ViewId.interpolation = DXIL::InterpolationMode::Constant; + ViewId.interpretation = DXIL::SemanticInterpretationKind::SGV; + Allocator[0].PackNext(&ViewId, 0, 32); + if (!ViewId.IsAllocated()) { ValCtx.EmitError(ValidationRule::SmViewIDNeedsSlot); } } @@ -4611,12 +4625,12 @@ static void ValidateConstantInterpModeSignature(ValidationContext &ValCtx, } static void ValidateEntrySignatures(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function &F) { - const DxilFunctionProps &props = entryProps.props; - const DxilEntrySignature &S = entryProps.sig; + const DxilFunctionProps &Props = EntryProps.props; + const DxilEntrySignature &S = EntryProps.sig; - if (props.IsRay()) { + if (Props.IsRay()) { // No signatures allowed if 
(!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || @@ -4626,62 +4640,62 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate payload/attribute/params sizes - unsigned payloadSize = 0; - unsigned attrSize = 0; - auto itPayload = F.arg_begin(); - auto itAttr = itPayload; - if (itAttr != F.arg_end()) - itAttr++; + unsigned PayloadSize = 0; + unsigned AttrSize = 0; + auto ItPayload = F.arg_begin(); + auto ItAttr = ItPayload; + if (ItAttr != F.arg_end()) + ItAttr++; DataLayout DL(F.getParent()); - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - if (itAttr != F.arg_end()) { - Type *Ty = itAttr->getType(); + if (ItAttr != F.arg_end()) { + Type *Ty = ItAttr->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - attrSize = + AttrSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } LLVM_FALLTHROUGH; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - if (itPayload != F.arg_end()) { - Type *Ty = itPayload->getType(); + if (ItPayload != F.arg_end()) { + Type *Ty = ItPayload->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - payloadSize = + PayloadSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } break; } - if (props.ShaderProps.Ray.payloadSizeInBytes < payloadSize) { + if (Props.ShaderProps.Ray.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmRayShaderPayloadSize, - {F.getName(), props.IsCallable() ? "params" : "payload"}); + {F.getName(), Props.IsCallable() ? 
"params" : "payload"}); } - if (props.ShaderProps.Ray.attributeSizeInBytes < attrSize) { + if (Props.ShaderProps.Ray.attributeSizeInBytes < AttrSize) { ValCtx.EmitFnFormatError(&F, ValidationRule::SmRayShaderPayloadSize, {F.getName(), "attribute"}); } return; } - bool isPS = props.IsPS(); - bool isVS = props.IsVS(); - bool isGS = props.IsGS(); - bool isCS = props.IsCS(); - bool isMS = props.IsMS(); + bool IsPs = Props.IsPS(); + bool IsVs = Props.IsVS(); + bool IsGs = Props.IsGS(); + bool IsCs = Props.IsCS(); + bool IsMs = Props.IsMS(); - if (isPS) { + if (IsPs) { // PS output no interp mode. ValidateNoInterpModeSignature(ValCtx, S.OutputSignature); - } else if (isVS) { + } else if (IsVs) { // VS input no interp mode. ValidateNoInterpModeSignature(ValCtx, S.InputSignature); } - if (isMS) { + if (IsMs) { // primitive output constant interp mode. ValidateConstantInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } else { @@ -4689,38 +4703,38 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, ValidateNoInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } - unsigned maxInputScalars = DXIL::kMaxInputTotalScalars; - unsigned maxOutputScalars = 0; - unsigned maxPatchConstantScalars = 0; + unsigned MaxInputScalars = DXIL::kMaxInputTotalScalars; + unsigned MaxOutputScalars = 0; + unsigned MaxPatchConstantScalars = 0; - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::Compute: break; case DXIL::ShaderKind::Vertex: case DXIL::ShaderKind::Geometry: case DXIL::ShaderKind::Pixel: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Hull: case DXIL::ShaderKind::Domain: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; break; case 
DXIL::ShaderKind::Mesh: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Amplification: default: break; } - ValidateSignature(ValCtx, S.InputSignature, Status, maxInputScalars); - ValidateSignature(ValCtx, S.OutputSignature, Status, maxOutputScalars); + ValidateSignature(ValCtx, S.InputSignature, Status, MaxInputScalars); + ValidateSignature(ValCtx, S.OutputSignature, Status, MaxOutputScalars); ValidateSignature(ValCtx, S.PatchConstOrPrimSignature, Status, - maxPatchConstantScalars); + MaxPatchConstantScalars); - if (isPS) { + if (IsPs) { // Gather execution information. hlsl::PSExecutionInfo PSExec; DxilSignatureElement *PosInterpSE = nullptr; @@ -4762,10 +4776,10 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate PS output semantic. - const DxilSignature &outputSig = S.OutputSignature; - for (auto &SE : outputSig.GetElements()) { - Semantic::Kind semanticKind = SE->GetSemantic()->GetKind(); - switch (semanticKind) { + const DxilSignature &OutputSig = S.OutputSignature; + for (auto &SE : OutputSig.GetElements()) { + Semantic::Kind SemanticKind = SE->GetSemantic()->GetKind(); + switch (SemanticKind) { case Semantic::Kind::Target: case Semantic::Kind::Coverage: case Semantic::Kind::Depth: @@ -4781,24 +4795,24 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isGS) { - unsigned maxVertexCount = props.ShaderProps.GS.maxVertexCount; - unsigned outputScalarCount = 0; - const DxilSignature &outSig = S.OutputSignature; - for (auto &SE : outSig.GetElements()) { - outputScalarCount += SE->GetRows() * SE->GetCols(); + if (IsGs) { + unsigned MaxVertexCount = Props.ShaderProps.GS.maxVertexCount; + unsigned OutputScalarCount = 0; + const DxilSignature &OutSig = S.OutputSignature; + for (auto &SE : OutSig.GetElements()) { + 
OutputScalarCount += SE->GetRows() * SE->GetCols(); } - unsigned totalOutputScalars = maxVertexCount * outputScalarCount; - if (totalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { + unsigned TotalOutputScalars = MaxVertexCount * OutputScalarCount; + if (TotalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmGSTotalOutputVertexDataRange, - {std::to_string(maxVertexCount), std::to_string(outputScalarCount), - std::to_string(totalOutputScalars), + {std::to_string(MaxVertexCount), std::to_string(OutputScalarCount), + std::to_string(TotalOutputScalars), std::to_string(DXIL::kMaxGSOutputTotalScalars)}); } } - if (isCS) { + if (IsCs) { if (!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || !S.PatchConstOrPrimSignature.GetElements().empty()) { @@ -4806,7 +4820,7 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isMS) { + if (IsMs) { unsigned VertexSignatureRows = S.OutputSignature.GetRowCount(); if (VertexSignatureRows > DXIL::kMaxMSVSigRows) { ValCtx.EmitFnFormatError( @@ -4828,31 +4842,31 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, const unsigned kScalarSizeForMSAttributes = 4; #define ALIGN32(n) (((n) + 31) & ~31) - unsigned maxAlign32VertexCount = - ALIGN32(props.ShaderProps.MS.maxVertexCount); - unsigned maxAlign32PrimitiveCount = - ALIGN32(props.ShaderProps.MS.maxPrimitiveCount); - unsigned totalOutputScalars = 0; + unsigned MaxAlign32VertexCount = + ALIGN32(Props.ShaderProps.MS.maxVertexCount); + unsigned MaxAlign32PrimitiveCount = + ALIGN32(Props.ShaderProps.MS.maxPrimitiveCount); + unsigned TotalOutputScalars = 0; for (auto &SE : S.OutputSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * SE->GetCols() * maxAlign32VertexCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32VertexCount; } for (auto &SE : S.PatchConstOrPrimSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * 
SE->GetCols() * maxAlign32PrimitiveCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32PrimitiveCount; } - if (totalOutputScalars * kScalarSizeForMSAttributes > + if (TotalOutputScalars * kScalarSizeForMSAttributes > DXIL::kMaxMSOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderOutputSize, {F.getName(), std::to_string(DXIL::kMaxMSOutputTotalBytes)}); } - unsigned totalInputOutputBytes = - totalOutputScalars * kScalarSizeForMSAttributes + - props.ShaderProps.MS.payloadSizeInBytes; - if (totalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { + unsigned TotalInputOutputBytes = + TotalOutputScalars * kScalarSizeForMSAttributes + + Props.ShaderProps.MS.payloadSizeInBytes; + if (TotalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderInOutSize, {F.getName(), std::to_string(DXIL::kMaxMSInputOutputTotalBytes)}); @@ -4865,9 +4879,9 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntrySignatures(ValCtx, entryProps, Status, F); + ValidateEntrySignatures(ValCtx, EntryProps, Status, F); } } } else { @@ -4878,8 +4892,8 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntrySignatures(ValCtx, entryProps, Status, *Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntrySignatures(ValCtx, EntryProps, Status, *Entry); } } @@ -4888,14 +4902,14 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { struct CompatibilityChecker { ValidationContext &ValCtx; Function *EntryFn; - const 
DxilFunctionProps &props; - DXIL::ShaderKind shaderKind; + const DxilFunctionProps &Props; + DXIL::ShaderKind ShaderKind; // These masks identify the potential conflict flags based on the entry // function's shader kind and properties when either UsesDerivatives or // RequiresGroup flags are set in ShaderCompatInfo. - uint32_t maskForDeriv = 0; - uint32_t maskForGroup = 0; + uint32_t MaskForDeriv = 0; + uint32_t MaskForGroup = 0; enum class ConflictKind : uint32_t { Stage, @@ -4917,77 +4931,77 @@ struct CompatibilityChecker { CompatibilityChecker(ValidationContext &ValCtx, Function *EntryFn) : ValCtx(ValCtx), EntryFn(EntryFn), - props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), - shaderKind(props.shaderKind) { + Props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), + ShaderKind(Props.shaderKind) { // Precompute potential incompatibilities based on shader stage, shader kind // and entry attributes. These will turn into full conflicts if the entry // point's shader flags indicate that they use relevant features. if (!ValCtx.DxilMod.GetShaderModel()->IsSM66Plus() && - (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute)) { - maskForDeriv |= + (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute)) { + MaskForDeriv |= static_cast(ConflictFlags::DerivInComputeShaderModel); - } else if (shaderKind == DXIL::ShaderKind::Node) { + } else if (ShaderKind == DXIL::ShaderKind::Node) { // Only broadcasting launch supports derivatives. - if (props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) - maskForDeriv |= static_cast(ConflictFlags::DerivLaunch); + if (Props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) + MaskForDeriv |= static_cast(ConflictFlags::DerivLaunch); // Thread launch node has no group. 
- if (props.Node.LaunchType == DXIL::NodeLaunchType::Thread) - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + if (Props.Node.LaunchType == DXIL::NodeLaunchType::Thread) + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } - if (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Node) { + if (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Node) { // All compute-like stages // Thread dimensions must be either 1D and X is multiple of 4, or 2D // and X and Y must be multiples of 2. - if (props.numThreads[1] == 1 && props.numThreads[2] == 1) { - if ((props.numThreads[0] & 0x3) != 0) - maskForDeriv |= + if (Props.numThreads[1] == 1 && Props.numThreads[2] == 1) { + if ((Props.numThreads[0] & 0x3) != 0) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); - } else if ((props.numThreads[0] & 0x1) || (props.numThreads[1] & 0x1)) - maskForDeriv |= + } else if ((Props.numThreads[0] & 0x1) || (Props.numThreads[1] & 0x1)) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); } else { // other stages have no group - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } } uint32_t - IdentifyConflict(const DxilModule::ShaderCompatInfo &compatInfo) const { - uint32_t conflictMask = 0; + IdentifyConflict(const DxilModule::ShaderCompatInfo &CompatInfo) const { + uint32_t ConflictMask = 0; // Compatibility check said this shader kind is not compatible. - if (0 == ((1 << (uint32_t)shaderKind) & compatInfo.mask)) - conflictMask |= (uint32_t)ConflictFlags::Stage; + if (0 == ((1 << (uint32_t)ShaderKind) & CompatInfo.mask)) + ConflictMask |= (uint32_t)ConflictFlags::Stage; // Compatibility check said this shader model is not compatible. 
if (DXIL::CompareVersions(ValCtx.DxilMod.GetShaderModel()->GetMajor(), ValCtx.DxilMod.GetShaderModel()->GetMinor(), - compatInfo.minMajor, compatInfo.minMinor) < 0) - conflictMask |= (uint32_t)ConflictFlags::ShaderModel; + CompatInfo.minMajor, CompatInfo.minMinor) < 0) + ConflictMask |= (uint32_t)ConflictFlags::ShaderModel; - if (compatInfo.shaderFlags.GetUsesDerivatives()) - conflictMask |= maskForDeriv; + if (CompatInfo.shaderFlags.GetUsesDerivatives()) + ConflictMask |= MaskForDeriv; - if (compatInfo.shaderFlags.GetRequiresGroup()) - conflictMask |= maskForGroup; + if (CompatInfo.shaderFlags.GetRequiresGroup()) + ConflictMask |= MaskForGroup; - return conflictMask; + return ConflictMask; } - void Diagnose(Function *F, uint32_t conflictMask, ConflictKind conflict, - ValidationRule rule, ArrayRef args = {}) { - if (conflictMask & (1 << (unsigned)conflict)) - ValCtx.EmitFnFormatError(F, rule, args); + void Diagnose(Function *F, uint32_t ConflictMask, ConflictKind Conflict, + ValidationRule Rule, ArrayRef Args = {}) { + if (ConflictMask & (1 << (unsigned)Conflict)) + ValCtx.EmitFnFormatError(F, Rule, Args); } - void DiagnoseConflicts(Function *F, uint32_t conflictMask) { + void DiagnoseConflicts(Function *F, uint32_t ConflictMask) { // Emit a diagnostic indicating that either the entry function or a function // called by the entry function contains a disallowed operation. if (F == EntryFn) @@ -4996,22 +5010,22 @@ struct CompatibilityChecker { ValCtx.EmitFnError(EntryFn, ValidationRule::SmIncompatibleCallInEntry); // Emit diagnostics for each conflict found in this function. 
- Diagnose(F, conflictMask, ConflictKind::Stage, + Diagnose(F, ConflictMask, ConflictKind::Stage, ValidationRule::SmIncompatibleStage, - {ShaderModel::GetKindName(props.shaderKind)}); - Diagnose(F, conflictMask, ConflictKind::ShaderModel, + {ShaderModel::GetKindName(Props.shaderKind)}); + Diagnose(F, ConflictMask, ConflictKind::ShaderModel, ValidationRule::SmIncompatibleShaderModel); - Diagnose(F, conflictMask, ConflictKind::DerivLaunch, + Diagnose(F, ConflictMask, ConflictKind::DerivLaunch, ValidationRule::SmIncompatibleDerivLaunch, - {GetLaunchTypeStr(props.Node.LaunchType)}); - Diagnose(F, conflictMask, ConflictKind::DerivThreadGroupDim, + {GetLaunchTypeStr(Props.Node.LaunchType)}); + Diagnose(F, ConflictMask, ConflictKind::DerivThreadGroupDim, ValidationRule::SmIncompatibleThreadGroupDim, - {std::to_string(props.numThreads[0]), - std::to_string(props.numThreads[1]), - std::to_string(props.numThreads[2])}); - Diagnose(F, conflictMask, ConflictKind::DerivInComputeShaderModel, + {std::to_string(Props.numThreads[0]), + std::to_string(Props.numThreads[1]), + std::to_string(Props.numThreads[2])}); + Diagnose(F, ConflictMask, ConflictKind::DerivInComputeShaderModel, ValidationRule::SmIncompatibleDerivInComputeShaderModel); - Diagnose(F, conflictMask, ConflictKind::RequiresGroup, + Diagnose(F, ConflictMask, ConflictKind::RequiresGroup, ValidationRule::SmIncompatibleRequiresGroup); } @@ -5020,59 +5034,59 @@ struct CompatibilityChecker { // functions called by that function introduced the conflict. // In those cases, the called functions themselves will emit the diagnostic. // Return conflict mask for this function. - uint32_t Visit(Function *F, uint32_t &remainingMask, - llvm::SmallPtrSet &visited, CallGraph &CG) { + uint32_t Visit(Function *F, uint32_t &RemainingMask, + llvm::SmallPtrSet &Visited, CallGraph &CG) { // Recursive check looks for where a conflict is found and not present // in functions called by the current function. 
// - When a source is found, emit diagnostics and clear the conflict // flags introduced by this function from the working mask so we don't // report this conflict again. - // - When the remainingMask is 0, we are done. + // - When the RemainingMask is 0, we are done. - if (remainingMask == 0) + if (RemainingMask == 0) return 0; // Nothing left to search for. - if (!visited.insert(F).second) + if (!Visited.insert(F).second) return 0; // Already visited. - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = ValCtx.DxilMod.GetCompatInfoForFunction(F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) return 0; - uint32_t maskForThisFunction = IdentifyConflict(*compatInfo); + uint32_t MaskForThisFunction = IdentifyConflict(*CompatInfo); - uint32_t maskForCalls = 0; + uint32_t MaskForCalls = 0; if (CallGraphNode *CGNode = CG[F]) { for (auto &Call : *CGNode) { Function *called = Call.second->getFunction(); if (called->isDeclaration()) continue; - maskForCalls |= Visit(called, remainingMask, visited, CG); - if (remainingMask == 0) + MaskForCalls |= Visit(called, RemainingMask, Visited, CG); + if (RemainingMask == 0) return 0; // Nothing left to search for. } } // Mask of incompatibilities introduced by this function. - uint32_t conflictsIntroduced = - remainingMask & maskForThisFunction & ~maskForCalls; - if (conflictsIntroduced) { + uint32_t ConflictsIntroduced = + RemainingMask & MaskForThisFunction & ~MaskForCalls; + if (ConflictsIntroduced) { // This function introduces at least one conflict. - DiagnoseConflicts(F, conflictsIntroduced); + DiagnoseConflicts(F, ConflictsIntroduced); // Mask off diagnosed incompatibilities. 
- remainingMask &= ~conflictsIntroduced; + RemainingMask &= ~ConflictsIntroduced; } - return maskForThisFunction; + return MaskForThisFunction; } - void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &compatInfo) { - uint32_t conflictMask = IdentifyConflict(compatInfo); - if (conflictMask == 0) + void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &CompatInfo) { + uint32_t ConflictMask = IdentifyConflict(CompatInfo); + if (ConflictMask == 0) return; CallGraph &CG = ValCtx.GetCallGraph(); - llvm::SmallPtrSet visited; - Visit(EntryFn, conflictMask, visited, CG); + llvm::SmallPtrSet Visited; + Visit(EntryFn, ConflictMask, Visited, CG); } }; @@ -5081,14 +5095,14 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { DxilModule &DM = ValCtx.DxilMod; for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = DM.GetCompatInfoForFunction(&F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) continue; CompatibilityChecker checker(ValCtx, &F); - checker.FindIncompatibleCall(*compatInfo); + checker.FindIncompatibleCall(*CompatInfo); } } } @@ -5096,101 +5110,101 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { static void CheckPatchConstantSemantic(ValidationContext &ValCtx, const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = EntryProps.props; - bool isHS = props.IsHS(); + const DxilFunctionProps &Props = EntryProps.props; + bool IsHs = Props.IsHS(); - DXIL::TessellatorDomain domain = - isHS ? props.ShaderProps.HS.domain : props.ShaderProps.DS.domain; + DXIL::TessellatorDomain Domain = + IsHs ? 
Props.ShaderProps.HS.domain : Props.ShaderProps.DS.domain; - const DxilSignature &patchConstantSig = + const DxilSignature &PatchConstantSig = EntryProps.sig.PatchConstOrPrimSignature; - const unsigned kQuadEdgeSize = 4; - const unsigned kQuadInsideSize = 2; - const unsigned kQuadDomainLocSize = 2; + const unsigned KQuadEdgeSize = 4; + const unsigned KQuadInsideSize = 2; + const unsigned KQuadDomainLocSize = 2; - const unsigned kTriEdgeSize = 3; - const unsigned kTriInsideSize = 1; - const unsigned kTriDomainLocSize = 3; + const unsigned KTriEdgeSize = 3; + const unsigned KTriInsideSize = 1; + const unsigned KTriDomainLocSize = 3; - const unsigned kIsolineEdgeSize = 2; - const unsigned kIsolineInsideSize = 0; - const unsigned kIsolineDomainLocSize = 3; + const unsigned KIsolineEdgeSize = 2; + const unsigned KIsolineInsideSize = 0; + const unsigned KIsolineDomainLocSize = 3; - const char *domainName = ""; + const char *DomainName = ""; DXIL::SemanticKind kEdgeSemantic = DXIL::SemanticKind::TessFactor; - unsigned edgeSize = 0; + unsigned EdgeSize = 0; DXIL::SemanticKind kInsideSemantic = DXIL::SemanticKind::InsideTessFactor; - unsigned insideSize = 0; + unsigned InsideSize = 0; Status.domainLocSize = 0; - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - domainName = "IsoLine"; - edgeSize = kIsolineEdgeSize; - insideSize = kIsolineInsideSize; - Status.domainLocSize = kIsolineDomainLocSize; + DomainName = "IsoLine"; + EdgeSize = KIsolineEdgeSize; + InsideSize = KIsolineInsideSize; + Status.domainLocSize = KIsolineDomainLocSize; break; case DXIL::TessellatorDomain::Tri: - domainName = "Tri"; - edgeSize = kTriEdgeSize; - insideSize = kTriInsideSize; - Status.domainLocSize = kTriDomainLocSize; + DomainName = "Tri"; + EdgeSize = KTriEdgeSize; + InsideSize = KTriInsideSize; + Status.domainLocSize = KTriDomainLocSize; break; case DXIL::TessellatorDomain::Quad: - domainName = "Quad"; - edgeSize = kQuadEdgeSize; - insideSize = kQuadInsideSize; - 
Status.domainLocSize = kQuadDomainLocSize; + DomainName = "Quad"; + EdgeSize = KQuadEdgeSize; + InsideSize = KQuadInsideSize; + Status.domainLocSize = KQuadDomainLocSize; break; default: // Don't bother with other tests if domain is invalid return; } - bool bFoundEdgeSemantic = false; - bool bFoundInsideSemantic = false; - for (auto &SE : patchConstantSig.GetElements()) { - Semantic::Kind kind = SE->GetSemantic()->GetKind(); - if (kind == kEdgeSemantic) { - bFoundEdgeSemantic = true; - if (SE->GetRows() != edgeSize || SE->GetCols() > 1) { + bool FoundEdgeSemantic = false; + bool FoundInsideSemantic = false; + for (auto &SE : PatchConstantSig.GetElements()) { + Semantic::Kind Kind = SE->GetSemantic()->GetKind(); + if (Kind == kEdgeSemantic) { + FoundEdgeSemantic = true; + if (SE->GetRows() != EdgeSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), - std::to_string(SE->GetCols()), domainName, - std::to_string(edgeSize)}); + std::to_string(SE->GetCols()), DomainName, + std::to_string(EdgeSize)}); } - } else if (kind == kInsideSemantic) { - bFoundInsideSemantic = true; - if (SE->GetRows() != insideSize || SE->GetCols() > 1) { + } else if (Kind == kInsideSemantic) { + FoundInsideSemantic = true; + if (SE->GetRows() != InsideSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::SmInsideTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), std::to_string(SE->GetCols()), - domainName, std::to_string(insideSize)}); + DomainName, std::to_string(InsideSize)}); } } } - if (isHS) { - if (!bFoundEdgeSemantic) { + if (IsHs) { + if (!FoundEdgeSemantic) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } - if (!bFoundInsideSemantic && domain != DXIL::TessellatorDomain::IsoLine) { + if (!FoundInsideSemantic && Domain != DXIL::TessellatorDomain::IsoLine) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } } } static void 
ValidatePassThruHS(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { + const DxilEntryProps &EntryProps, Function *F) { // Check pass thru HS. if (F->isDeclaration()) { - const auto &props = entryProps.props; - if (props.IsHS()) { - const auto &HS = props.ShaderProps.HS; + const auto &Props = EntryProps.props; + if (Props.IsHS()) { + const auto &HS = Props.ShaderProps.HS; if (HS.inputControlPoints < HS.outputControlPoints) { ValCtx.EmitFnError( F, ValidationRule::SmHullPassThruControlPointCountMatch); @@ -5198,12 +5212,12 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // Check declared control point outputs storage amounts are ok to pass // through (less output storage than input for control points). - const DxilSignature &outSig = entryProps.sig.OutputSignature; - unsigned totalOutputCPScalars = 0; - for (auto &SE : outSig.GetElements()) { - totalOutputCPScalars += SE->GetRows() * SE->GetCols(); + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + unsigned TotalOutputCpScalars = 0; + for (auto &SE : OutSig.GetElements()) { + TotalOutputCpScalars += SE->GetRows() * SE->GetCols(); } - if (totalOutputCPScalars * HS.outputControlPoints > + if (TotalOutputCpScalars * HS.outputControlPoints > DXIL::kMaxHSOutputControlPointsTotalScalars) { ValCtx.EmitFnError(F, ValidationRule::SmOutputControlPointsTotalScalars); @@ -5218,35 +5232,35 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // validate wave size (currently allowed only on CS and node shaders but might // be supported on other shader types in the future) static void ValidateWaveSize(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { - const DxilFunctionProps &props = entryProps.props; - const hlsl::DxilWaveSize &waveSize = props.WaveSize; + const DxilEntryProps &EntryProps, Function *F) { + const DxilFunctionProps &Props = EntryProps.props; + const hlsl::DxilWaveSize &WaveSize = Props.WaveSize; - switch 
(waveSize.Validate()) { + switch (WaveSize.Validate()) { case hlsl::DxilWaveSize::ValidationResult::Success: break; case hlsl::DxilWaveSize::ValidationResult::InvalidMin: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Min", std::to_string(waveSize.Min), + {"Min", std::to_string(WaveSize.Min), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidMax: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Max", std::to_string(waveSize.Max), + {"Max", std::to_string(WaveSize.Max), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidPreferred: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Preferred", std::to_string(waveSize.Preferred), + {"Preferred", std::to_string(WaveSize.Preferred), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxOrPreferredWhenUndefined: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeAllZeroWhenUndefined, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxEqualsMin: // This case is allowed because users may disable the ErrorDefault warning. 
@@ -5254,227 +5268,227 @@ static void ValidateWaveSize(ValidationContext &ValCtx, case hlsl::DxilWaveSize::ValidationResult::PreferredWhenNoRange: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxAndPreferredZeroWhenNoRange, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxLessThanMin: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxGreaterThanMin, - {std::to_string(waveSize.Max), std::to_string(waveSize.Min)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Min)}); break; case hlsl::DxilWaveSize::ValidationResult::PreferredOutOfRange: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizePreferredInRange, - {std::to_string(waveSize.Preferred), - std::to_string(waveSize.Min), - std::to_string(waveSize.Max)}); + {std::to_string(WaveSize.Preferred), + std::to_string(WaveSize.Min), + std::to_string(WaveSize.Max)}); break; } // Check shader model and kind. 
- if (waveSize.IsDefined()) { - if (!props.IsCS() && !props.IsNode()) { + if (WaveSize.IsDefined()) { + if (!Props.IsCS() && !Props.IsNode()) { ValCtx.EmitFnError(F, ValidationRule::SmWaveSizeOnComputeOrNode); } } } static void ValidateEntryProps(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = entryProps.props; - DXIL::ShaderKind ShaderType = props.shaderKind; + const DxilFunctionProps &Props = EntryProps.props; + DXIL::ShaderKind ShaderType = Props.shaderKind; - ValidateWaveSize(ValCtx, entryProps, F); + ValidateWaveSize(ValCtx, EntryProps, F); - if (ShaderType == DXIL::ShaderKind::Compute || props.IsNode()) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + if (ShaderType == DXIL::ShaderKind::Compute || Props.IsNode()) { + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinCSThreadGroupX) || (x > DXIL::kMaxCSThreadGroupX)) { + if ((X < DXIL::kMinCSThreadGroupX) || (X > DXIL::kMaxCSThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinCSThreadGroupX), std::to_string(DXIL::kMaxCSThreadGroupX)}); } - if ((y < DXIL::kMinCSThreadGroupY) || (y > DXIL::kMaxCSThreadGroupY)) { + if ((Y < DXIL::kMinCSThreadGroupY) || (Y > DXIL::kMaxCSThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinCSThreadGroupY), std::to_string(DXIL::kMaxCSThreadGroupY)}); } - if ((z < DXIL::kMinCSThreadGroupZ) || (z > DXIL::kMaxCSThreadGroupZ)) { + if ((Z < DXIL::kMinCSThreadGroupZ) || (Z > DXIL::kMaxCSThreadGroupZ)) { ValCtx.EmitFnFormatError(F, 
ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinCSThreadGroupZ), std::to_string(DXIL::kMaxCSThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxCSThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxCSThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxCSThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. } else if (ShaderType == DXIL::ShaderKind::Mesh) { - const auto &MS = props.ShaderProps.MS; - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + const auto &MS = Props.ShaderProps.MS; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, 
ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. - unsigned maxVertexCount = MS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxMSOutputVertexCount) { + unsigned MaxVertexCount = MS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxMSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMeshShaderMaxVertexCount, {std::to_string(DXIL::kMaxMSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned maxPrimitiveCount = MS.maxPrimitiveCount; - if (maxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { + unsigned MaxPrimitiveCount = MS.maxPrimitiveCount; + if (MaxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderMaxPrimitiveCount, {std::to_string(DXIL::kMaxMSOutputPrimitiveCount), - std::to_string(maxPrimitiveCount)}); + std::to_string(MaxPrimitiveCount)}); } } else if (ShaderType == DXIL::ShaderKind::Amplification) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { 
ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. 
} else if (ShaderType == DXIL::ShaderKind::Domain) { - const auto &DS = props.ShaderProps.DS; - DXIL::TessellatorDomain domain = DS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = DS.inputControlPoints; + const auto &DS = Props.ShaderProps.DS; + DXIL::TessellatorDomain Domain = DS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = DS.inputControlPoints; - if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmDSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Hull) { - const auto &HS = props.ShaderProps.HS; - DXIL::TessellatorDomain domain = HS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = HS.inputControlPoints; - if (inputControlPointCount == 0) { - const DxilSignature &inputSig = entryProps.sig.InputSignature; - if (!inputSig.GetElements().empty()) { + const auto &HS = Props.ShaderProps.HS; + DXIL::TessellatorDomain Domain = HS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = HS.inputControlPoints; + if (InputControlPointCount == 0) { + const DxilSignature &InputSig = EntryProps.sig.InputSignature; + if 
(!InputSig.GetElements().empty()) { ValCtx.EmitFnError(F, ValidationRule::SmZeroHSInputControlPointWithInput); } - } else if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + } else if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmHSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - unsigned outputControlPointCount = HS.outputControlPoints; - if (outputControlPointCount < DXIL::kMinIAPatchControlPointCount || - outputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + unsigned OutputControlPointCount = HS.outputControlPoints; + if (OutputControlPointCount < DXIL::kMinIAPatchControlPointCount || + OutputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmOutputControlPointCountRange, {std::to_string(DXIL::kMinIAPatchControlPointCount), std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(outputControlPointCount)}); + std::to_string(OutputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - DXIL::TessellatorPartitioning partition = HS.partition; - if (partition == DXIL::TessellatorPartitioning::Undefined) { + DXIL::TessellatorPartitioning Partition = HS.partition; + if (Partition == DXIL::TessellatorPartitioning::Undefined) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorPartition); } - DXIL::TessellatorOutputPrimitive tessOutputPrimitive = HS.outputPrimitive; - if (tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::Undefined || - tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { + DXIL::TessellatorOutputPrimitive TessOutputPrimitive = HS.outputPrimitive; + if (TessOutputPrimitive == 
DXIL::TessellatorOutputPrimitive::Undefined || + TessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorOutputPrimitive); } - float maxTessFactor = HS.maxTessFactor; - if (maxTessFactor < DXIL::kHSMaxTessFactorLowerBound || - maxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { + float MaxTessFactor = HS.maxTessFactor; + if (MaxTessFactor < DXIL::kHSMaxTessFactorLowerBound || + MaxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { ValCtx.EmitFnFormatError( F, ValidationRule::MetaMaxTessFactor, {std::to_string(DXIL::kHSMaxTessFactorLowerBound), std::to_string(DXIL::kHSMaxTessFactorUpperBound), - std::to_string(maxTessFactor)}); + std::to_string(MaxTessFactor)}); } // Domain and OutPrimivtive match. - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::TriangleCW: case DXIL::TessellatorOutputPrimitive::TriangleCCW: ValCtx.EmitFnError(F, ValidationRule::SmIsoLineOutputPrimitiveMismatch); @@ -5484,7 +5498,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Tri: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5493,7 +5507,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Quad: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5506,39 +5520,39 @@ static void ValidateEntryProps(ValidationContext &ValCtx, break; } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Geometry) { - const auto 
&GS = props.ShaderProps.GS; - unsigned maxVertexCount = GS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxGSOutputVertexCount) { + const auto &GS = Props.ShaderProps.GS; + unsigned MaxVertexCount = GS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxGSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSOutputVertexCountRange, {std::to_string(DXIL::kMaxGSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned instanceCount = GS.instanceCount; - if (instanceCount > DXIL::kMaxGSInstanceCount || instanceCount < 1) { + unsigned InstanceCount = GS.instanceCount; + if (InstanceCount > DXIL::kMaxGSInstanceCount || InstanceCount < 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSInstanceCountRange, {std::to_string(DXIL::kMaxGSInstanceCount), - std::to_string(instanceCount)}); + std::to_string(InstanceCount)}); } - DXIL::PrimitiveTopology topo = DXIL::PrimitiveTopology::Undefined; - bool bTopoMismatch = false; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + DXIL::PrimitiveTopology Topo = DXIL::PrimitiveTopology::Undefined; + bool TopoMismatch = false; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - if (topo == DXIL::PrimitiveTopology::Undefined) - topo = GS.streamPrimitiveTopologies[i]; - else if (topo != GS.streamPrimitiveTopologies[i]) { - bTopoMismatch = true; + if (Topo == DXIL::PrimitiveTopology::Undefined) + Topo = GS.streamPrimitiveTopologies[I]; + else if (Topo != GS.streamPrimitiveTopologies[I]) { + TopoMismatch = true; break; } } } - if (bTopoMismatch) - topo = DXIL::PrimitiveTopology::Undefined; - switch (topo) { + if (TopoMismatch) + Topo = DXIL::PrimitiveTopology::Undefined; + switch (Topo) { case DXIL::PrimitiveTopology::PointList: case DXIL::PrimitiveTopology::LineStrip: case DXIL::PrimitiveTopology::TriangleStrip: @@ 
-5548,9 +5562,9 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; } - DXIL::InputPrimitive inputPrimitive = GS.inputPrimitive; - unsigned VertexCount = GetNumVertices(inputPrimitive); - if (VertexCount == 0 && inputPrimitive != DXIL::InputPrimitive::Undefined) { + DXIL::InputPrimitive InputPrimitive = GS.inputPrimitive; + unsigned VertexCount = GetNumVertices(InputPrimitive); + if (VertexCount == 0 && InputPrimitive != DXIL::InputPrimitive::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmGSValidInputPrimitive); } } @@ -5561,10 +5575,10 @@ static void ValidateShaderState(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntryProps(ValCtx, entryProps, Status, &F); - ValidatePassThruHS(ValCtx, entryProps, &F); + ValidateEntryProps(ValCtx, EntryProps, Status, &F); + ValidatePassThruHS(ValCtx, EntryProps, &F); } } } else { @@ -5575,33 +5589,33 @@ static void ValidateShaderState(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntryProps(ValCtx, entryProps, Status, Entry); - ValidatePassThruHS(ValCtx, entryProps, Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntryProps(ValCtx, EntryProps, Status, Entry); + ValidatePassThruHS(ValCtx, EntryProps, Entry); } } static CallGraphNode * -CalculateCallDepth(CallGraphNode *node, - std::unordered_map &depthMap, - std::unordered_set &callStack, - std::unordered_set &funcSet) { - unsigned depth = callStack.size(); - funcSet.insert(node->getFunction()); - for (auto it = node->begin(), ei = node->end(); it != ei; it++) { - CallGraphNode *toNode = it->second; - if (callStack.insert(toNode).second == false) { 
+CalculateCallDepth(CallGraphNode *Node, + std::unordered_map &DepthMap, + std::unordered_set &CallStack, + std::unordered_set &FuncSet) { + unsigned Depth = CallStack.size(); + FuncSet.insert(Node->getFunction()); + for (auto It = Node->begin(), EIt = Node->end(); It != EIt; It++) { + CallGraphNode *ToNode = It->second; + if (CallStack.insert(ToNode).second == false) { // Recursive. - return toNode; + return ToNode; } - if (depthMap[toNode] < depth) - depthMap[toNode] = depth; + if (DepthMap[ToNode] < Depth) + DepthMap[ToNode] = Depth; if (CallGraphNode *N = - CalculateCallDepth(toNode, depthMap, callStack, funcSet)) { + CalculateCallDepth(ToNode, DepthMap, CallStack, FuncSet)) { // Recursive return N; } - callStack.erase(toNode); + CallStack.erase(ToNode); } return nullptr; @@ -5611,29 +5625,29 @@ static void ValidateCallGraph(ValidationContext &ValCtx) { // Build CallGraph. CallGraph &CG = ValCtx.GetCallGraph(); - std::unordered_map depthMap; - std::unordered_set callStack; - CallGraphNode *entryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; - depthMap[entryNode] = 0; - if (CallGraphNode *N = CalculateCallDepth(entryNode, depthMap, callStack, + std::unordered_map DepthMap; + std::unordered_set CallStack; + CallGraphNode *EntryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; + DepthMap[EntryNode] = 0; + if (CallGraphNode *N = CalculateCallDepth(EntryNode, DepthMap, CallStack, ValCtx.entryFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), ValidationRule::FlowNoRecursion); if (ValCtx.DxilMod.GetShaderModel()->IsHS()) { - CallGraphNode *patchConstantNode = + CallGraphNode *PatchConstantNode = CG[ValCtx.DxilMod.GetPatchConstantFunction()]; - depthMap[patchConstantNode] = 0; - callStack.clear(); + DepthMap[PatchConstantNode] = 0; + CallStack.clear(); if (CallGraphNode *N = - CalculateCallDepth(patchConstantNode, depthMap, callStack, + CalculateCallDepth(PatchConstantNode, DepthMap, CallStack, ValCtx.patchConstFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), 
ValidationRule::FlowNoRecursion); } } static void ValidateFlowControl(ValidationContext &ValCtx) { - bool reducible = + bool Reducible = IsReducible(*ValCtx.DxilMod.GetModule(), IrreducibilityAction::Ignore); - if (!reducible) { + if (!Reducible) { ValCtx.EmitError(ValidationRule::FlowReducible); return; } @@ -5648,28 +5662,28 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { DominatorTree DT = DTA.run(F); LoopInfo LI; LI.Analyze(DT); - for (auto loopIt = LI.begin(); loopIt != LI.end(); loopIt++) { - Loop *loop = *loopIt; - SmallVector exitBlocks; - loop->getExitBlocks(exitBlocks); - if (exitBlocks.empty()) + for (auto LoopIt = LI.begin(); LoopIt != LI.end(); LoopIt++) { + Loop *Loop = *LoopIt; + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + if (ExitBlocks.empty()) ValCtx.EmitFnError(&F, ValidationRule::FlowDeadLoop); } // validate that there is no use of a value that has been output-completed // for this function. - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto &it : hlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { - Function *pF = it.second; + for (auto &It : HlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { + Function *pF = It.second; if (!pF) continue; // first, collect all the output complete calls that are not dominated // by another OutputComplete call for the same handle value llvm::SmallMapVector, 4> - handleToCI; + HandleToCI; for (User *U : pF->users()) { // all OutputComplete calls are instructions, and call instructions, // so there shouldn't need to be a null check. 
@@ -5681,33 +5695,33 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { continue; DxilInst_OutputComplete OutputComplete(CI); - Value *completedRecord = OutputComplete.get_output(); + Value *CompletedRecord = OutputComplete.get_output(); - auto vIt = handleToCI.find(completedRecord); - if (vIt == handleToCI.end()) { + auto vIt = HandleToCI.find(CompletedRecord); + if (vIt == HandleToCI.end()) { llvm::SmallPtrSet s; s.insert(CI); - handleToCI.insert(std::make_pair(completedRecord, s)); + HandleToCI.insert(std::make_pair(CompletedRecord, s)); } else { // if the handle is already in the map, make sure the map's set of // output complete calls that dominate the handle and do not dominate // each other gets updated if necessary bool CI_is_dominated = false; - for (auto ocIt = vIt->second.begin(); ocIt != vIt->second.end();) { + for (auto OcIt = vIt->second.begin(); OcIt != vIt->second.end();) { // if our new OC CI dominates an OC instruction in the set, // then replace the instruction in the set with the new OC CI. - if (DT.dominates(CI, *ocIt)) { - auto cur_it = ocIt++; + if (DT.dominates(CI, *OcIt)) { + auto cur_it = OcIt++; vIt->second.erase(*cur_it); continue; } // Remember if our new CI gets dominated by any CI in the set. 
- if (DT.dominates(*ocIt, CI)) { + if (DT.dominates(*OcIt, CI)) { CI_is_dominated = true; break; } - ocIt++; + OcIt++; } // if no CI in the set dominates our new CI, // the new CI should be added to the set @@ -5716,14 +5730,14 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { } } - for (auto handle_iter = handleToCI.begin(), e = handleToCI.end(); + for (auto handle_iter = HandleToCI.begin(), e = HandleToCI.end(); handle_iter != e; handle_iter++) { for (auto user_itr = handle_iter->first->user_begin(); user_itr != handle_iter->first->user_end(); user_itr++) { User *pU = *user_itr; - Instruction *useInstr = cast(pU); - if (useInstr) { - if (CallInst *CI = dyn_cast(useInstr)) { + Instruction *UseInstr = cast(pU); + if (UseInstr) { + if (CallInst *CI = dyn_cast(UseInstr)) { // if the user is an output complete call that is in the set of // OutputComplete calls not dominated by another OutputComplete // call for the same handle value, no diagnostics need to be @@ -5734,15 +5748,15 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { // make sure any output complete call in the set // that dominates this use gets its diagnostic emitted. 
- for (auto ocIt = handle_iter->second.begin(); - ocIt != handle_iter->second.end(); ocIt++) { - Instruction *ocInstr = cast(*ocIt); - if (DT.dominates(ocInstr, useInstr)) { + for (auto OcIt = handle_iter->second.begin(); + OcIt != handle_iter->second.end(); OcIt++) { + Instruction *OcInstr = cast(*OcIt); + if (DT.dominates(OcInstr, UseInstr)) { ValCtx.EmitInstrError( - useInstr, + UseInstr, ValidationRule::InstrNodeRecordHandleUseAfterComplete); ValCtx.EmitInstrNote( - *ocIt, "record handle invalidated by OutputComplete"); + *OcIt, "record handle invalidated by OutputComplete"); break; } } @@ -5758,57 +5772,57 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { static void ValidateUninitializedOutput(ValidationContext &ValCtx, Function *F) { DxilModule &DM = ValCtx.DxilMod; - DxilEntryProps &entryProps = DM.GetDxilEntryProps(F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); EntryStatus &Status = ValCtx.GetEntryStatus(F); - const DxilFunctionProps &props = entryProps.props; + const DxilFunctionProps &Props = EntryProps.props; // For HS only need to check Tessfactor which is in patch constant sig. - if (props.IsHS()) { - std::vector &patchConstOrPrimCols = Status.patchConstOrPrimCols; - const DxilSignature &patchConstSig = - entryProps.sig.PatchConstOrPrimSignature; - for (auto &E : patchConstSig.GetElements()) { - unsigned mask = patchConstOrPrimCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + if (Props.IsHS()) { + std::vector &PatchConstOrPrimCols = Status.patchConstOrPrimCols; + const DxilSignature &PatchConstSig = + EntryProps.sig.PatchConstOrPrimSignature; + for (auto &E : PatchConstSig.GetElements()) { + unsigned Mask = PatchConstOrPrimCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. 
- if (mask != requireMask && !E->GetSemantic()->IsArbitrary()) { + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary()) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } return; } - const DxilSignature &outSig = entryProps.sig.OutputSignature; - std::vector &outputCols = Status.outputCols; - for (auto &E : outSig.GetElements()) { - unsigned mask = outputCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + std::vector &OutputCols = Status.outputCols; + for (auto &E : OutSig.GetElements()) { + unsigned Mask = OutputCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. - if (mask != requireMask && !E->GetSemantic()->IsArbitrary() && + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary() && E->GetSemantic()->GetKind() != Semantic::Kind::Target) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } - if (!props.IsGS()) { - unsigned posMask = Status.OutputPositionMask[0]; - if (posMask != 0xf && Status.hasOutputPosition[0]) { + if (!Props.IsGS()) { + unsigned PosMask = Status.OutputPositionMask[0]; + if (PosMask != 0xf && Status.hasOutputPosition[0]) { ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition); } } else { - const auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + const auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - for (unsigned i = 0; i < DXIL::kNumOutputStreams; i++) { - if (streamMask & (1 << i)) { - unsigned posMask = Status.OutputPositionMask[i]; - if (posMask != 0xf && 
Status.hasOutputPosition[i]) { + for (unsigned I = 0; I < DXIL::kNumOutputStreams; I++) { + if (StreamMask & (1 << I)) { + unsigned PosMask = Status.OutputPositionMask[I]; + if (PosMask != 0xf && Status.hasOutputPosition[I]) { ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition); } } diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index 007e21ff19..c58a2e909a 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1247,8 +1247,8 @@ void DxilLinkJob::RunPreparePass(Module &M) { PM.add(createDxilReinsertNopsPass()); PM.add(createAlwaysInlinerPass(/*InsertLifeTime*/ false)); - // Need to lower vector load/stores to scalars here? - // If we need SROA and dynamicindexvector to array, it has to be here. + // If we need SROA and dynamicindexvector to array, + // do it early to allow following scalarization to go forward. PM.add(createDxilScalarizeVectorLoadStoresPass()); // Remove unused functions. @@ -1278,7 +1278,7 @@ void DxilLinkJob::RunPreparePass(Module &M) { PM.add(createScalarizerPass()); // Need dxilelimvector for pre 6.9 - //PM.add(createDxilEliminateVectorPass()); + // PM.add(createDxilEliminateVectorPass()); PM.add(createPromoteMemoryToRegisterPass()); diff --git a/lib/HLSL/DxilScalarizeVectorLoadStores.cpp b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp index 5b5c43875e..febcf32358 100644 --- a/lib/HLSL/DxilScalarizeVectorLoadStores.cpp +++ b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -24,13 +24,12 @@ using namespace llvm; using namespace hlsl; -class DxilScalarizeVectorLoadStores : public ModulePass { -private: - DxilModule *m_DM; - - void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, CallInst *CI); - void scalarizeVectorStore(hlsl::OP *HlslOP, const 
DataLayout &DL, CallInst *CI); +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); +class DxilScalarizeVectorLoadStores : public ModulePass { public: static char ID; // Pass identification, replacement for typeid explicit DxilScalarizeVectorLoadStores() : ModulePass(ID) {} @@ -41,8 +40,6 @@ class DxilScalarizeVectorLoadStores : public ModulePass { bool runOnModule(Module &M) override { DxilModule &DM = M.GetOrCreateDxilModule(); - m_DM = &DM; - // Shader Model 6.9 allows native vectors and doesn't need this pass. if (DM.GetShaderModel()->IsSM69Plus()) return false; @@ -50,30 +47,25 @@ class DxilScalarizeVectorLoadStores : public ModulePass { bool Changed = false; hlsl::OP *HlslOP = DM.GetOP(); - auto &LoadList = HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorLoad); - for (auto FIt = LoadList.begin(), FEnd = LoadList.end(); FIt != FEnd; FIt++) { - Function *F = FIt->second; - if (!F) + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorLoad)) { + Function *Func = FIt.second; + if (!Func) continue; - for (auto U = F->user_begin(), E = F->user_end(); U != E;) { + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { CallInst *CI = cast(*(U++)); scalarizeVectorLoad(HlslOP, M.getDataLayout(), CI); Changed = true; } - F->eraseFromParent(); } - - auto &StoreList = HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorStore); - for (auto FIt = StoreList.begin(), FEnd = StoreList.end(); FIt != FEnd; FIt++) { - Function *F = FIt->second; - if (!F) + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorStore)) { + Function *Func = FIt.second; + if (!Func) continue; - for (auto U = F->user_begin(), E = F->user_end(); U != E;) { + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { CallInst *CI = cast(*(U++)); scalarizeVectorStore(HlslOP, M.getDataLayout(), CI); Changed = true; } - 
F->eraseFromParent(); } return Changed; } @@ -96,24 +88,23 @@ static unsigned GetRawBufferMask(unsigned NumComponents) { return DXIL::kCompMask_All; } -void DxilScalarizeVectorLoadStores::scalarizeVectorLoad(hlsl::OP *HlslOP, - const DataLayout &DL, - CallInst *CI) { +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { IRBuilder<> Builder(CI); // Collect the information required to break this into scalar ops from args. DxilInst_RawBufferVectorLoad VecLd(CI); OP::OpCode OpCode = OP::OpCode::RawBufferLoad; - llvm::Constant *opArg = Builder.getInt32((unsigned)OpCode); + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); SmallVector Args; - Args.emplace_back(opArg); // opcode @0. - Args.emplace_back(VecLd.get_buf()); // Resource handle @1. - Args.emplace_back(VecLd.get_index()); // Index @2. + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecLd.get_buf()); // Resource handle @1. + Args.emplace_back(VecLd.get_index()); // Index @2. Args.emplace_back(VecLd.get_elementOffset()); // Offset @3. - Args.emplace_back(nullptr); // Mask to be set later @4. - Args.emplace_back(VecLd.get_alignment()); // Alignment @5. + Args.emplace_back(nullptr); // Mask to be set later @4. + Args.emplace_back(VecLd.get_alignment()); // Alignment @5. // Set offset to increment depending on whether the real offset is defined. - unsigned OffsetIdx = 0; + unsigned OffsetIdx; if (isa(VecLd.get_elementOffset())) // Byte Address Buffers can't use offset, so use index. OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; @@ -133,11 +124,11 @@ void DxilScalarizeVectorLoadStores::scalarizeVectorLoad(hlsl::OP *HlslOP, // Load 4 elements or however many less than 4 are left to load. 
unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = - HlslOP->GetI8Const(GetRawBufferMask(ChunkSize)); + HlslOP->GetI8Const(GetRawBufferMask(ChunkSize)); // If we've loaded a chunk already, update offset to next chunk. if (EIx > 0) Args[OffsetIdx] = - Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); Function *F = HlslOP->GetOpFunc(OpCode, EltTy); Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(OpCode)); for (unsigned ChIx = 0; ChIx < ChunkSize; ChIx++, EIx++) @@ -148,8 +139,7 @@ void DxilScalarizeVectorLoadStores::scalarizeVectorLoad(hlsl::OP *HlslOP, for (unsigned ElIx = 0; ElIx < NumComponents; ElIx++) RetValNew = Builder.CreateInsertElement(RetValNew, Elts[ElIx], ElIx); - // Replace users of the vector extracted from the vector load resret - // With our constructed one and we'll see if the can tell the difference. + // Replace users of the vector extracted from the vector load resret. Value *Status = nullptr; for (auto CU = CI->user_begin(), CE = CI->user_end(); CU != CE;) { auto EV = cast(*(CU++)); @@ -168,28 +158,27 @@ void DxilScalarizeVectorLoadStores::scalarizeVectorLoad(hlsl::OP *HlslOP, CI->eraseFromParent(); } -void DxilScalarizeVectorLoadStores::scalarizeVectorStore(hlsl::OP *HlslOP, - const DataLayout &DL, - CallInst *CI) { +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { IRBuilder<> Builder(CI); // Collect the information required to break this into scalar ops from args. DxilInst_RawBufferVectorStore VecSt(CI); OP::OpCode OpCode = OP::OpCode::RawBufferStore; - llvm::Constant *opArg = Builder.getInt32((unsigned)OpCode); + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); SmallVector Args; - Args.emplace_back(opArg); // opcode @0. - Args.emplace_back(VecSt.get_uav()); // Resource handle @1. - Args.emplace_back(VecSt.get_index()); // Index @2. 
+ Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecSt.get_uav()); // Resource handle @1. + Args.emplace_back(VecSt.get_index()); // Index @2. Args.emplace_back(VecSt.get_elementOffset()); // Offset @3. - Args.emplace_back(nullptr); // Val0 to be set later @4. - Args.emplace_back(nullptr); // Val1 to be set later @5. - Args.emplace_back(nullptr); // Val2 to be set later @6. - Args.emplace_back(nullptr); // Val3 to be set later @7. - Args.emplace_back(nullptr); // Mask to be set later @8. - Args.emplace_back(VecSt.get_alignment()); // Alignment @9. + Args.emplace_back(nullptr); // Val0 to be set later @4. + Args.emplace_back(nullptr); // Val1 to be set later @5. + Args.emplace_back(nullptr); // Val2 to be set later @6. + Args.emplace_back(nullptr); // Val3 to be set later @7. + Args.emplace_back(nullptr); // Mask to be set later @8. + Args.emplace_back(VecSt.get_alignment()); // Alignment @9. // Set offset to increment depending on whether the real offset is defined. - unsigned OffsetIdx = 0; + unsigned OffsetIdx; if (isa(VecSt.get_elementOffset())) // Byte Address Buffers can't use offset, so use index. OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; @@ -212,18 +201,20 @@ void DxilScalarizeVectorLoadStores::scalarizeVectorStore(hlsl::OP *HlslOP, // index or offset parameter. if (EIx > 0) Args[OffsetIdx] = - Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); // Populate all value arguments either with the vector or undefs. 
uint8_t Mask = 0; unsigned ChIx = 0; for (; ChIx < ChunkSize; ChIx++, EIx++) { - Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = Builder.CreateExtractElement(VecVal, EIx); + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = + Builder.CreateExtractElement(VecVal, EIx); Mask |= (1 << ChIx); } for (; ChIx < MaxElemCount; ChIx++) Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = UndefVal; - Args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] = HlslOP->GetU8Const(Mask); + Args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] = + HlslOP->GetU8Const(Mask); Builder.CreateCall(F, Args); } CI->eraseFromParent(); @@ -238,4 +229,3 @@ ModulePass *llvm::createDxilScalarizeVectorLoadStoresPass() { INITIALIZE_PASS(DxilScalarizeVectorLoadStores, "hlsl-dxil-scalarize-vector-load-stores", "DXIL scalarize vector load/stores", false, false) - diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 362a647e4f..a68bddaf32 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7,6 +7,9 @@ // // // Lower functions to lower HL operations to DXIL operations. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// #define _USE_MATH_DEFINES @@ -482,11 +485,11 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, } Value *TrivialDxilVectorOperation(Function *dxilFunc, OP::OpCode opcode, - ArrayRef refArgs, Type *Ty, - OP *hlslOP, IRBuilder<> &Builder) { + ArrayRef refArgs, Type *Ty, + OP *hlslOP, IRBuilder<> &Builder) { if (!Ty->isVoidTy()) { Value *retVal = - Builder.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode)); + Builder.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode)); return retVal; } else { // Cannot add name to void. 
@@ -494,20 +497,22 @@ Value *TrivialDxilVectorOperation(Function *dxilFunc, OP::OpCode opcode, } } - -Value *TrivialDxilVectorUnaryOperationRet(OP::OpCode opcode, Value *src, Type *Ty, - OP *hlslOP, IRBuilder<> &Builder) { +Value *TrivialDxilVectorUnaryOperationRet(OP::OpCode opcode, Value *src, + Type *Ty, OP *hlslOP, + IRBuilder<> &Builder) { Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {opArg, src}; Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); - return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, Builder); + return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, + Builder); } -Value *TrivialDxilVectorBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { +Value *TrivialDxilVectorBinaryOperation(OP::OpCode opcode, Value *src0, + Value *src1, hlsl::OP *hlslOP, + IRBuilder<> &Builder) { Type *Ty = src0->getType(); Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); @@ -515,7 +520,8 @@ Value *TrivialDxilVectorBinaryOperation(OP::OpCode opcode, Value *src0, Value *s Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); - return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, Builder); + return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, + Builder); } Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, @@ -544,24 +550,26 @@ Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } -Value *TrivialDxilTrinaryOperationRet(OP::OpCode opcode, Value *src0, Value *src1, - Value *src2, Type *Ty, hlsl::OP *hlslOP, - IRBuilder<> &Builder) { +Value *TrivialDxilTrinaryOperationRet(OP::OpCode opcode, Value *src0, + Value *src1, Value *src2, Type *Ty, + hlsl::OP *hlslOP, IRBuilder<> &Builder) { Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {opArg, src0, src1, src2}; return 
TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } -Value *TrivialDxilVectorTrinaryOperationRet(OP::OpCode opcode, Value *src0, Value *src1, - Value *src2, Type *Ty, hlsl::OP *hlslOP, - IRBuilder<> &Builder) { +Value *TrivialDxilVectorTrinaryOperationRet(OP::OpCode opcode, Value *src0, + Value *src1, Value *src2, Type *Ty, + hlsl::OP *hlslOP, + IRBuilder<> &Builder) { Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {opArg, src0, src1, src2}; Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); - return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, Builder); + return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, + Builder); } Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -576,22 +584,20 @@ Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return retVal; } -Value *TrivialVectorizableUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +Value *TrivialVectorizableUnaryOperation( + CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); Type *Ty = CI->getType(); IRBuilder<> Builder(CI); hlsl::OP *hlslOP = &helper.hlslOP; - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) - return TrivialDxilVectorUnaryOperationRet(opcode, src0, Ty, - hlslOP, Builder); + if (Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialDxilVectorUnaryOperationRet(opcode, src0, Ty, hlslOP, + Builder); else - return TrivialDxilUnaryOperationRet(opcode, src0, Ty, - hlslOP, Builder); + return TrivialDxilUnaryOperationRet(opcode, src0, Ty, hlslOP, Builder); } Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -608,10 +614,11 @@ 
Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return binOp; } -Value *TrivialVectorBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +Value *TrivialVectorBinaryOperation(CallInst *CI, IntrinsicOp IOP, + OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); @@ -623,9 +630,9 @@ Value *TrivialVectorBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode op } Value *TranslateFMA(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; Type *Ty = CI->getType(); Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); @@ -633,11 +640,12 @@ Value *TranslateFMA(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); IRBuilder<> Builder(CI); - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) - return TrivialDxilVectorTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, Builder); + if (Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialDxilVectorTrinaryOperationRet(opcode, src0, src1, src2, Ty, + hlslOP, Builder); else - return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, Builder); + return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, + Builder); } Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -1983,15 +1991,16 @@ Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, 
OP::OpCode opcode, IRBuilder<> Builder(CI); // min(max(x, minVal), maxVal). - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) { + if (Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) { Value *maxXMinVal = - TrivialDxilVectorBinaryOperation(maxOp, x, minVal, hlslOP, Builder); - return TrivialDxilVectorBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); + TrivialDxilVectorBinaryOperation(maxOp, x, minVal, hlslOP, Builder); + return TrivialDxilVectorBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, + Builder); } else { Value *maxXMinVal = - TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder); - return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); + TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder); + return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, + Builder); } } @@ -2305,11 +2314,12 @@ Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst); } val = Builder.CreateFMul(log2eConst, val); - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) - return TrivialDxilVectorUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, Builder); + if (Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialDxilVectorUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, + Builder); else - return TrivialDxilUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, Builder); + return TrivialDxilUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, + Builder); } Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2325,11 +2335,12 @@ Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const); } Value *log = nullptr; - if (Ty->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) - log = TrivialDxilVectorUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, Builder); + if 
(Ty->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) + log = TrivialDxilVectorUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, + Builder); else - log = TrivialDxilUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, Builder); + log = + TrivialDxilUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, Builder); return Builder.CreateFMul(ln2Const, log); } @@ -2389,13 +2400,12 @@ Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, break; } } - if (CI->getType()->isVectorTy() && - helper.M.GetShaderModel()->IsSM69Plus()) + if (CI->getType()->isVectorTy() && helper.M.GetShaderModel()->IsSM69Plus()) return TrivialVectorBinaryOperation(CI, IOP, opcode, helper, pObjHelper, - Translated); + Translated); else return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, - Translated); + Translated); } Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2420,7 +2430,8 @@ Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); IRBuilder<> Builder(CI); - return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, Builder); + return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, + Builder); } Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2544,9 +2555,8 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, // Instead of using a DXIL intrinsic, implement a dot product operation using // multiply and add operations. Used for integer dots and long vectors. -Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { +Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, + IRBuilder<> &Builder, bool Unsigned = false) { auto madOpCode = Unsigned ? 
DXIL::OpCode::UMad : DXIL::OpCode::IMad; if (arg0->getType()->getScalarType()->isFloatingPointTy()) madOpCode = DXIL::OpCode::FMad; @@ -2556,8 +2566,8 @@ Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, for (unsigned Elt = 1; Elt < vecSize; ++Elt) { Elt0 = Builder.CreateExtractElement(arg0, Elt); Elt1 = Builder.CreateExtractElement(arg1, Elt); - Result = TrivialDxilTrinaryOperationRet(madOpCode, Elt0, Elt1, Result, Elt0->getType(), hlslOP, - Builder); + Result = TrivialDxilTrinaryOperationRet(madOpCode, Elt0, Elt1, Result, + Elt0->getType(), hlslOP, Builder); } return Result; @@ -2595,11 +2605,12 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, unsigned vecSize = Ty->getVectorNumElements(); Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) { + if (Ty->getScalarType()->isFloatingPointTy() && + Ty->getVectorNumElements() <= 4) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_udot); + IOP == IntrinsicOp::IOP_udot); } } @@ -2782,8 +2793,9 @@ Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3); // Msad on vecref and byteSrc. 
- return TrivialDxilTrinaryOperationRet(DXIL::OpCode::Msad, vecRef, byteSrc, accum, - vecRef->getType(), hlslOP, Builder); + return TrivialDxilTrinaryOperationRet(DXIL::OpCode::Msad, vecRef, byteSrc, + accum, vecRef->getType(), hlslOP, + Builder); } Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -3148,7 +3160,7 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_umul); + IOP == IntrinsicOp::IOP_umul); } } else { // mul(vector, scalar) == vector * scalar-splat @@ -4297,6 +4309,9 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, if (isBool || (is64 && isTyped)) EltTy = Builder.getInt32Ty(); + // Calculate load size with the scalar memory element type. + unsigned LdSize = DL.getTypeAllocSize(EltTy); + // Adjust number of components as needed. if (is64 && isTyped) { // 64-bit types are stored as int32 pairs in typed buffers. @@ -4308,7 +4323,6 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, NumComponents = 1; } - unsigned LdSize = DL.getTypeAllocSize(EltTy); SmallVector Args = GetBufLoadArgs(helper, RK, Builder, LdSize); // Keep track of the first load for debug info migration. 
@@ -6549,7 +6563,8 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble}, {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_atan, TrivialVectorizableUnaryOperation, DXIL::OpCode::Atan}, + {IntrinsicOp::IOP_atan, TrivialVectorizableUnaryOperation, + DXIL::OpCode::Atan}, {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi}, {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes}, @@ -6640,7 +6655,8 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt}, {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan}, - {IntrinsicOp::IOP_tanh, TrivialVectorizableUnaryOperation, DXIL::OpCode::Htan}, + {IntrinsicOp::IOP_tanh, TrivialVectorizableUnaryOperation, + DXIL::OpCode::Htan}, {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, @@ -6950,6 +6966,12 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil // opcode + {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, }; } // namespace static_assert( diff --git a/tools/clang/CMakeLists.txt b/tools/clang/CMakeLists.txt index 71190336ca..449e6c28b4 100644 --- a/tools/clang/CMakeLists.txt +++ b/tools/clang/CMakeLists.txt @@ -1,4 +1,4 @@ 
-cmake_minimum_required(VERSION 2.8.8) +cmake_minimum_required(VERSION 3.17.2) # HLSL Change - Require CMake 3.17.2. # FIXME: It may be removed when we use 2.8.12. if(CMAKE_VERSION VERSION_LESS 2.8.12) diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 3b517576fe..ab29e4bde7 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /// /// \file // /// \brief Defines the HLSL type system interface. // @@ -31,6 +34,7 @@ namespace clang { class ASTContext; class AttributeList; +class CXXConstructorDecl; class CXXMethodDecl; class CXXRecordDecl; class ClassTemplateDecl; @@ -402,6 +406,10 @@ DeclareNodeOrRecordType(clang::ASTContext &Ctx, DXIL::NodeIOKind Type, bool IsCompleteType = false); #ifdef ENABLE_SPIRV_CODEGEN +clang::CXXRecordDecl * +DeclareVkBufferPointerType(clang::ASTContext &context, + clang::DeclContext *declContext); + clang::CXXRecordDecl *DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, @@ -427,7 +435,7 @@ clang::VarDecl *DeclareBuiltinGlobal(llvm::StringRef name, clang::QualType Ty, /// method. AST context in which to /// work. Class in which the function template /// is declared. Function for which a -/// template is created. Declarations for templates to the /// function. Count of /// template declarations. 
A new function template declaration @@ -533,6 +541,29 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, clang::QualType paramType); bool IsPatchConstantFunctionDecl(const clang::FunctionDecl *FD); +#ifdef ENABLE_SPIRV_CODEGEN +bool IsVKBufferPointerType(clang::QualType type); +clang::QualType GetVKBufferPointerBufferType(clang::QualType type); +unsigned GetVKBufferPointerAlignment(clang::QualType type); +#endif + +/// Adds a constructor declaration to the specified class +/// record. ASTContext that owns +/// declarations. Record declaration in which +/// to add constructor. Result type for +/// constructor. Types for constructor +/// parameters. Names for constructor +/// parameters. Name for +/// constructor. Whether the constructor is a +/// const function. The method declaration for the +/// constructor. +clang::CXXConstructorDecl *CreateConstructorDeclarationWithParams( + clang::ASTContext &context, clang::CXXRecordDecl *recordDecl, + clang::QualType resultType, llvm::ArrayRef paramTypes, + llvm::ArrayRef paramNames, + clang::DeclarationName declarationName, bool isConst, + bool isTemplateFunction = false); + /// Adds a function declaration to the specified class /// record. ASTContext that owns /// declarations. Record declaration in which diff --git a/tools/clang/include/clang/AST/OperationKinds.h b/tools/clang/include/clang/AST/OperationKinds.h index 75e665a5e9..3909c8b5e8 100644 --- a/tools/clang/include/clang/AST/OperationKinds.h +++ b/tools/clang/include/clang/AST/OperationKinds.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file enumerates the different kinds of operations that can be @@ -321,6 +324,8 @@ enum CastKind { CK_HLSLCC_FloatingToIntegral, CK_HLSLCC_FloatingToBoolean, CK_HLSLCC_FloatingCast, + CK_VK_BufferPointerToIntegral, + CK_VK_IntegralToBufferPointer, // HLSL Change - Made CK_Invalid an enum case because otherwise it is UB to // assign it to a value of CastKind. diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 7a009aa7e1..9c117fb3ce 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// class DocumentationCategory { @@ -1447,6 +1450,20 @@ def VKStorageClassExt : InheritableAttr { let Documentation = [Undocumented]; } +def VKBufferPointer : InheritableAttr { + let Spellings = [CXX11<"", "hlsl_vk_buffer_pointer", 2021>]; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + +def VKAliasedPointer : InheritableAttr { + let Spellings = [CXX11<"vk", "aliased_pointer">]; + let Subjects = SubjectList<[Var, ParmVar], ErrorDiag>; + let Args = []; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + // Global variables that are of struct type def StructGlobalVar : SubsetSubjecthasGlobalStorage() && S->getType()->isStructureType()}]>; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6ae59cac14..4f4dc28a4c 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5,6 +5,9 @@ // This file is distributed under 
the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -7838,7 +7841,7 @@ def warn_hlsl_intrinsic_in_wrong_shader_model : Warning< "intrinsic %0 potentially used by '%1' requires shader model %2 or greater">, DefaultError, InGroup; def warn_hlsl_intrinsic_overload_in_wrong_shader_model : Warning< - "overload of intrinsic %0 requires shader model %1 or greater">, + "overload of intrinsic %0 requires shader model %1 or greater">, DefaultError, InGroup; def err_hlsl_intrinsic_template_arg_unsupported: Error< "Explicit template arguments on intrinsic %0 are not supported">; @@ -8004,6 +8007,10 @@ def err_hlsl_hitobject_unsupported_stage : Error< // SPIRV Change Starts def err_hlsl_vulkan_specific_feature: Error<"%0 is a Vulkan specific feature">; +def err_hlsl_vk_pointer_cast_alignment: Error< + "Vulkan buffer pointer cannot be cast to greater alignment">; +def err_hlsl_vk_static_pointer_cast_type: Error< + "vk::static_pointer_cast() content type must be base class of argument's content type">; // SPIRV Change Ends let CategoryName = "OpenMP Issue" in { diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 8a9755ae79..3c1871df37 100644 --- a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -64,6 +64,7 @@ enum class Extension { KHR_maximal_reconvergence, KHR_float_controls, NV_shader_subgroup_partitioned, + KHR_quad_control, Unknown, }; diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index f03735115b..5e03d1ef96 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ 
b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVBUILDER_H #define LLVM_CLANG_SPIRV_SPIRVBUILDER_H @@ -239,7 +242,7 @@ class SpirvBuilder { /// \brief Creates an operation with the given OpGroupNonUniform* SPIR-V /// opcode. SpirvGroupNonUniformOp *createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation, llvm::Optional groupOp = llvm::None); @@ -273,6 +276,14 @@ class SpirvBuilder { SpirvInstruction *sample, SourceLocation); + /// \brief Creates an OpConverPtrToU SPIR-V instruction with the given + /// parameters. + SpirvConvertPtrToU *createConvertPtrToU(SpirvInstruction *ptr, QualType type); + + /// \brief Creates an OpConverUToPtr SPIR-V instruction with the given + /// parameters. + SpirvConvertUToPtr *createConvertUToPtr(SpirvInstruction *val, QualType type); + /// \brief Creates SPIR-V instructions for sampling the given image. /// /// If compareVal is given a non-zero value, *Dref* variants of OpImageSample* diff --git a/tools/clang/include/clang/SPIRV/SpirvContext.h b/tools/clang/include/clang/SPIRV/SpirvContext.h index e65097bedb..c18c139642 100644 --- a/tools/clang/include/clang/SPIRV/SpirvContext.h +++ b/tools/clang/include/clang/SPIRV/SpirvContext.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVCONTEXT_H #define LLVM_CLANG_SPIRV_SPIRVCONTEXT_H @@ -317,6 +320,13 @@ class SpirvContext { const HybridPointerType *getPointerType(QualType pointee, spv::StorageClass); + const ForwardPointerType *getForwardPointerType(QualType pointee); + + const SpirvPointerType *getForwardReference(QualType type); + + void registerForwardReference(QualType type, + const SpirvPointerType *pointerType); + /// Generates (or reuses an existing) OpString for the given string literal. SpirvString *getSpirvString(llvm::StringRef str); @@ -478,6 +488,8 @@ class SpirvContext { llvm::SmallVector hybridStructTypes; llvm::DenseMap pointerTypes; llvm::SmallVector hybridPointerTypes; + llvm::MapVector forwardPointerTypes; + llvm::MapVector forwardReferences; llvm::DenseSet functionTypes; llvm::DenseMap spirvIntrinsicTypesById; llvm::SmallVector spirvIntrinsicTypes; diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 7ec1375bde..f49a295610 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H #define LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H @@ -67,6 +71,10 @@ class SpirvInstruction { IK_ConstantComposite, IK_ConstantNull, + // Pointer <-> uint conversions. 
+ IK_ConvertPtrToU, + IK_ConvertUToPtr, + // OpUndef IK_Undef, @@ -1306,6 +1314,50 @@ class SpirvConstantNull : public SpirvConstant { bool operator==(const SpirvConstantNull &that) const; }; +class SpirvConvertPtrToU : public SpirvInstruction { +public: + SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertPtrToU) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertPtrToU; + } + + bool operator==(const SpirvConvertPtrToU &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPtr() const { return ptr; } + +private: + SpirvInstruction *ptr; +}; + +class SpirvConvertUToPtr : public SpirvInstruction { +public: + SpirvConvertUToPtr(SpirvInstruction *intValue, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertUToPtr) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertUToPtr; + } + + bool operator==(const SpirvConvertUToPtr &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getVal() const { return val; } + +private: + SpirvInstruction *val; +}; + class SpirvUndef : public SpirvInstruction { public: SpirvUndef(QualType type); @@ -1514,7 +1566,8 @@ class SpirvFunctionCall : public SpirvInstruction { /// \brief OpGroupNonUniform* instructions class SpirvGroupNonUniformOp : public SpirvInstruction { public: - SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, spv::Scope scope, + SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, + llvm::Optional scope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional group); @@ -1528,7 +1581,8 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { bool invokeVisitor(Visitor *v) override; - spv::Scope getExecutionScope() const { return execScope; } + 
bool hasExecutionScope() const { return execScope.hasValue(); } + spv::Scope getExecutionScope() const { return execScope.getValue(); } llvm::ArrayRef getOperands() const { return operands; } @@ -1546,7 +1600,7 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { } private: - spv::Scope execScope; + llvm::Optional execScope; llvm::SmallVector operands; llvm::Optional groupOp; }; diff --git a/tools/clang/include/clang/SPIRV/SpirvType.h b/tools/clang/include/clang/SPIRV/SpirvType.h index 221f01e5ff..00a00ef238 100644 --- a/tools/clang/include/clang/SPIRV/SpirvType.h +++ b/tools/clang/include/clang/SPIRV/SpirvType.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVTYPE_H #define LLVM_CLANG_SPIRV_SPIRVTYPE_H @@ -53,6 +56,7 @@ class SpirvType { TK_RuntimeArray, TK_Struct, TK_Pointer, + TK_ForwardPointer, TK_Function, TK_AccelerationStructureNV, TK_RayQueryKHR, @@ -387,6 +391,26 @@ class SpirvPointerType : public SpirvType { spv::StorageClass storageClass; }; +/// Represents a SPIR-V forwarding pointer type. +class ForwardPointerType : public SpirvType { +public: + ForwardPointerType(QualType pointee) + : SpirvType(TK_ForwardPointer), pointeeType(pointee) {} + + static bool classof(const SpirvType *t) { + return t->getKind() == TK_ForwardPointer; + } + + const QualType getPointeeType() const { return pointeeType; } + + bool operator==(const ForwardPointerType &that) const { + return pointeeType == that.pointeeType; + } + +private: + const QualType pointeeType; +}; + /// Represents a SPIR-V function type. None of the parameters nor the return /// type is allowed to be a hybrid type. 
class FunctionType : public SpirvType { diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index 303a4600a1..93682518a1 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVVISITOR_H #define LLVM_CLANG_SPIRV_SPIRVVISITOR_H @@ -89,6 +93,8 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvConstantFloat) DEFINE_VISIT_METHOD(SpirvConstantComposite) DEFINE_VISIT_METHOD(SpirvConstantNull) + DEFINE_VISIT_METHOD(SpirvConvertPtrToU) + DEFINE_VISIT_METHOD(SpirvConvertUToPtr) DEFINE_VISIT_METHOD(SpirvUndef) DEFINE_VISIT_METHOD(SpirvCompositeConstruct) DEFINE_VISIT_METHOD(SpirvCompositeExtract) diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index d6103b55e6..ac6e08b3fa 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool containsLongVector(clang::QualType qt); +bool ContainsLongVector(clang::QualType); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 3748f8f8f8..c7a031a219 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. 
// // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the ASTContext interface for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// @@ -1072,6 +1075,47 @@ static void CreateConstructorDeclaration( (*constructorDecl)->setAccess(AccessSpecifier::AS_public); } +CXXConstructorDecl *hlsl::CreateConstructorDeclarationWithParams( + ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, + ArrayRef paramTypes, ArrayRef paramNames, + DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DXASSERT_NOMSG(recordDecl != nullptr); + DXASSERT_NOMSG(!resultType.isNull()); + DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); + + TypeSourceInfo *tinfo; + CXXConstructorDecl *constructorDecl; + CreateConstructorDeclaration(context, recordDecl, resultType, paramTypes, + declarationName, isConst, &constructorDecl, + &tinfo); + + // Create and associate parameters to constructor. + SmallVector parmVarDecls; + if (!paramTypes.empty()) { + for (unsigned int i = 0; i < paramTypes.size(); ++i) { + IdentifierInfo *argIi = &context.Idents.get(paramNames[i]); + ParmVarDecl *parmVarDecl = ParmVarDecl::Create( + context, constructorDecl, NoLoc, NoLoc, argIi, paramTypes[i], + context.getTrivialTypeSourceInfo(paramTypes[i], NoLoc), + StorageClass::SC_None, nullptr); + parmVarDecl->setScopeInfo(0, i); + DXASSERT(parmVarDecl->getFunctionScopeIndex() == i, + "otherwise failed to set correct index"); + parmVarDecls.push_back(parmVarDecl); + } + constructorDecl->setParams(ArrayRef(parmVarDecls)); + AssociateParametersToFunctionPrototype(tinfo, &parmVarDecls.front(), + parmVarDecls.size()); + } + + // If this is going to be part of a template function decl, don't add it to + // the record because the template function decl will be added instead. 
+ if (!isTemplateFunction) + recordDecl->addDecl(constructorDecl); + + return constructorDecl; +} + static void CreateObjectFunctionDeclaration( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef args, DeclarationName declarationName, bool isConst, @@ -1324,6 +1368,41 @@ CXXRecordDecl *hlsl::DeclareNodeOrRecordType( } #ifdef ENABLE_SPIRV_CODEGEN +CXXRecordDecl *hlsl::DeclareVkBufferPointerType(ASTContext &context, + DeclContext *declContext) { + BuiltinTypeDeclBuilder Builder(declContext, "BufferPointer", + TagDecl::TagKind::TTK_Struct); + TemplateTypeParmDecl *TyParamDecl = + Builder.addTypeTemplateParam("recordtype"); + Builder.addIntegerTemplateParam("alignment", context.UnsignedIntTy, 0); + + Builder.startDefinition(); + + QualType paramType = QualType(TyParamDecl->getTypeForDecl(), 0); + CXXRecordDecl *recordDecl = Builder.getRecordDecl(); + + CXXMethodDecl *methodDecl = CreateObjectFunctionDeclarationWithParams( + context, recordDecl, context.getLValueReferenceType(paramType), {}, {}, + DeclarationName(&context.Idents.get("Get")), true); + CanQualType canQualType = + recordDecl->getTypeForDecl()->getCanonicalTypeUnqualified(); + CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, + {context.getRValueReferenceType(canQualType)}, {"bufferPointer"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false); + CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, {context.UnsignedIntTy}, {"address"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false); + + StringRef OpcodeGroup = GetHLOpcodeGroupName(HLOpcodeGroup::HLIntrinsic); + unsigned Opcode = static_cast(IntrinsicOp::MOP_GetBufferContents); + methodDecl->addAttr( + HLSLIntrinsicAttr::CreateImplicit(context, OpcodeGroup, "", Opcode)); + methodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + + return Builder.completeDefinition(); +} + CXXRecordDecl 
*hlsl::DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, diff --git a/tools/clang/lib/AST/Expr.cpp b/tools/clang/lib/AST/Expr.cpp index 0e2ec8c6c2..c6dc21217e 100644 --- a/tools/clang/lib/AST/Expr.cpp +++ b/tools/clang/lib/AST/Expr.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr class and subclasses. @@ -1716,7 +1719,11 @@ const char *CastExpr::getCastKindName() const { return "HLSLCC_FloatingToBoolean"; case CK_HLSLCC_FloatingCast: return "HLSLCC_FloatingCast"; - // HLSL Change Ends + case CK_VK_BufferPointerToIntegral: + return "VK_BufferPointerToIntegral"; + case CK_VK_IntegralToBufferPointer: + return "VK_IntegralToBufferPointer"; + // HLSL Change Ends } llvm_unreachable("Unhandled cast kind!"); diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index 5e8d4700bd..69e0760bce 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr constant evaluator. 
@@ -7829,6 +7832,12 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { return false; return Success(Value, E); } + + // HLSL Change Starts + case CK_VK_BufferPointerToIntegral: { + return false; + // HLSL Change Ends + } } llvm_unreachable("unknown cast resulting in integral value"); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index eaf8273413..d853125954 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -5,6 +5,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. // // /// /// \file // @@ -734,6 +737,50 @@ bool IsHLSLRayQueryType(clang::QualType type) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN +static llvm::Optional> +MaybeGetVKBufferPointerParams(clang::QualType type) { + const RecordType *RT = dyn_cast(type.getCanonicalType()); + if (!RT) + return llvm::None; + + const ClassTemplateSpecializationDecl *templateDecl = + dyn_cast(RT->getAsCXXRecordDecl()); + if (!templateDecl || !templateDecl->getName().equals("BufferPointer")) + return llvm::None; + + auto *namespaceDecl = + dyn_cast_or_null(templateDecl->getDeclContext()); + if (!namespaceDecl || !namespaceDecl->getName().equals("vk")) + return llvm::None; + + const TemplateArgumentList &argList = templateDecl->getTemplateArgs(); + QualType bufferType = argList[0].getAsType(); + unsigned align = + argList.size() > 1 ? 
argList[1].getAsIntegral().getLimitedValue() : 0; + return std::make_pair(bufferType, align); +} + +bool IsVKBufferPointerType(clang::QualType type) { + return MaybeGetVKBufferPointerParams(type).hasValue(); +} + +QualType GetVKBufferPointerBufferType(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert(bpParams.hasValue() && + "cannot get pointer type for type that is not a vk::BufferPointer"); + return bpParams.getValue().first; +} + +unsigned GetVKBufferPointerAlignment(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert( + bpParams.hasValue() && + "cannot get pointer alignment for type that is not a vk::BufferPointer"); + return bpParams.getValue().second; +} +#endif + QualType GetHLSLResourceResultType(QualType type) { // Don't canonicalize the type as to not lose snorm in Buffer const RecordType *RT = type->getAs(); diff --git a/tools/clang/lib/CodeGen/CGStmt.cpp b/tools/clang/lib/CodeGen/CGStmt.cpp index 080d824022..340550dbdd 100644 --- a/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/tools/clang/lib/CodeGen/CGStmt.cpp @@ -525,6 +525,10 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { // HLSL Change Begins. void CodeGenFunction::EmitDiscardStmt(const DiscardStmt &S) { + // Skip unreachable discard. + if (!HaveInsertPoint()) + return; + CGM.getHLSLRuntime().EmitHLSLDiscard(*this); } // HLSL Change Ends. diff --git a/tools/clang/lib/Lex/PPMacroExpansion.cpp b/tools/clang/lib/Lex/PPMacroExpansion.cpp index 64ce8c9182..ebfb93df2e 100644 --- a/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file implements the top level handling of macro expansion for the @@ -1080,7 +1083,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("nullability", true) .Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory)) .Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread)) - .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow)) + .Case("dataflow_sanitizer", + LangOpts.Sanitize.has(SanitizerKind::DataFlow)) // Objective-C features .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? .Case("objc_arc", LangOpts.ObjCAutoRefCount) @@ -1180,6 +1184,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("has_trivial_constructor", LangOpts.CPlusPlus) .Case("has_trivial_destructor", LangOpts.CPlusPlus) .Case("has_virtual_destructor", LangOpts.CPlusPlus) + .Case("hlsl_vk_buffer_pointer", LangOpts.SPIRV) .Case("is_abstract", LangOpts.CPlusPlus) .Case("is_base_of", LangOpts.CPlusPlus) .Case("is_class", LangOpts.CPlusPlus) diff --git a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp index 492640c493..db140f4766 100644 --- a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp +++ b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "AlignmentSizeCalculator.h" @@ -277,14 +280,20 @@ std::pair AlignmentSizeCalculator::getAlignmentAndSize( if (recordType != nullptr) { const llvm::StringRef name = recordType->getDecl()->getName(); - if (isTypeInVkNamespace(recordType) && name == "SpirvType") { - const ClassTemplateSpecializationDecl *templateDecl = - cast(recordType->getDecl()); - const uint64_t size = - templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); - const uint64_t alignment = - templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); - return {alignment, size}; + if (isTypeInVkNamespace(recordType)) { + if (name == "BufferPointer") { + return {8, 8}; // same as uint64_t + } + + if (name == "SpirvType") { + const ClassTemplateSpecializationDecl *templateDecl = + cast(recordType->getDecl()); + const uint64_t size = + templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); + const uint64_t alignment = + templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); + return {alignment, size}; + } } } diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index c2b5acff53..24dfdc2e9a 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "CapabilityVisitor.h" @@ -200,8 +203,10 @@ void CapabilityVisitor::addCapabilityForType(const SpirvType *type, } // Pointer type else if (const auto *ptrType = dyn_cast(type)) { - addCapabilityForType(ptrType->getPointeeType(), loc, sc); - if (sc == spv::StorageClass::PhysicalStorageBuffer) { + addCapabilityForType(ptrType->getPointeeType(), loc, + ptrType->getStorageClass()); + if (ptrType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer) { addExtension(Extension::KHR_physical_storage_buffer, "SPV_KHR_physical_storage_buffer", loc); addCapability(spv::Capability::PhysicalStorageBufferAddresses); @@ -882,6 +887,9 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { addCapability(spv::Capability::InterpolationFunction); + addExtensionAndCapabilitiesIfEnabled(Extension::KHR_quad_control, + {spv::Capability::QuadControlKHR}); + return true; } diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 6f6f5f88cd..eb00f59632 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // Do not change the inclusion order between "dxc/Support/*" files. 
@@ -488,6 +491,7 @@ std::vector EmitVisitor::takeBinary() { debugVariableBinary.end()); result.insert(result.end(), annotationsBinary.begin(), annotationsBinary.end()); + result.insert(result.end(), fwdDeclBinary.begin(), fwdDeclBinary.end()); result.insert(result.end(), typeConstantBinary.begin(), typeConstantBinary.end()); result.insert(result.end(), globalVarsBinary.begin(), globalVarsBinary.end()); @@ -1016,6 +1020,28 @@ bool EmitVisitor::visit(SpirvConstantNull *inst) { return true; } +bool EmitVisitor::visit(SpirvConvertPtrToU *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getPtr())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvConvertUToPtr *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getVal())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + bool EmitVisitor::visit(SpirvUndef *inst) { typeHandler.getOrCreateUndef(inst); emitDebugNameForInstruction(getOrAssignResultId(inst), @@ -1108,9 +1134,10 @@ bool EmitVisitor::visit(SpirvGroupNonUniformOp *inst) { initInstruction(inst); curInst.push_back(inst->getResultTypeId()); curInst.push_back(getOrAssignResultId(inst)); - curInst.push_back(typeHandler.getOrCreateConstantInt( - llvm::APInt(32, static_cast(inst->getExecutionScope())), - context.getUIntType(32), /* isSpecConst */ false)); + if (inst->hasExecutionScope()) + curInst.push_back(typeHandler.getOrCreateConstantInt( + llvm::APInt(32, static_cast(inst->getExecutionScope())), + context.getUIntType(32), /* isSpecConst */ false)); if (inst->hasGroupOp()) 
curInst.push_back(static_cast(inst->getGroupOp())); for (auto *operand : inst->getOperands()) @@ -2012,10 +2039,11 @@ void EmitTypeHandler::initTypeInstruction(spv::Op op) { curTypeInst.push_back(static_cast(op)); } -void EmitTypeHandler::finalizeTypeInstruction() { +void EmitTypeHandler::finalizeTypeInstruction(bool isFwdDecl) { curTypeInst[0] |= static_cast(curTypeInst.size()) << 16; - typeConstantBinary->insert(typeConstantBinary->end(), curTypeInst.begin(), - curTypeInst.end()); + auto binarySection = isFwdDecl ? fwdDeclBinary : typeConstantBinary; + binarySection->insert(binarySection->end(), curTypeInst.begin(), + curTypeInst.end()); } uint32_t EmitTypeHandler::getResultIdForType(const SpirvType *type, @@ -2594,6 +2622,17 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { curTypeInst.push_back(pointeeType); finalizeTypeInstruction(); } + // Forward pointer types + else if (const auto *fwdPtrType = dyn_cast(type)) { + const SpirvPointerType *ptrType = + context.getForwardReference(fwdPtrType->getPointeeType()); + const uint32_t refId = emitType(ptrType); + initTypeInstruction(spv::Op::OpTypeForwardPointer); + curTypeInst.push_back(refId); + curTypeInst.push_back(static_cast(ptrType->getStorageClass())); + finalizeTypeInstruction(true); + return refId; + } // Function types else if (const auto *fnType = dyn_cast(type)) { const uint32_t retTypeId = emitType(fnType->getReturnType()); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index 2f5d99b89d..1f9b0939e6 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_EMITVISITOR_H #define LLVM_CLANG_SPIRV_EMITVISITOR_H @@ -49,15 +53,15 @@ class EmitTypeHandler { EmitTypeHandler(ASTContext &astCtx, SpirvContext &spvContext, const SpirvCodeGenOptions &opts, FeatureManager &featureMgr, std::vector *debugVec, - std::vector *decVec, + std::vector *decVec, std::vector *fwdVec, std::vector *typesVec, const std::function &takeNextIdFn) : astContext(astCtx), context(spvContext), featureManager(featureMgr), debugVariableBinary(debugVec), annotationsBinary(decVec), - typeConstantBinary(typesVec), takeNextIdFunction(takeNextIdFn), - emittedConstantInts({}), emittedConstantFloats({}), - emittedConstantComposites({}), emittedConstantNulls({}), - emittedUndef({}), emittedConstantBools() { + fwdDeclBinary(fwdVec), typeConstantBinary(typesVec), + takeNextIdFunction(takeNextIdFn), emittedConstantInts({}), + emittedConstantFloats({}), emittedConstantComposites({}), + emittedConstantNulls({}), emittedUndef({}), emittedConstantBools() { assert(decVec); assert(typesVec); } @@ -120,7 +124,7 @@ class EmitTypeHandler { private: void initTypeInstruction(spv::Op op); - void finalizeTypeInstruction(); + void finalizeTypeInstruction(bool isFwdDecl = false); // Returns the result-id for the given type and decorations. 
If a type with // the same decorations have already been used, it returns the existing @@ -161,6 +165,7 @@ class EmitTypeHandler { std::vector curDecorationInst; std::vector *debugVariableBinary; std::vector *annotationsBinary; + std::vector *fwdDeclBinary; std::vector *typeConstantBinary; std::function takeNextIdFunction; @@ -207,7 +212,7 @@ class EmitVisitor : public Visitor { : Visitor(opts, spvCtx), astContext(astCtx), featureManager(featureMgr), id(0), typeHandler(astCtx, spvCtx, opts, featureMgr, &debugVariableBinary, - &annotationsBinary, &typeConstantBinary, + &annotationsBinary, &fwdDeclBinary, &typeConstantBinary, [this]() -> uint32_t { return takeNextId(); }), debugMainFileId(0), debugInfoExtInstId(0), debugLineStart(0), debugLineEnd(0), debugColumnStart(0), debugColumnEnd(0), @@ -254,6 +259,8 @@ class EmitVisitor : public Visitor { bool visit(SpirvConstantFloat *) override; bool visit(SpirvConstantComposite *) override; bool visit(SpirvConstantNull *) override; + bool visit(SpirvConvertPtrToU *) override; + bool visit(SpirvConvertUToPtr *) override; bool visit(SpirvUndef *) override; bool visit(SpirvCompositeConstruct *) override; bool visit(SpirvCompositeExtract *) override; @@ -438,7 +445,9 @@ class EmitVisitor : public Visitor { // All annotation instructions: OpDecorate, OpMemberDecorate, OpGroupDecorate, // OpGroupMemberDecorate, and OpDecorationGroup. 
std::vector annotationsBinary; - // All type and constant instructions + // All forward pointer type declaration instructions + std::vector fwdDeclBinary; + // All other type and constant instructions std::vector typeConstantBinary; // All global variable declarations (all OpVariable instructions whose Storage // Class is not Function) diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index a8ee1de000..7fb449fee9 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -226,6 +226,7 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_float_controls", Extension::KHR_float_controls) .Case("SPV_NV_shader_subgroup_partitioned", Extension::NV_shader_subgroup_partitioned) + .Case("SPV_KHR_quad_control", Extension::KHR_quad_control) .Default(Extension::Unknown); } @@ -297,6 +298,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_float_controls"; case Extension::NV_shader_subgroup_partitioned: return "SPV_NV_shader_subgroup_partitioned"; + case Extension::KHR_quad_control: + return "SPV_KHR_quad_control"; default: break; } diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index a5bc4a4aa8..b31d19b5d8 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include "LowerTypeVisitor.h" @@ -549,7 +552,9 @@ const SpirvType *LowerTypeVisitor::lowerType(QualType type, // checking the general struct type. 
if (const auto *spvType = lowerResourceType(type, rule, isRowMajor, srcLoc)) { - spvContext.registerStructDeclForSpirvType(spvType, decl); + if (!isa(spvType)) { + spvContext.registerStructDeclForSpirvType(spvType, decl); + } return spvType; } @@ -809,6 +814,32 @@ const SpirvType *LowerTypeVisitor::lowerVkTypeInVkNamespace( QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); return lowerType(realType, rule, llvm::None, srcLoc); } + if (name == "BufferPointer") { + const size_t visitedTypeStackSize = visitedTypeStack.size(); + (void)visitedTypeStackSize; // suppress unused warning (used only in assert) + + for (QualType t : visitedTypeStack) { + if (t == type) { + return spvContext.getForwardPointerType(type); + } + } + + QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); + if (rule == SpirvLayoutRule::Void) { + rule = spvOptions.sBufferLayoutRule; + } + visitedTypeStack.push_back(type); + + const SpirvType *spirvType = lowerType(realType, rule, llvm::None, srcLoc); + const auto *pointerType = spvContext.getPointerType( + spirvType, spv::StorageClass::PhysicalStorageBuffer); + spvContext.registerForwardReference(type, pointerType); + + assert(visitedTypeStack.back() == type); + visitedTypeStack.pop_back(); + assert(visitedTypeStack.size() == visitedTypeStackSize); + return pointerType; + } emitError("unknown type %0 in vk namespace", srcLoc) << type; return nullptr; } diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.h b/tools/clang/lib/SPIRV/LowerTypeVisitor.h index 96235d1508..5b26b67e3a 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.h +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_SPIRV_LOWERTYPEVISITOR_H @@ -137,6 +140,7 @@ class LowerTypeVisitor : public Visitor { AlignmentSizeCalculator alignmentCalc; /// alignment calculator bool useArrayForMat1xN; /// SPIR-V array for HLSL Matrix 1xN SpirvBuilder &spvBuilder; + SmallVector visitedTypeStack; // for type recursion detection }; } // end namespace spirv diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 1275e2b252..689fc0715f 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include "clang/SPIRV/SpirvBuilder.h" @@ -202,6 +205,14 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, instruction->setLayoutRule(pointer->getLayoutRule()); instruction->setRValue(true); + if (pointer->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer) { + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + resultType, pointer->getLayoutRule(), llvm::None, &stride); + instruction->setAlignment(align); + } + if (pointer->containsAliasComponent() && isAKindOfStructuredOrByteBuffer(resultType)) { instruction->setStorageClass(spv::StorageClass::Uniform); @@ -300,6 +311,16 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, new (context) SpirvStore(loc, address, source, llvm::None, range); insertPoint->addInstruction(instruction); + if (address->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer && + address->getAstResultType() != QualType()) { // exclude raw 
buffer + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + address->getAstResultType(), address->getLayoutRule(), llvm::None, + &stride); + instruction->setAlignment(align); + } + if (address->isRasterizerOrdered()) { createEndInvocationInterlockEXT(loc, range); } @@ -432,7 +453,7 @@ SpirvSpecConstantBinaryOp *SpirvBuilder::createSpecConstantBinaryOp( } SpirvGroupNonUniformOp *SpirvBuilder::createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional groupOp) { assert(insertPoint && "null insert point"); @@ -491,6 +512,22 @@ SpirvImageTexelPointer *SpirvBuilder::createImageTexelPointer( return instruction; } +SpirvConvertPtrToU *SpirvBuilder::createConvertPtrToU(SpirvInstruction *ptr, + QualType type) { + auto *instruction = new (context) SpirvConvertPtrToU(ptr, type); + instruction->setRValue(true); + insertPoint->addInstruction(instruction); + return instruction; +} + +SpirvConvertUToPtr *SpirvBuilder::createConvertUToPtr(SpirvInstruction *val, + QualType type) { + auto *instruction = new (context) SpirvConvertUToPtr(val, type); + instruction->setRValue(false); + insertPoint->addInstruction(instruction); + return instruction; +} + spv::ImageOperandsMask SpirvBuilder::composeImageOperandsMask( SpirvInstruction *bias, SpirvInstruction *lod, const std::pair &grad, diff --git a/tools/clang/lib/SPIRV/SpirvContext.cpp b/tools/clang/lib/SPIRV/SpirvContext.cpp index 6af36eb691..47dfc67433 100644 --- a/tools/clang/lib/SPIRV/SpirvContext.cpp +++ b/tools/clang/lib/SPIRV/SpirvContext.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include @@ -328,6 +331,29 @@ const HybridPointerType *SpirvContext::getPointerType(QualType pointee, return result; } +const ForwardPointerType * +SpirvContext::getForwardPointerType(QualType pointee) { + assert(hlsl::IsVKBufferPointerType(pointee)); + + auto foundPointee = forwardPointerTypes.find(pointee); + if (foundPointee != forwardPointerTypes.end()) { + return foundPointee->second; + } + + return forwardPointerTypes[pointee] = new (this) ForwardPointerType(pointee); +} + +const SpirvPointerType *SpirvContext::getForwardReference(QualType type) { + return forwardReferences[type]; +} + +void SpirvContext::registerForwardReference( + QualType type, const SpirvPointerType *pointerType) { + assert(pointerType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer); + forwardReferences[type] = pointerType; +} + FunctionType * SpirvContext::getFunctionType(const SpirvType *ret, llvm::ArrayRef param) { diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 579af04ea6..eed4f6369f 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file implements a SPIR-V emitter class that takes in HLSL AST and emits @@ -1233,12 +1237,17 @@ SpirvInstruction *SpirvEmitter::doExpr(const Expr *expr, } else if (isa(expr)) { assert(curThis); result = curThis; - } else if (isa(expr)) { + } else if (const auto *constructExpr = dyn_cast(expr)) { // For RayQuery type, we should not explicitly initialize it using // CXXConstructExpr e.g., RayQuery<0> r = RayQuery<0>() is the same as we do // not have a variable initialization. Setting nullptr for the SPIR-V // instruction used for expr will let us skip the variable initialization. - if (!hlsl::IsHLSLRayQueryType(expr->getType())) + if (hlsl::IsVKBufferPointerType(expr->getType())) { + const Expr *arg = constructExpr->getArg(0); + SpirvInstruction *value = loadIfGLValue(arg, arg->getSourceRange()); + result = spvBuilder.createConvertUToPtr(value, expr->getType()); + result->setRValue(); + } else if (!hlsl::IsHLSLRayQueryType(expr->getType())) result = curThis; } else if (const auto *unaryExpr = dyn_cast(expr)) { result = doUnaryExprOrTypeTraitExpr(unaryExpr); @@ -1543,7 +1552,23 @@ void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { // Create all parameters. for (uint32_t i = 0; i < decl->getNumParams(); ++i) { const ParmVarDecl *paramDecl = decl->getParamDecl(i); - (void)declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); + QualType paramType = paramDecl->getType(); + auto *param = + declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(paramType)) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(paramType, &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + param, + static_cast(paramDecl->hasAttr() + ? 
spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + } +#endif } if (decl->hasBody()) { @@ -1644,6 +1669,15 @@ bool SpirvEmitter::validateVKAttributes(const NamedDecl *decl) { loc); success = false; } + +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(cast(decl)->getType())) { + emitError("vk::push_constant attribute cannot be used on declarations " + "with vk::BufferPointer type", + loc); + success = false; + } +#endif } // vk::shader_record_nv is supported only on cbuffer/ConstantBuffer @@ -1951,6 +1985,11 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { return; } + if (hlsl::IsVKBufferPointerType(decl->getType()) && !decl->hasInit()) { + emitError("vk::BufferPointer has no default constructor", loc); + return; + } + // We can have VarDecls inside cbuffer/tbuffer. For those VarDecls, we need // to emit their cbuffer/tbuffer as a whole and access each individual one // using access chains. @@ -2037,10 +2076,24 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { needsLegalization = true; } - if (var != nullptr && decl->hasAttrs()) { - declIdMapper.decorateWithIntrinsicAttrs(decl, var); - if (auto attr = decl->getAttr()) { - var->setStorageClass(static_cast(attr->getStclass())); + if (var != nullptr) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(decl->getType(), &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + var, + static_cast(decl->hasAttr() + ? 
spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + + if (decl->hasAttrs()) { + declIdMapper.decorateWithIntrinsicAttrs(decl, var); + if (auto attr = decl->getAttr()) { + var->setStorageClass( + static_cast(attr->getStclass())); + } } } @@ -3665,6 +3718,12 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr, } case CastKind::CK_ToVoid: return nullptr; + case CastKind::CK_VK_BufferPointerToIntegral: { + return spvBuilder.createConvertPtrToU(doExpr(subExpr, range), toType); + } + case CastKind::CK_VK_IntegralToBufferPointer: { + return spvBuilder.createConvertUToPtr(doExpr(subExpr, range), toType); + } default: emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); @@ -5442,6 +5501,8 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, case IntrinsicOp::MOP_WorldRayDirection: case IntrinsicOp::MOP_WorldRayOrigin: return processRayQueryIntrinsics(expr, opcode); + case IntrinsicOp::MOP_GetBufferContents: + return processIntrinsicGetBufferContents(expr); default: emitError("intrinsic '%0' method unimplemented", expr->getCallee()->getExprLoc()) @@ -7021,6 +7082,12 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal, if (const auto *recordType = valType->getAs()) { assert(recordType->isStructureType()); + if (isTypeInVkNamespace(recordType) && + recordType->getDecl()->getName().equals("BufferPointer")) { + // Uniquely among structs, vk::BufferPointer lowers to a pointer type. 
+ return srcVal; + } + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, spvBuilder); const StructType *spirvStructType = @@ -9204,6 +9271,10 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_QuadReadLaneAt: retVal = processWaveQuadWideShuffle(callExpr, hlslOpcode); break; + case hlsl::IntrinsicOp::IOP_QuadAny: + case hlsl::IntrinsicOp::IOP_QuadAll: + retVal = processWaveQuadAnyAll(callExpr, hlslOpcode); + break; case hlsl::IntrinsicOp::IOP_abort: case hlsl::IntrinsicOp::IOP_GetRenderTargetSampleCount: case hlsl::IntrinsicOp::IOP_GetRenderTargetSamplePosition: { @@ -9403,6 +9474,14 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_EvaluateAttributeSnapped: { retVal = processEvaluateAttributeAt(callExpr, hlslOpcode, srcLoc, srcRange); break; + } + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, false); + break; + } + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, true); + break; } INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true); INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false); @@ -10158,6 +10237,53 @@ SpirvEmitter::processWaveQuadWideShuffle(const CallExpr *callExpr, opcode, retType, spv::Scope::Subgroup, {value, target}, srcLoc); } +SpirvInstruction *SpirvEmitter::processWaveQuadAnyAll(const CallExpr *callExpr, + hlsl::IntrinsicOp op) { + // Signatures: + // bool QuadAny(bool localValue) + // bool QuadAll(bool localValue) + assert(callExpr->getNumArgs() == 1); + assert(op == hlsl::IntrinsicOp::IOP_QuadAny || + op == hlsl::IntrinsicOp::IOP_QuadAll); + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "Wave Operation", + callExpr->getExprLoc()); + + auto *predicate = doExpr(callExpr->getArg(0)); + const auto srcLoc = callExpr->getExprLoc(); + + if (!featureManager.isExtensionEnabled(Extension::KHR_quad_control)) { + // We can't use 
QuadAny/QuadAll, so implement them using QuadSwap. We + // will read the value at each quad invocation, then combine them. + + spv::Op reducer = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpLogicalOr + : spv::Op::OpLogicalAnd; + + SpirvInstruction *result = predicate; + + for (size_t i = 0; i < 3; i++) { + SpirvInstruction *invocationValue = spvBuilder.createGroupNonUniformOp( + spv::Op::OpGroupNonUniformQuadSwap, astContext.BoolTy, + spv::Scope::Subgroup, + {predicate, spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, i))}, + srcLoc); + result = spvBuilder.createBinaryOp(reducer, astContext.BoolTy, result, + invocationValue, srcLoc); + } + + return result; + } + + spv::Op opcode = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpGroupNonUniformQuadAnyKHR + : spv::Op::OpGroupNonUniformQuadAllKHR; + + return spvBuilder.createGroupNonUniformOp(opcode, astContext.BoolTy, + llvm::Optional(), + {predicate}, srcLoc); +} + SpirvInstruction * SpirvEmitter::processWaveActiveAllEqual(const CallExpr *callExpr) { assert(callExpr->getNumArgs() == 1); @@ -10782,6 +10908,50 @@ SpirvEmitter::processIntrinsicClamp(const CallExpr *callExpr) { loc, range); } +SpirvInstruction * +SpirvEmitter::processIntrinsicPointerCast(const CallExpr *callExpr, + bool isStatic) { + const Expr *argExpr = callExpr->getArg(0); + SpirvInstruction *ptr = doExpr(argExpr); + QualType srcType = argExpr->getType(); + QualType destType = callExpr->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + return srcTypeArg == destTypeArg + ? 
ptr + : spvBuilder.createUnaryOp(spv::Op::OpBitcast, destType, ptr, + callExpr->getExprLoc(), + callExpr->getSourceRange()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( + const CXXMemberCallExpr *callExpr) { + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + Expr *obj = callExpr->getImplicitObjectArgument(); + SpirvInstruction *bufferPointer = doExpr(obj); + if (!bufferPointer) + return nullptr; + unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); + lowerTypeVisitor.visitInstruction(bufferPointer); + + const SpirvPointerType *bufferPointerType = + dyn_cast(bufferPointer->getResultType()); + SpirvLoad *retVal = + spvBuilder.createLoad(bufferPointerType->getPointeeType(), bufferPointer, + callExpr->getLocStart()); + if (!align) { + QualType bufferType = hlsl::GetVKBufferPointerBufferType(obj->getType()); + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t stride; + std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( + bufferType, retVal->getLayoutRule(), llvm::None, &stride); + } + retVal->setAlignment(align); + retVal->setRValue(false); + return retVal; +} + SpirvInstruction * SpirvEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr, bool isDevice, bool groupSync, diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index eca038527f..79d2c43c35 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file defines a SPIR-V emitter class that takes in HLSL AST and emits @@ -491,6 +495,15 @@ class SpirvEmitter : public ASTConsumer { /// Processes the 'lit' intrinsic function. SpirvInstruction *processIntrinsicLit(const CallExpr *); + /// Processes the 'vk::static_pointer_cast' and 'vk_reinterpret_pointer_cast' + /// intrinsic functions. + SpirvInstruction *processIntrinsicPointerCast(const CallExpr *, + bool isStatic); + + /// Processes the vk::BufferPointer intrinsic function 'Get'. + SpirvInstruction * + processIntrinsicGetBufferContents(const CXXMemberCallExpr *); + /// Processes the 'GroupMemoryBarrier', 'GroupMemoryBarrierWithGroupSync', /// 'DeviceMemoryBarrier', 'DeviceMemoryBarrierWithGroupSync', /// 'AllMemoryBarrier', and 'AllMemoryBarrierWithGroupSync' intrinsic @@ -657,6 +670,10 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction *processWaveQuadWideShuffle(const CallExpr *, hlsl::IntrinsicOp op); + /// Processes SM6.7 quad any/all. + SpirvInstruction *processWaveQuadAnyAll(const CallExpr *, + hlsl::IntrinsicOp op); + /// Generates the Spir-V instructions needed to implement the given call to /// WaveActiveAllEqual. Returns a pointer to the instruction that produces the /// final result. diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 21aada9e82..f41de03adc 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the in-memory representation of SPIR-V instructions. 
@@ -57,6 +61,8 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantInteger) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantFloat) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantComposite) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantNull) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertPtrToU) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertUToPtr) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvUndef) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeConstruct) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeExtract) @@ -620,6 +626,28 @@ bool SpirvConstantNull::operator==(const SpirvConstantNull &that) const { astResultType == that.astResultType; } +SpirvConvertPtrToU::SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertPtrToU, spv::Op::OpConvertPtrToU, type, loc, + range), + ptr(ptr) {} + +bool SpirvConvertPtrToU::operator==(const SpirvConvertPtrToU &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && ptr == that.ptr; +} + +SpirvConvertUToPtr::SpirvConvertUToPtr(SpirvInstruction *val, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertUToPtr, spv::Op::OpConvertUToPtr, type, loc, + range), + val(val) {} + +bool SpirvConvertUToPtr::operator==(const SpirvConvertUToPtr &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && val == that.val; +} + SpirvUndef::SpirvUndef(QualType type) : SpirvInstruction(IK_Undef, spv::Op::OpUndef, type, /*SourceLocation*/ {}) {} @@ -677,7 +705,7 @@ SpirvFunctionCall::SpirvFunctionCall(QualType resultType, SourceLocation loc, function(fn), args(argsVec.begin(), argsVec.end()) {} SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope scope, + spv::Op op, QualType resultType, llvm::Optional scope, llvm::ArrayRef operandsVec, SourceLocation loc, llvm::Optional group) : 
SpirvInstruction(IK_GroupNonUniformOp, op, resultType, loc), @@ -709,6 +737,8 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( case spv::Op::OpGroupNonUniformLogicalAnd: case spv::Op::OpGroupNonUniformLogicalOr: case spv::Op::OpGroupNonUniformLogicalXor: + case spv::Op::OpGroupNonUniformQuadAnyKHR: + case spv::Op::OpGroupNonUniformQuadAllKHR: assert(operandsVec.size() == 1); break; @@ -740,6 +770,11 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( assert(false && "Unexpected Group non-uniform opcode"); break; } + + if (op != spv::Op::OpGroupNonUniformQuadAnyKHR && + op != spv::Op::OpGroupNonUniformQuadAllKHR) { + assert(scope.hasValue()); + } } SpirvImageOp::SpirvImageOp( diff --git a/tools/clang/lib/Sema/SemaCast.cpp b/tools/clang/lib/Sema/SemaCast.cpp index 10668dc388..f5a864e2b6 100644 --- a/tools/clang/lib/Sema/SemaCast.cpp +++ b/tools/clang/lib/Sema/SemaCast.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements semantic analysis for cast expressions, including @@ -1543,6 +1546,20 @@ TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, if (InitSeq.isConstructorInitialization()) Kind = CK_ConstructorConversion; +#ifdef ENABLE_SPIRV_CODEGEN + // Special cases for vk::BufferPointer. 
+ else if (hlsl::IsVKBufferPointerType(SrcExpr.get()->getType()) && + DestType->isIntegerType() && CCK == Sema::CCK_CStyleCast) { + Kind = CK_VK_BufferPointerToIntegral; + SrcExpr = Result; + return TC_Success; + } else if (hlsl::IsVKBufferPointerType(DestType) && + SrcExpr.get()->getType()->isIntegerType()) { + Kind = CK_VK_IntegralToBufferPointer; + SrcExpr = Result; + return TC_Success; + } +#endif else Kind = CK_NoOp; diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 0f27de8291..36ab55ea10 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,7 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (containsLongVector(Payload->getType())) { + if (ContainsLongVector(Payload->getType())) { const unsigned PayloadParametersIdx = 10; S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << PayloadParametersIdx; diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index f46bb0ad9f..4723bc93e9 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// /// /// \file @@ -1052,6 +1055,31 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, // corresponding cast expression. 
if (Exprs.size() == 1 && !ListInitialization) { Expr *Arg = Exprs[0]; +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(Ty) && Arg->getType()->isIntegerType()) { + for (auto *ctor : Ty->getAsCXXRecordDecl()->ctors()) { + if (auto *functionType = ctor->getType()->getAs()) { + if (functionType->getNumParams() != 1 || + !functionType->getParamType(0)->isIntegerType()) + continue; + + CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); + if (!Arg->isRValue()) { + Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); + } + if (argType != Context.UnsignedLongLongTy) { + Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, + CK_IntegralCast) + .get(); + } + return CXXConstructExpr::Create( + Context, Ty, TyBeginLoc, ctor, false, {Arg}, false, false, false, + false, CXXConstructExpr::ConstructionKind::CK_Complete, + SourceRange(LParenLoc, RParenLoc)); + } + } + } +#endif return BuildCXXFunctionalCastExpr(TInfo, LParenLoc, Arg, RParenLoc); } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 18d0bfec01..1ef555c6df 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the semantic support for HLSL. 
// // // /////////////////////////////////////////////////////////////////////////////// @@ -195,6 +198,7 @@ enum ArBasicKind { AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -385,7 +389,7 @@ enum ArBasicKind { (IS_BPROP_AINT(_Props) && GET_BPROP_BITS(_Props) != BPROP_BITS12) #define IS_BPROP_ENUM(_Props) (((_Props)&BPROP_ENUM) != 0) -#define IS_BPROP_RAWBUFFER(_Props) (((_Props)&BPROP_RAWBUFFER) != 0) +#define IS_BPROP_RAWBUFFER(_Props) (((_Props) & BPROP_RAWBUFFER) != 0) const UINT g_uBasicKindProps[] = { BPROP_PRIMITIVE | BPROP_BOOLEAN | BPROP_INTEGER | BPROP_NUMERIC | @@ -514,14 +518,22 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT | BPROP_RWBUFFER | BPROP_TEXTURE, // AR_OBJECT_RWTEXTURE3D BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_RWBUFFER - BPROP_OBJECT | BPROP_RBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_BYTEADDRESS_BUFFER - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_RWBYTEADDRESS_BUFFER - BPROP_OBJECT | BPROP_RBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_STRUCTURED_BUFFER - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER_ALLOC - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER_CONSUME - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_APPEND_STRUCTURED_BUFFER - BPROP_OBJECT | BPROP_RWBUFFER | BPROP_RAWBUFFER, // AR_OBJECT_CONSUME_STRUCTURED_BUFFER + BPROP_OBJECT | BPROP_RBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_BYTEADDRESS_BUFFER + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_RWBYTEADDRESS_BUFFER + BPROP_OBJECT | BPROP_RBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_STRUCTURED_BUFFER + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // 
AR_OBJECT_RWSTRUCTURED_BUFFER_ALLOC + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_RWSTRUCTURED_BUFFER_CONSUME + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_APPEND_STRUCTURED_BUFFER + BPROP_OBJECT | BPROP_RWBUFFER | + BPROP_RAWBUFFER, // AR_OBJECT_CONSUME_STRUCTURED_BUFFER BPROP_OBJECT | BPROP_RBUFFER, // AR_OBJECT_CONSTANT_BUFFER BPROP_OBJECT | BPROP_RBUFFER, // AR_OBJECT_TEXTURE_BUFFER @@ -552,6 +564,7 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT, // AR_OBJECT_VK_LITERAL, BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE use recordType BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID use recordType + BPROP_OBJECT, // AR_OBJECT_VK_BUFFER_POINTER use recordType #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1236,6 +1249,11 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, AR_BASIC_UNKNOWN}; +#ifdef ENABLE_SPIRV_CODEGEN +static const ArBasicKind g_VKBufferPointerCT[] = {AR_OBJECT_VK_BUFFER_POINTER, + AR_BASIC_UNKNOWN}; +#endif + // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. 
const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_NullCT, // LICOMPTYPE_VOID @@ -1291,6 +1309,9 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT +#ifdef ENABLE_SPIRV_CODEGEN + g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER +#endif }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -1349,6 +1370,7 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_VK_SPIRV_TYPE, AR_OBJECT_VK_SPIRV_OPAQUE_TYPE, AR_OBJECT_VK_INTEGRAL_CONSTANT, AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1455,6 +1477,7 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_VK_LITERAL, 1, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE 1, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + 2, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1603,6 +1626,7 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_LITERAL, {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1767,6 +1791,7 @@ static const char *g_ArBasicTypeNames[] = { "Literal", "ext_type", "ext_result_id", + "BufferPointer", #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -2985,6 +3010,7 @@ class HLSLExternalSource : public ExternalSemaSource { ClassTemplateDecl *m_vkIntegralConstantTemplateDecl; ClassTemplateDecl *m_vkLiteralTemplateDecl; + ClassTemplateDecl *m_vkBufferPointerTemplateDecl; // Declarations for Work Graph Output Record types ClassTemplateDecl 
*m_GroupNodeOutputRecordsTemplateDecl; @@ -3490,6 +3516,25 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.push_back(templateTypeParmDecl); continue; } + if (pArgs[i].uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (TInfo == nullptr) { + TInfo = m_sema->getASTContext().CreateTypeSourceInfo( + m_context->UnsignedIntTy, 0); + } + IdentifierInfo *idT = &context.Idents.get("T"); + IdentifierInfo *idA = &context.Idents.get("A"); + TemplateTypeParmDecl *templateTypeParmDecl = + TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, + 0, idT, TypenameTrue, + ParameterPackFalse); + NonTypeTemplateParmDecl *nonTypeTemplateParmDecl = + NonTypeTemplateParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, + 0, 1, idA, context.UnsignedIntTy, + ParameterPackFalse, TInfo); + templateTypeParmDecl->setDefaultArgument(TInfo); + templateTypeParmDecls.push_back(templateTypeParmDecl); + templateTypeParmDecls.push_back(nonTypeTemplateParmDecl); + } } return templateTypeParmDecls; } @@ -3558,6 +3603,21 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_HIT_OBJECT: paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); break; +#ifdef ENABLE_SPIRV_CODEGEN + case LICOMPTYPE_VK_BUFFER_POINTER: { + const ArBasicKind *match = + std::find(g_ArBasicKindsAsTypes, + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + AR_OBJECT_VK_BUFFER_POINTER); + DXASSERT(match != + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + "otherwise can't find constant in basic kinds"); + size_t index = match - g_ArBasicKindsAsTypes; + paramTypes.push_back( + m_sema->getASTContext().getTypeDeclType(m_objectTypeDecls[index])); + break; + } +#endif default: DXASSERT(false, "Argument type of intrinsic function is not " "supported"); @@ -3936,6 +3996,12 @@ class HLSLExternalSource : public ExternalSemaSource { recordDecl = DeclareTemplateTypeWithHandleInDeclContext( *m_context, m_vkNSDecl, typeName, 1, nullptr); 
recordDecl->setImplicit(true); + } else if (kind == AR_OBJECT_VK_BUFFER_POINTER) { + if (!m_vkNSDecl) + continue; + recordDecl = DeclareVkBufferPointerType(*m_context, m_vkNSDecl); + recordDecl->setImplicit(true); + m_vkBufferPointerTemplateDecl = recordDecl->getDescribedClassTemplate(); } #endif else if (templateArgCount == 0) { @@ -4048,7 +4114,8 @@ class HLSLExternalSource : public ExternalSemaSource { HLSLExternalSource() : m_matrixTemplateDecl(nullptr), m_vectorTemplateDecl(nullptr), m_vkIntegralConstantTemplateDecl(nullptr), - m_vkLiteralTemplateDecl(nullptr), m_hlslNSDecl(nullptr), + m_vkLiteralTemplateDecl(nullptr), + m_vkBufferPointerTemplateDecl(nullptr), m_hlslNSDecl(nullptr), m_vkNSDecl(nullptr), m_dxNSDecl(nullptr), m_context(nullptr), m_sema(nullptr), m_hlslStringTypedef(nullptr) { memset(m_matrixTypes, 0, sizeof(m_matrixTypes)); @@ -4806,7 +4873,11 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_NODE_OUTPUT_ARRAY: case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: - case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: { + case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: +#ifdef ENABLE_SPIRV_CODEGEN + case AR_OBJECT_VK_BUFFER_POINTER: +#endif + { const ArBasicKind *match = std::find( g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); @@ -5322,6 +5393,10 @@ class HLSLExternalSource : public ExternalSemaSource { << type << GetMatrixOrVectorElementType(type); } return valid; +#ifdef ENABLE_SPIRV_CODEGEN + } else if (hlsl::IsVKBufferPointerType(qt)) { + return true; +#endif } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); objectKind = ClassifyRecordType(recordType); @@ -5466,7 +5541,7 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argSrcLoc, argType, diag::err_typecheck_decl_incomplete_type); - if (containsLongVector(argType)) { + if (ContainsLongVector(argType)) { const unsigned 
ConstantBuffersOrTextureBuffersIdx = 0; m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << ConstantBuffersOrTextureBuffersIdx; @@ -5574,7 +5649,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (containsLongVector(arg.getAsType())) { + if (ContainsLongVector(arg.getAsType())) { const unsigned TessellationPatchesIDx = 1; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -5593,7 +5668,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (containsLongVector(arg.getAsType())) { + if (ContainsLongVector(arg.getAsType())) { const unsigned GeometryStreamsIdx = 2; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -6794,6 +6869,7 @@ bool HLSLExternalSource::MatchArguments( if (pIntrinsic->pArgs[0].qwUsage && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_TYPE && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION && + pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION_2 && pIntrinsic->pArgs[0].uComponentTypeId != INTRIN_COMPTYPE_FROM_NODEOUTPUT) { CAB(pIntrinsic->pArgs[0].uTemplateId < MaxIntrinsicArgs, 0); @@ -6834,7 +6910,8 @@ bool HLSLExternalSource::MatchArguments( // Check template. if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE || - pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION) { + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION || + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { continue; // Already verified that this is available. 
} if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { @@ -7003,6 +7080,14 @@ bool HLSLExternalSource::MatchArguments( } else { pNewType = functionTemplateTypeArg; } + } else if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (i == 0 && + (builtinOp == hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast || + builtinOp == hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast)) { + pNewType = Args[0]->getType(); + } else { + badArgIdx = std::min(badArgIdx, i); + } } else if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { if (objectElement.isNull()) { @@ -9689,6 +9774,13 @@ bool HLSLExternalSource::CanConvert(SourceLocation loc, Expr *sourceExpr, return false; } +#ifdef ENABLE_SPIRV_CODEGEN + // Cast vk::BufferPointer to pointer address. + if (SourceInfo.EltKind == AR_OBJECT_VK_BUFFER_POINTER) { + return TargetInfo.EltKind == AR_BASIC_UINT64; + } +#endif + // Cast cbuffer to its result value. if ((SourceInfo.EltKind == AR_OBJECT_CONSTANT_BUFFER || SourceInfo.EltKind == AR_OBJECT_TEXTURE_BUFFER) && @@ -11537,6 +11629,32 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN +static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, + bool isStatic) { + const Expr *argExpr = CE->getArg(0); + QualType srcType = argExpr->getType(); + QualType destType = CE->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + + if (isStatic && srcTypeArg != destTypeArg && + !S.IsDerivedFrom(srcTypeArg, destTypeArg)) { + S.Diags.Report(CE->getExprLoc(), + diag::err_hlsl_vk_static_pointer_cast_type); + return true; + } + + if (hlsl::GetVKBufferPointerAlignment(destType) > + hlsl::GetVKBufferPointerAlignment(srcType)) { + S.Diags.Report(CE->getExprLoc(), diag::err_hlsl_vk_pointer_cast_alignment); + return true; + } + + return false; +} +#endif + // Check HLSL call 
constraints, not fatal to creating the AST. void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto) { @@ -11555,6 +11673,14 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, case hlsl::IntrinsicOp::IOP_Barrier: CheckBarrierCall(*this, FDecl, TheCall); break; +#ifdef ENABLE_SPIRV_CODEGEN + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, false); + break; + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, true); + break; +#endif default: break; } @@ -12431,14 +12557,14 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::containsLongVector(QualType qt) { - if (qt.isNull() || qt->isDependentType()) +bool hlsl::ContainsLongVector(QualType QT) { + if (QT.isNull() || QT->isDependentType()) return false; - while (const ArrayType *Arr = qt->getAsArrayTypeUnsafe()) - qt = Arr->getElementType(); + while (const ArrayType *Arr = QT->getAsArrayTypeUnsafe()) + QT = Arr->getElementType(); - if (CXXRecordDecl *Decl = qt->getAsCXXRecordDecl()) { + if (CXXRecordDecl *Decl = QT->getAsCXXRecordDecl()) { if (!Decl->isCompleteDefinition()) return false; return Decl->hasHLSLLongVector(); @@ -13805,6 +13931,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; // SPIRV Change Starts + case AttributeList::AT_VKAliasedPointer: { + declAttr = ::new (S.Context) VKAliasedPointerAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + } break; case AttributeList::AT_VKDecorateIdExt: { if (A.getNumArgs() == 0 || !A.getArg(0).is()) { Handled = false; @@ -15075,8 +15205,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } // Disallow long vecs from $Global cbuffers. 
- if (isGlobal && !isStatic && !isGroupShared && - !IS_BASIC_RAWBUFFER(basicKind)) { + if (isGlobal && !isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) { // Suppress actual emitting of errors for incompletable types here // They are redundant to those produced in ActOnUninitializedDecl. struct SilentDiagnoser : public TypeDiagnoser { @@ -15084,7 +15213,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} } SD; RequireCompleteType(D.getLocStart(), qt, SD); - if (containsLongVector(qt)) { + if (ContainsLongVector(qt)) { unsigned CbuffersOrTbuffersIdx = 4; Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << CbuffersOrTbuffersIdx; @@ -15982,7 +16111,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (containsLongVector(Arg->getType())) { + if (ContainsLongVector(Arg->getType())) { const unsigned UserDefinedStructParameterIdx = 5; S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) << UserDefinedStructParameterIdx; @@ -16725,14 +16854,14 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. 
for (const auto *param : FD->params()) { - if (containsLongVector(param->getType())) { + if (ContainsLongVector(param->getType())) { const unsigned EntryFunctionParametersIdx = 6; S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << EntryFunctionParametersIdx; } } - if (containsLongVector(FD->getReturnType())) { + if (ContainsLongVector(FD->getReturnType())) { const unsigned EntryFunctionReturnIdx = 7; S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << EntryFunctionReturnIdx; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index c562ee8d52..abca7cbf86 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -710,14 +710,14 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (containsLongVector(param->getType())) { + if (ContainsLongVector(param->getType())) { const unsigned PatchConstantFunctionParametersIdx = 8; self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << PatchConstantFunctionParametersIdx; } - if (containsLongVector(pPatchFnDecl->getReturnType())) { + if (ContainsLongVector(pPatchFnDecl->getReturnType())) { const unsigned PatchConstantFunctionReturnIdx = 9; self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl index b1e3b92f79..5305ee495b 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -3,9 +3,9 @@ // RUN: %dxc -DTYPE=uint64_t -DNUM=2 -T vs_6_9 %s | FileCheck %s // RUN: %dxc -DTYPE=double -DNUM=2 -T vs_6_9 %s | FileCheck %s -// RUN: %dxc -DTYPE=float -DNUM=6 -T vs_6_9 %s | FileCheck %s -// RUN: %dxc -DTYPE=bool -DNUM=13 
-T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 -// RUN: %dxc -DTYPE=uint64_t -DNUM=24 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=float -DNUM=6 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=13 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=24 -T vs_6_9 %s | FileCheck %s // RUN: %dxc -DTYPE=double -DNUM=32 -T vs_6_9 %s | FileCheck %s /////////////////////////////////////////////////////////////////////// @@ -15,16 +15,17 @@ // CHECK: %dx.types.ResRet.[[VTY:v[0-9]*[a-z][0-9][0-9]]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]>, i32 } - ByteAddressBuffer RoByBuf : register(t1); +ByteAddressBuffer RoByBuf : register(t1); RWByteAddressBuffer RwByBuf : register(u1); -StructuredBuffer< vector > RoStBuf : register(t2); -RWStructuredBuffer< vector > RwStBuf : register(u2); +StructuredBuffer > RoStBuf : register(t2); +RWStructuredBuffer > RwStBuf : register(u2); ConsumeStructuredBuffer > CnStBuf : register(u4); AppendStructuredBuffer > ApStBuf : register(u5); // CHECK-LABEL: define void @main +[shader("vertex")] void main(uint ix[2] : IX) { // ByteAddressBuffer Tests diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl index e6a5def3b6..0a115bd709 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl @@ -1,5 +1,5 @@ -// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT -// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT // RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s 
--check-prefixes=CHECK,NODBL,INT,SIG // RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG // RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL,NOINT @@ -24,12 +24,12 @@ // CHECK-DAG: %dx.types.ResRet.[[STY:[a-z][0-9]*]] = type { [[STYPE:[a-z0-9_]*]] // CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> -export void assignments(inout vector things[11], TYPE scales[10]); -export vector arithmetic(inout vector things[11])[11]; -export vector scarithmetic(vector things[11], TYPE scales[10])[11]; -export vector logic(vector truth[10], vector consequences[11])[10]; -export vector index(vector things[11], int i, TYPE val)[11]; -export void bittwiddlers(inout vector things[13]); +void assignments(inout vector things[11], TYPE scales[10]); +vector arithmetic(inout vector things[11])[11]; +vector scarithmetic(vector things[11], TYPE scales[10])[11]; +vector logic(vector truth[10], vector consequences[11])[10]; +vector index(vector things[11], int i)[11]; +void bittwiddlers(inout vector things[13]); struct Viface { vector values[11]; @@ -43,20 +43,17 @@ struct Liface { vector values[10]; }; -struct Biface { +struct Binface { vector values[13]; }; -// Requires vector loading support. Enable when available. RWStructuredBuffer Input : register(u11); RWStructuredBuffer Output : register(u12); RWStructuredBuffer Scales : register(u13); RWStructuredBuffer Truths : register(u14); -RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer Bits : register(u15); RWStructuredBuffer > Offsets : register(u16); -TYPE g_val; - [shader("compute")] [numthreads(8,1,1)] // CHECK-LABEL: define void @main @@ -95,7 +92,7 @@ void main(uint3 GID : SV_GroupThreadID) { sizeof(vector)*12); // Assign scalar offsets to capture the expected values. 
- // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> Offsets[1] = vector(sizeof(TYPE)*0, sizeof(TYPE)*1, sizeof(TYPE)*2, @@ -107,8 +104,8 @@ void main(uint3 GID : SV_GroupThreadID) { sizeof(TYPE)*8, sizeof(TYPE)*9, sizeof(TYPE)*10, - sizeof(TYPE)*11, - sizeof(TYPE));// Effectively alignof. + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. // Assign boolean offsets to capture the expected values. // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 2, i32 0, <13 x i32> @@ -130,7 +127,7 @@ void main(uint3 GID : SV_GroupThreadID) { Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); - Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5, g_val); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); #ifdef INT bittwiddlers(Bits[InIx1+6].values); #endif @@ -142,23 +139,23 @@ void assignments(inout vector things[11], TYPE scales[10]) { // CHECK: [[VcIx:%.*]] = add i32 [[InIx1]], 1 // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], 
i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 8) + 
// CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 @@ -247,27 +244,27 @@ vector arithmetic(inout vector things[11])[11] { // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 2 // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 8) + // CHECK: [[ld:%.*]] = 
call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 8) + // CHECK: [[ld:%.*]] = 
call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // NOINT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> <[[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000),.*}}>, [[vec0]] @@ -342,19 +339,19 @@ vector scarithmetic(vector things[11], TYPE 
scales[10])[11 // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 3 // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle 
[[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 @@ -429,35 +426,35 @@ vector logic(vector truth[10], vector consequen // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i32 [[IALN]]) // CHECK: [[ivec0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i32 8) + // CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i32 [[IALN]]) // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i32 [[IALN]]) // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i32 [[IALN]]) // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i32 [[IALN]]) // CHECK: [[ivec4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i32 [[IALN]]) // CHECK: [[ivec5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 4 // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[Input]] - //CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 8) - //CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - //CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 8) - //CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - //CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 8) - //CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - //CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 8) - //CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - //CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 8) - //CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - //CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 8) - //CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - //CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 8) - //CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 
303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec0]], zeroinitializer @@ -523,7 +520,7 @@ vector logic(vector truth[10], vector consequen static const int Ix = 2; // Test indexing operators -vector index(vector things[11], int i, TYPE val)[11] { +vector index(vector things[11], int i)[11] { vector res[11]; // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 @@ -532,47 +529,47 @@ vector index(vector things[11], int i, TYPE val)[11] { // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x 
[[TYPE]]>]* [[scratch2]], i32 0, i32 0 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 1 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec1]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 2 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 3 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec3]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 4 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec4]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 5 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec5]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 6 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 8) + // CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec6]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 7 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 8 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 9 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], 
i32 [[OFF9]], i32 [[ALN]]) // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec9]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 10 - // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 8) + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // CHECK: store <[[NUM]] x [[TYPE]]> [[vec10]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] @@ -639,29 +636,29 @@ vector index(vector things[11], int i, TYPE val)[11] { void bittwiddlers(inout vector things[13]) { // INT: [[VcIx:%.*]] = add i32 [[InIx1]], 6 // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) // INT: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) // INT: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 
[[VcIx]], i32 [[OFF3]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) // INT: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) // INT: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) // INT: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) // INT: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) // INT: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 8) + // 
INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) // INT: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) // INT: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], i32 [[ALN]]) // INT: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], i32 [[ALN]]) // INT: [[vec11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 - // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], i32 8) + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], i32 [[ALN]]) // INT: [[vec12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 // INT: [[res0:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1 @@ -703,6 +700,20 @@ void bittwiddlers(inout vector things[13]) { // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x 
[[TYPE]]> [[vec10]], [[shv12]] things[10] >>= things[12]; - // INT: ret void + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 
[[OFF11]], <[[NUM]] x [[TYPE]]> [[vec11]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], <[[NUM]] x [[TYPE]]> [[vec12]], i32 [[ALN]]) + + // CHECK-LABEL: ret void } #endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl new file mode 100644 index 0000000000..ca239a5b22 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl @@ -0,0 +1,680 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Scalar variants to confirm they match. +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. 
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on vec1s in a 6.9 compute shader to ensure they continue to be treated as scalars. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:i32]] = type { i32 + +#ifdef SCL +#define VTYPE TYPE +#else +#define VTYPE vector +#endif + +void assignments(inout VTYPE things[11], TYPE scales[10]); +VTYPE arithmetic(inout VTYPE things[11])[11]; +VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11]; +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10]; +VTYPE index(VTYPE things[11], int i)[11]; +void bittwiddlers(inout VTYPE things[13]); + +struct Viface { + VTYPE values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + bool1 values[10]; +}; + +struct Binface { + VTYPE values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE)*11, + sizeof(TYPE)*12); + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(int)*0, + sizeof(int)*1, + sizeof(int)*2, + sizeof(int)*3, + sizeof(int)*4, + sizeof(int)*5, + sizeof(int)*6, + sizeof(int)*7, + sizeof(int)*8, + sizeof(int)*9, + sizeof(int)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. 
+ + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +void assignments(inout VTYPE things[11], TYPE scales[10]) { + + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 1 + + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] 
= call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // Nothing to check. Just a copy over. 
+ things[0] = scales[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]]{{( nsw)?}} [[TYPE]] [[val5]], [[val1]] + things[1] += things[5]; + + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]]{{( nsw)?}} [[TYPE]] [[val2]], [[val6]] + things[2] -= things[6]; + + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]]{{( nsw)?}} [[TYPE]] [[val7]], [[val3]] + things[3] *= things[7]; + + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]]{{( nsw)?}} [[TYPE]] [[val4]], [[val8]] + things[4] /= things[8]; + +#ifdef DBL + things[5] = 0; // Gotta give it something in any case for validation. 
+#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + + // CHECK: [[res6:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl1]], [[val6]] + things[6] += scales[1]; + + // CHECK: [[res7:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val7]], [[scl2]] + things[7] -= scales[2]; + + // CHECK: [[res8:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl3]], [[val8]] + things[8] *= scales[3]; + + // CHECK: [[res9:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val9]], [[scl4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[scl0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[val10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + +} + +// Test arithmetic operators. +VTYPE arithmetic(inout VTYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + res[0] = +things[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[res1:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + 
res[1] = -things[0]; + + // CHECK: [[res2:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%.*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%.*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifdef DBL + res[6] = 0; // Gotta give it something in any case for validation. +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val9]], [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val10]], [[NEG1]] + res[10] = --things[10]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[val1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[val3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, 
%dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[val4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[val5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[val6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // Postincrement/decrements get the original value. + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[val7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[val8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. 
+VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 
[[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res0:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] 
[[scl0]], [[val0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val1]], [[scl1]] + res[1] = things[1] - scales[1]; + + // CHECK: [[res2:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl2]], [[val2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[res3:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val3]], [[scl3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[res4:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl4]], [[val4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[scl5]], [[val5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[res6:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl6]], [[val6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 
[[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + + +// Test logic operators. +// Only permissible in pre-HLSL2021 +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10] { + bool1 res[10]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 + // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] + // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i8 1, i32 [[IALN]]) + // CHECK: [[ival0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i8 1, i32 [[IALN]]) + // CHECK: [[ival1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i8 1, i32 [[IALN]]) + // CHECK: [[ival2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i8 1, i32 [[IALN]]) + // CHECK: [[ival3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i8 1, i32 [[IALN]]) + // CHECK: [[ival4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i8 1, i32 [[IALN]]) + 
// CHECK: [[ival5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 4 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[bres0:%.*]] = icmp eq i32 [[ival0]], 0 + // CHECK: 
[[res0:%.*]] = zext i1 [[bres0]] to i32 + res[0] = !truth[0]; + + // CHECK: [[res1:%.*]] = or i32 [[ival2]], [[ival1]] + // CHECK: [[bres1:%.*]] = icmp ne i32 [[res1]], 0 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + res[1] = truth[1] || truth[2]; + + // CHECK: [[bval2:%.*]] = icmp ne i32 [[ival2]], 0 + // CHECK: [[bval3:%.*]] = icmp ne i32 [[ival3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[bval4:%.*]] = icmp ne i32 [[ival4]], 0 + // CHECK: [[bval5:%.*]] = icmp ne i32 [[ival5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[TYPE]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[TYPE]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], i32 [[res0]], i32 undef, i32 
undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], i32 [[res1]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], i32 [[res2]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], i32 [[res3]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], i32 [[res4]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], i32 [[res5]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], i32 [[res6]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], i32 [[res7]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF8]], i32 [[res8]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], i32 [[res9]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +VTYPE index(VTYPE things[11], int i)[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 5 
+ // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1:%.*]], i32 0, i32 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val1]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val3]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val4]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val5]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val6]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( 
inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val9]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val10]], [[TYPE]]* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2:%.*]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]], align [[ALN]] + res[0] = 0; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 [[Ix]] + // CHECK: store [[TYPE]] [[POS1]], [[TYPE]]* [[adr]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 2 + // CHECK: store [[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]], [[TYPE]]* [[adr]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 [[Ix]] + // CHECK: [[vali:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 4 + // CHECK: store 
[[TYPE]] [[vali]], [[TYPE]]* [[adr]] + res[4] = things[i]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[TWO]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[vali]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle 
[[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
+void bittwiddlers(inout VTYPE things[13]) { + // INT: [[ValIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // INT: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // INT: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // INT: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // INT: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // INT: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // INT: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // INT: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 
[[OFF8]], i8 1, i32 [[ALN]]) + // INT: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // INT: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // INT: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], i8 1, i32 [[ALN]]) + // INT: [[val11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], i8 1, i32 [[ALN]]) + // INT: [[val12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor [[TYPE]] [[val1]], -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or [[TYPE]] [[val3]], [[val2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and [[TYPE]] [[val4]], [[val3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor [[TYPE]] [[val5]], [[val4]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and [[TYPE]] [[val6]] + // INT: [[res4:%[0-9]*]] = shl [[TYPE]] [[val5]], [[shv6]] + things[4] = things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and [[TYPE]] [[val7]] + // UNSIG: [[res5:%[0-9]*]] = lshr [[TYPE]] [[val6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr [[TYPE]] [[val6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or [[TYPE]] [[val8]], [[val6]] + things[6] |= things[8]; + + // INT: [[res7:%[0-9]*]] = and [[TYPE]] [[val9]], [[val7]] + things[7] &= things[9]; + + // INT: 
[[res8:%[0-9]*]] = xor [[TYPE]] [[val10]], [[val8]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and [[TYPE]] [[val11]] + // INT: [[res9:%[0-9]*]] = shl [[TYPE]] [[val9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and [[TYPE]] [[val12]] + // UNSIG: [[res10:%[0-9]*]] = lshr [[TYPE]] [[val10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr [[TYPE]] [[val10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, 
[[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], [[TYPE]] [[val11]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], [[TYPE]] [[val12]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl index c366261406..44c9be17d4 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl @@ -1,51 +1,23 @@ -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL 
-// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t1 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG -// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. +// Test relevant operators on vec1s in 6.9 to ensure they continue to be treated as scalars. + +#define VTYPE vector // Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. // CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[ELTY:[a-z0-9_]*]] // CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:.*]] } -RWStructuredBuffer buf; - -export void assignments(inout TYPE things[10], TYPE scales[10]); -export TYPE arithmetic(inout TYPE things[11])[11]; -export bool logic(bool truth[10], TYPE consequences[10])[10]; -export TYPE index(TYPE things[10], int i, TYPE val)[10]; - -struct Interface { - TYPE assigned[10]; - TYPE arithmeticked[11]; - bool logicked[10]; - TYPE indexed[10]; - TYPE scales[10]; -}; - -#if 0 -// Requires vector loading support. Enable when available. 
-RWStructuredBuffer Input; -RWStructuredBuffer Output; - -TYPE g_val; - -[shader("compute")] -[numthreads(8,1,1)] -void main(uint GI : SV_GroupIndex) { - assignments(Output[GI].assigned, Input[GI].scales); - Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); - Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); - Output[GI].indexed = index(Input[GI].indexed, GI, g_val); -} -#endif +RWStructuredBuffer buf; // A mixed-type overload to test overload resolution and mingle different vector element types in ops // Test assignment operators. // CHECK-LABEL: define void @"\01?assignments -export void assignments(inout TYPE things[10]) { +export void assignments(inout VTYPE things[10]) { // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{8|4|2}}) // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 @@ -111,8 +83,8 @@ export void assignments(inout TYPE things[10]) { // Test arithmetic operators. // CHECK-LABEL: define void @"\01?arithmetic -export TYPE arithmetic(inout TYPE things[11])[11] { - TYPE res[11]; +export VTYPE arithmetic(inout VTYPE things[11])[11] { + VTYPE res[11]; // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[res0]], i32 0 @@ -226,7 +198,7 @@ export TYPE arithmetic(inout TYPE things[11])[11] { // Test logic operators. 
// Only permissable in pre-HLSL2021 // CHECK-LABEL: define void @"\01?logic -export bool logic(bool truth[10], TYPE consequences[10])[10] { +export bool logic(bool truth[10], VTYPE consequences[10])[10] { bool res[10]; // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]] @@ -332,9 +304,9 @@ static const int Ix = 2; // Test indexing operators // CHECK-LABEL: define void @"\01?index -export TYPE index(TYPE things[10], int i)[10] { +export VTYPE index(VTYPE things[10], int i)[10] { // CHECK: [[res:%.*]] = alloca [10 x [[ELTY]]] - TYPE res[10]; + VTYPE res[10]; // CHECK: [[res0:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 0 // CHECK: store [[ELTY]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[ELTY]]* [[res0]] @@ -375,7 +347,7 @@ export TYPE index(TYPE things[10], int i)[10] { #ifdef INT // Test bit twiddling operators. // INT-LABEL: define void @"\01?bittwiddlers -export void bittwiddlers(inout TYPE things[13]) { +export void bittwiddlers(inout VTYPE things[13]) { // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 1 // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]] // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0 diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll new file mode 100644 index 0000000000..f9a9b3d677 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll @@ -0,0 +1,478 @@ +; RUN: %dxopt %s -hlsl-passes-resume -hlsl-dxil-scalarize-vector-load-stores -S | FileCheck %s + +; Verify that scalarize vector load stores pass will convert raw buffer vector operations +; into the equivalent collection of scalar load store calls. +; Sourced from buffer-load-stors-sm69.hlsl. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v17f32 = type { <17 x float>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%"class.StructuredBuffer >" = type { <17 x float> } +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <17 x float> } +%"class.ConsumeStructuredBuffer >" = type { <17 x float> } +%"class.AppendStructuredBuffer >" = type { <17 x float> } + +@"\01?RoByBuf@@3UByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwByBuf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 + +define void @main() { +bb: + %tmp = load %dx.types.Handle, %dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A", align 4 + %tmp2 = load %dx.types.Handle, %dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp3 = load %dx.types.Handle, %dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp4 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp5 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A", align 4 + %tmp6 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) 
+ %tmp7 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp5) + %tmp8 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue 
%dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp9 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i32 4) + %tmp10 = extractvalue 
%dx.types.ResRet.v17f32 %tmp9, 0 + %tmp11 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp1) + %tmp12 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 11, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: 
[[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp13 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i32 4) 
+ %tmp14 = extractvalue %dx.types.ResRet.v17f32 %tmp13, 0 + %tmp15 = fadd fast <17 x float> %tmp14, %tmp10 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp15, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp15, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp15, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp15, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp15, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp15, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp15, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp15, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp15, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp15, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp15, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp15, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp15, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp15, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp15, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp15, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, float 
[[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp15, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, <17 x float> %tmp15, i32 4) + %tmp16 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp4) + %tmp17 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp16, %dx.types.ResourceProperties { i32 4108, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 
[[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> 
[[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp18 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i32 4) + %tmp19 = extractvalue %dx.types.ResRet.v17f32 %tmp18, 0 + %tmp20 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 1, i8 0, i32 undef) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue 
%dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp21 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i32 4) + %tmp22 = extractvalue %dx.types.ResRet.v17f32 %tmp21, 0 + %tmp23 = call 
%dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp) + %tmp24 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp23, %dx.types.ResourceProperties { i32 12, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 
@dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp25 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i32 4) + %tmp26 = extractvalue %dx.types.ResRet.v17f32 %tmp25, 0 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = 
extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; 
CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp27 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i32 4) + %tmp28 = extractvalue %dx.types.ResRet.v17f32 %tmp27, 0 + %tmp29 = fadd fast <17 x float> %tmp22, %tmp19 + %tmp30 = fadd fast <17 x float> %tmp29, %tmp26 + %tmp31 = fadd fast <17 x float> %tmp30, %tmp28 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp31, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp31, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp31, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp31, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 
4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp31, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp31, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp31, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp31, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp31, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp31, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp31, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp31, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp31, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp31, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp31, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp31, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp31, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, <17 x float> %tmp31, i32 4) + %tmp32 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp3) + %tmp33 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp32, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp34 = 
call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp33, i8 -1) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: 
[[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp35 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i32 4) + %tmp36 = extractvalue %dx.types.ResRet.v17f32 %tmp35, 0 + %tmp37 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp2) + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp37, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp39 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp38, i8 1) + + ; CHECK: 
[[val0:%.*]] = extractelement <17 x float> [[vec16]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> [[vec16]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> [[vec16]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> [[vec16]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> [[vec16]], i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> [[vec16]], i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> [[vec16]], i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> [[vec16]], i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> [[vec16]], i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> [[vec16]], i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> [[vec16]], i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> [[vec16]], i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> [[vec16]], i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> [[vec16]], i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> [[vec16]], i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> [[vec16]], i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> [[vec16]], i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, 
%dx.types.Handle %tmp38, i32 %tmp39, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, <17 x float> %tmp36, i32 4) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0 +declare %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare void @dx.op.rawBufferVectorStore.v17f32(i32, %dx.types.Handle, i32, i32, <17 x float>, i32) #2 +declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.typeAnnotations = !{!13} +!dx.entryPoints = !{!17, !19} + +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{!4, !8, null, null} +!4 = !{!5, !6} +!5 = !{i32 0, %struct.ByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A" to %struct.ByteAddressBuffer*), !"RoByBuf", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{i32 1, %"class.StructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.StructuredBuffer >"*), !"RoStBuf", i32 0, i32 2, i32 1, i32 12, i32 0, !7} +!7 = !{i32 1, i32 68} +!8 = !{!9, !10, !11, !12} +!9 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"RwByBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!10 = !{i32 1, %"class.RWStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" to 
%"class.RWStructuredBuffer >"*), !"RwStBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7} +!11 = !{i32 2, %"class.ConsumeStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.ConsumeStructuredBuffer >"*), !"CnStBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!12 = !{i32 3, %"class.AppendStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.AppendStructuredBuffer >"*), !"ApStBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!13 = !{i32 1, void ()* @main, !14} +!14 = !{!15} +!15 = !{i32 0, !16, !16} +!16 = !{} +!17 = !{null, !"", null, !3, !18} +!18 = !{i32 0, i64 8589934608} +!19 = !{void ()* @main, !"main", !20, null, !24} +!20 = !{!21, null, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 5, i8 0, !23, i8 0, i32 2, i8 1, i32 0, i8 0, null} +!23 = !{i32 0, i32 1} +!24 = !{i32 8, i32 1, i32 5, !25} +!25 = !{i32 0} diff --git a/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl new file mode 100644 index 0000000000..fb9f6e0d76 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fspv-extension=SPV_KHR_16bit_storage -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,NOQUAD +// RUN: not %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.0 -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefixes=ERROR + +// CHECK: ; Version: 1.3 + +// QUAD: OpCapability QuadControlKHR +// QUAD: OpExtension "SPV_KHR_quad_control" + +RWStructuredBuffer values; + +[numthreads(32, 1, 1)] +void main(uint3 id: SV_DispatchThreadID) { + uint outIdx = (id.y * 8) + id.x; + +// CHECK: [[val1:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}} +// QUAD-NEXT: 
{{%[0-9]+}} = OpGroupNonUniformQuadAnyKHR %bool [[val1]] + +// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_0 +// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalOr %bool [[val1]] [[inv0]] +// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_1 +// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalOr %bool [[or0]] [[inv1]] +// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_2 +// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalOr %bool [[or1]] [[inv2]] + +// ERROR: 27:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use + values[outIdx].x = QuadAny(outIdx % 4 == 0) ? 1.0 : 2.0; + +// CHECK: [[val2:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}} +// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAllKHR %bool [[val2]] + +// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_0 +// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalAnd %bool [[val2]] [[inv0]] +// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_1 +// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalAnd %bool [[or0]] [[inv1]] +// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_2 +// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalAnd %bool [[or1]] [[inv2]] + +// ERROR: 40:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use + values[outIdx].y = QuadAll(outIdx % 2 == 0) ? 
3.0 : 4.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl new file mode 100644 index 0000000000..f0f5c54a16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -E main -T cs_6_7 %s | FileCheck %s + +// Bug was causing alignment miss + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer rwbuf; + +void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0].Get()); +} + +// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} Aligned 8 +// CHECK: [[L1:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} [[L0]] Aligned 8 +// CHECK: [[L2:%[_0-9A-Za-z]*]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[L1]] %int_0 +// CHECK: OpStore [[L2]] %int_1 Aligned 4 + + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl new file mode 100644 index 0000000000..fc5b9edad0 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl @@ -0,0 +1,72 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +cbuffer cbuf { + [[vk::aliased_pointer]] Globals_p bp; +} + +// CHECK: OpDecorate [[BP0:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP1:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 
+// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[V4C:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK: [[GS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GS]] +// CHECK: [[TT:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGS]] +// CHECK: [[PTT:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[TT]] +// CHECK: [[PFV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[V4FLOAT]] +// CHECK: [[PPGS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGS]] +// CHECK: [[PBV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] + +void f([[vk::aliased_pointer]] Globals_p bp) { +} + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = float4(1.0,0.0,0.0,0.0); + [[vk::aliased_pointer]] Globals_p bp0 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + [[vk::aliased_pointer]] Globals_p bp1 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + bp0.Get().g_vTestFloat4 = vTest; + f(bp0); + return bp1.Get().g_vTestFloat4; // Returns float4(1.0,0.0,0.0,0.0) +} + +// CHECK: [[GP:%[_0-9A-Za-z]*]] = OpVariable [[PTT]] PushConstant +// CHECK: [[VTEST:%[0-9A-Za-z]*]] = OpVariable [[PFV4FLOAT]] Function +// CHECK: OpStore [[VTEST]] [[V4C]] +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad %_ptr_PhysicalStorageBuffer_Globals_s [[X1]] +// CHECK: OpStore [[BP0]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[X3]] +// CHECK: OpStore [[BP1]] [[X4]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[VTEST]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] Aligned 16 +// CHECK: [[X7:%[_0-9A-Za-z]*]] = 
OpAccessChain [[PBV4FLOAT]] [[X6]] [[I1]] +// CHECK: OpStore [[X7]] [[X5]] Aligned 16 +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] Aligned 16 +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X8]] [[I1]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X9]] Aligned 16 +// CHECK: OpReturnValue [[X10]] + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl new file mode 100644 index 0000000000..992d8b39fd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -spirv -fcgl -T ps_6_0 %s | FileCheck %s + +struct S { + uint u; +}; + +typedef vk::BufferPointer BP; + +struct PC { + BP bp; +}; + +[[vk::push_constant]] PC pc; + +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[S:%[_0-9A-Za-z]*]] = OpTypeStruct [[UINT]] +// CHECK: [[PS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[S]] +// CHECK: [[PU:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[U1:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 1 +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} PushConstant + +void main() +{ +// CHECK: [[IN:%[_0-9A-Za-z]*]] = OpVariable +// CHECK: [[OUT:%[_0-9A-Za-z]*]] = OpVariable + uint u0, u1; + +// CHECK: [[X1:%[_0-9]+]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[PC]] [[I0]] +// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] Aligned 4 +// CHECK: [[X3:%[_0-9]+]] = OpAccessChain [[PU]] [[X2]] [[I0]] +// CHECK: [[X4:%[_0-9]+]] = OpLoad [[UINT]] [[IN]] +// CHECK: [[X5:%[_0-9]+]] = OpAtomicExchange [[UINT]] [[X3]] [[U1]] [[U0]] [[X4]] +// CHECK: OpStore [[OUT]] [[X5]] + InterlockedExchange(pc.bp.Get().u, u0, u1); +} + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl 
b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl new file mode 100644 index 0000000000..86cf48c41e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + float a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + float tmp = buffer.Get().a; + buffer.Get().a = tmp; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl new file mode 100644 index 0000000000..09585a7664 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Globals_s { + float4 a; +}; + +typedef vk::BufferPointer Globals_p; +typedef vk::BufferPointer Globals_pp; + +[[vk::push_constant]] +Globals_pp bda; + +[numthreads(1, 1, 1)] +void main() { + float4 r = bda.Get().Get().a; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl new file mode 100644 index 0000000000..e803b5b754 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + uint data = buffer.Get(); + buffer.Get() = data; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git 
a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl new file mode 100644 index 0000000000..1029aa7f2e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl @@ -0,0 +1,18 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + buffer.Get() = 1; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl new file mode 100644 index 0000000000..62bdb7f3cb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl @@ -0,0 +1,26 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +//[[vk::push_constant]] +//BufferContent buffer; + +RWStructuredBuffer rwbuf; + +// Wrong type in the parameter. 
+void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0]); +} + +// CHECK: no matching function for call to 'foo' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl new file mode 100644 index 0000000000..a89b286edf --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl @@ -0,0 +1,23 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer buf; + +void foo(const BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + static BufferContent bcs = buf[0]; + static BufferBuffer bbs = (BufferContent)bcs; +} + +// CHECK: cannot initialize a variable of type 'BufferPointer' with an lvalue of type 'BufferPointer' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl new file mode 100644 index 0000000000..71fee1a795 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpCapability PhysicalStorageBufferAddresses +// CHECK: OpExtension "SPV_KHR_physical_storage_buffer" +// CHECK: OpMemoryModel PhysicalStorageBuffer64 GLSL450 +// CHECK: OpEntryPoint Fragment [[MAIN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +// Forward declaration +typedef struct block_s block_t; +typedef vk::BufferPointer block_p; + +struct block_s +{ + float4 x; + block_p next; +}; + +struct TestPushConstant_t +{ + block_p root; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: OpDecorate [[GP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[COPY1:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpDecorate 
[[COPY2:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpMemberDecorate [[BLOCK:%[_0-9A-Za-z]*]] 1 Offset 16 +// CHECK: OpTypeForwardPointer [[PBLOCK:%[_0-9A-Za-z]*]] PhysicalStorageBuffer +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[ULONG:%[_0-9A-Za-z]*]] = OpTypeInt 64 0 +// CHECK: [[UL0:%[_0-9A-Za-z]*]] = OpConstant [[ULONG]] 0 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F0]] [[F0]] [[F0]] [[F0]] +// CHECK: [[BLOCK]] = OpTypeStruct [[V4FLOAT]] [[PBLOCK]] +// CHECK: [[PBLOCK]] = OpTypePointer PhysicalStorageBuffer [[BLOCK]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PBLOCK]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPBLOCK0:%[_0-9A-Za-z]*]] = OpTypePointer Function %_ptr_PhysicalStorageBuffer_block_s +// CHECK: [[PPBLOCK1:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PBLOCK]] +// CHECK: [[PPBLOCK2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[PBLOCK]] +// CHECK: [[BOOL:%[_0-9A-Za-z]*]] = OpTypeBool +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +[numthreads(1,1,1)] +float4 MainPs(void) : SV_Target0 +{ + if (__has_feature(hlsl_vk_buffer_pointer)) { + [[vk::aliased_pointer]] block_p g_p = + vk::static_pointer_cast(g_PushConstants.root); + g_p = g_p.Get().next; + uint64_t addr = (uint64_t)g_p; + block_p copy1 = block_p(addr); + block_p copy2 = block_p(copy1); + if (addr == 0) // Null pointer test + return float4(0.0,0.0,0.0,0.0); 
+ return g_p.Get().x; + } + return float4(0.0,0.0,0.0,0.0); +} + +// CHECK: [[MAIN]] = OpFunction +// CHECK-NEXT: OpLabel +// CHECK-NEXT: [[RESULT:%[_0-9A-Za-z]*]] = OpFunctionCall [[V4FLOAT]] [[FUN:%[_0-9A-Za-z]*]] +// CHECK: OpStore [[OUT]] [[RESULT]] +// CHECK: OpFunctionEnd +// CHECK: [[FUN]] = OpFunction [[V4FLOAT]] +// CHECK: [[GP]] = OpVariable [[PPBLOCK0]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK1]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X1]] +// CHECK: OpStore [[GP]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK2]] [[X3]] [[S1]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 8 +// CHECK: OpStore [[GP]] [[X5]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpConvertPtrToU [[ULONG]] [[X6]] +// CHECK: OpStore [[ADDR:%[_0-9A-Za-z]*]] [[X7]] +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PBLOCK]] [[X8]] +// CHECK: OpStore [[COPY1]] [[X9]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[COPY1]] +// CHECK: OpStore [[COPY2]] [[X10]] +// CHECK: [[X11:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X12:%[_0-9A-Za-z]*]] = OpIEqual %bool [[X11]] [[UL0]] +// CHECK: OpBranchConditional [[X12]] [[IF_TRUE:%[_0-9A-Za-z]*]] [[IF_MERGE:%[_0-9A-Za-z]*]] +// CHECK: [[IF_TRUE]] = OpLabel +// CHECK: OpReturnValue [[CV4FLOAT]] +// CHECK: [[IF_MERGE]] = OpLabel +// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X14:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X13]] [[S0]] +// CHECK: [[X15:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X14]] Aligned 16 +// CHECK: OpReturnValue [[X15]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl new file mode 
100644 index 0000000000..c7d6f0ed2b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl @@ -0,0 +1,48 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X3]] Aligned 16 +// 
CHECK: OpStore [[OUT]] [[X4]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl new file mode 100644 index 0000000000..b2efd02cbd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl @@ -0,0 +1,52 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK-DAG: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : 
SV_Target0 +{ + float4 vTest = float4(1.0,0.0,0.0,0.0); + g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4 = vTest; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: OpStore [[X3]] [[CV4FLOAT]] Aligned 16 +// CHECK: OpStore [[OUT]] [[CV4FLOAT]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl new file mode 100644 index 0000000000..77c0f51911 --- /dev/null +++ b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc /T ps_6_5 -fcgl %s | FileCheck %s + +// Compiling this HLSL would trigger an assertion: +// While deleting: void (i32, float)* %dx.hl.op..void (i32, float) +// Use still stuck around after Def is destroyed: call void @"dx.hl.op..void (i32, float)"(i32 120, float -1.000000e+00), !dbg <0x503000001cc8> +// Error: assert(use_empty() && "Uses remain when a value is destroyed!") +// File: /src/external/DirectXShaderCompiler/lib/IR/Value.cpp(83) +// +// Bug was fixed in CodeGenFunction::EmitDiscardStmt by skipping the emission of +// an unreachable discard. + +// CHECK: define void @main() +// CHECK: br label % +// CHECK-NOT: call void @"dx.hl.op..void (i32, float)" +// CHECK: ret void + +void main() { + while (true) { + } + discard; +} diff --git a/tools/clang/test/DXILValidation/load-store-validation.hlsl b/tools/clang/test/DXILValidation/load-store-validation.hlsl new file mode 100644 index 0000000000..d4e5e29db8 --- /dev/null +++ b/tools/clang/test/DXILValidation/load-store-validation.hlsl @@ -0,0 +1,74 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of various invalid load/store parameters. 
+// It is used to generate LitDXILValidation/load-store-validation.ll using `dxc -T ps_6_9`. +// Output is modified to trigger various validation errors. + +Texture1D Tex; +RWTexture1D RwTex; +SamplerState Samp; + +StructuredBuffer VecBuf; +StructuredBuffer ScalBuf; +ByteAddressBuffer BaBuf; + +RWStructuredBuffer OutVecBuf; +RWStructuredBuffer OutScalBuf; +RWByteAddressBuffer OutBaBuf; + +// Some simple ways to generate the vector ops in question. +float4 main(int i : IX) : SV_Target { + // Texture provides some invalid handles to plug in. + float4 TexVal = Tex.Sample(Samp, i); + RwTex[0] = TexVal; + + // For invalid RC on Load (and inevitably invalid RK). + float BadRCLd = ScalBuf[0]; + // For invalid RK on Load. + float BadRKLd = ScalBuf[1]; + // For non-constant alignment on Load. + float BadAlnLd = ScalBuf[2]; + // For undefined offset on Structured Buffer Load. + float BadStrOffLd = ScalBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Load. + float BadBabOffLd = BaBuf.Load(0); + + // For invalid RC on Vector Load (and inevitably invalid RK). + float4 BadRCVcLd = VecBuf[0]; + // For invalid RK on Vector Load. + float4 BadRKVcLd = VecBuf[1]; + // For non-constant alignment on Vector Load. + float4 BadAlnVcLd = VecBuf[2]; + // For undefined offset on Structured Buffer Vector Load. + float4 BadStrOffVcLd = VecBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + float4 BadBabOffVcLd = BaBuf.Load(4); + + // For Store to non-UAV. + OutScalBuf[0] = BadRCLd; + // For invalid RK on Store. + OutScalBuf[1] = BadRKLd; + // For non-constant alignment on Store. + OutScalBuf[2] = BadAlnLd; + // For undefined offset on Structured Buffer Store. + OutScalBuf[3] = BadStrOffLd; + // For undefined value Store. + OutScalBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Store. + OutBaBuf.Store(0, BadBabOffLd); + + // For Vector Store to non-UAV. 
+ OutVecBuf[0] = BadRCVcLd; + // For invalid RK on Vector Store. + OutVecBuf[1] = BadRKVcLd; + // For non-constant alignment on Vector Store. + OutVecBuf[2] = BadAlnVcLd; + // For undefined offset on Structured Buffer Vector Store. + OutVecBuf[3] = BadStrOffVcLd; + // For undefined value Vector Store. + OutVecBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Store. + OutBaBuf.Store(4, BadBabOffVcLd); + + return TexVal; +} + diff --git a/tools/clang/test/DXILValidation/vector-validation.hlsl b/tools/clang/test/DXILValidation/vector-validation.hlsl index 87f24b2b0b..5d6a5cd4a2 100644 --- a/tools/clang/test/DXILValidation/vector-validation.hlsl +++ b/tools/clang/test/DXILValidation/vector-validation.hlsl @@ -1,16 +1,11 @@ -// RUN: %dxc -T vs_6_9 %s -Od | FileCheck %s - -// Just HLSL source for validation that vector operations produce errors pre-6.9 -// Output is modified to have 6.8 instead. - -struct Vector { int i; float4 f;}; +// This file is not used directly for testing. +// This is the HLSL source for validation of disallowed 6.9 features in previous shader models. +// It is used to generate LitDXILValidation/vector-validation.ll using `dxc -T ps_6_9`. +// Output is modified to have shader model 6.8 instead. RWStructuredBuffer VecBuf; -RWStructuredBuffer StrBuf; -RWStructuredBuffer ScalBuf; // some simple ways to generate the vector ops in question. -// CHECK-LABEL: define void @main float4 main(float val : VAL) :SV_Position { float4 vec = VecBuf[1]; VecBuf[0] = val; diff --git a/tools/clang/test/LitDXILValidation/load-store-validation.ll b/tools/clang/test/LitDXILValidation/load-store-validation.ll new file mode 100644 index 0000000000..34b2f6b602 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/load-store-validation.ll @@ -0,0 +1,229 @@ +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Ensure proper validation errors are produced for invalid parameters to load and store operations. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.Texture1D >" = type { <4 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.StructuredBuffer >" = type { <4 x float> } +%"class.StructuredBuffer" = type { float } +%struct.ByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } +%"class.RWStructuredBuffer" = type { float } +%struct.RWByteAddressBuffer = type { i32 } +%struct.SamplerState = type { i32 } + +; Unfortunately, the validation errors come in weird orders. +; Inlining them isn't helpful, so we'll just dump them all here. +; Inline comments, variable names, and notes should help find the corresponding source. + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at '%badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. 
+; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at '%badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' + +define void @main() { +bb: + %tmp = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + %tmp1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + %tmp2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %tmp3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + %tmp4 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + %tmp5 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + %tmp6 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) + %tmp7 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 3 }, i32 0, i1 false) + %tmp8 = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 0, i1 false) + %ix = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %texIx = sitofp i32 %ix to float + %tex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 1, i32 1033 }) + %samp = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 14, i32 0 }) + %tmp10 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 60, %dx.types.Handle %tex, %dx.types.Handle %samp, float %texIx, float undef, float undef, float undef, i32 0, i32 undef, i32 undef, float undef) + %tmp11 = extractvalue %dx.types.ResRet.f32 %tmp10, 0 + %tmp12 = extractvalue %dx.types.ResRet.f32 %tmp10, 1 + %tmp13 = extractvalue %dx.types.ResRet.f32 %tmp10, 2 + %tmp14 = extractvalue %dx.types.ResRet.f32 %tmp10, 3 + %rwTex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp8, %dx.types.ResourceProperties { i32 4097, i32 1033 }) + call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %rwTex, i32 0, i32 undef, i32 undef, float %tmp11, float %tmp12, float %tmp13, float %tmp14, i8 15) + %scalBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp4, %dx.types.ResourceProperties { i32 12, i32 4 }) + ; Invalid RC on Load (and inevitably invalid RK). + %badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4) + %badRC = extractvalue %dx.types.ResRet.f32 %badRCLd, 0 + ; Invalid RK on Load. + %badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4) + %badRK = extractvalue %dx.types.ResRet.f32 %badRKLd, 0 + ; Non-constant alignment on Load. 
+ %badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix) + %badAln = extractvalue %dx.types.ResRet.f32 %badAlnLd, 0 + ; Undefined offset on Structured Buffer Load. + %badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4) + %badStrOff = extractvalue %dx.types.ResRet.f32 %badStrOffLd, 0 + %baBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp3, %dx.types.ResourceProperties { i32 11, i32 0 }) + ; Defined (and therefore invalid) offset on Byte Address Buffer Load. + %badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4) + %badBabOff = extractvalue %dx.types.ResRet.f32 %badBabOffLd, 0 + + %vecBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp5, %dx.types.ResourceProperties { i32 12, i32 16 }) + ; Invalid RC on Vector Load (and inevitably invalid RK). + %badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4) + %badRCVc = extractvalue %dx.types.ResRet.v4f32 %badRCVcLd, 0 + ; Invalid RK on Vector Load. + %badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4) + %badRKVc = extractvalue %dx.types.ResRet.v4f32 %badRKVcLd, 0 + ; Non-constant alignment on Vector Load. + %badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix) + %badAlnVc = extractvalue %dx.types.ResRet.v4f32 %badAlnVcLd, 0 + ; Undefined offset on Structured Buffer Vector Load. 
+ %badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4) + %badStrOffVc = extractvalue %dx.types.ResRet.v4f32 %badStrOffVcLd, 0 + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + %badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4) + %badBabOffVc = extractvalue %dx.types.ResRet.v4f32 %badBabOffVcLd, 0 + + ; Store to non-UAV. + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4) + ; Invalid RK on Store. + %tmp39 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4) + ; Non-constant alignment on Store. + %tmp40 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix) + ; Undefined offset on Structured Buffer Store. + %tmp41 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) + ; Undefined value Store. 
+ %tmp42 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Store. + %tmp44 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4) + + ; Vector Store to non-UAV. + %tmp45 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %rwTex, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4) + ; Invalid RK on Vector Store. + %tmp46 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4) + ; Non-constant alignment on Vector Store. + %tmp47 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix) + ; Undefined offset on Structured Buffer Vector Store. + %tmp48 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4) + ; Undefined value Vector Store. 
+ %tmp49 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Store. + %tmp51 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4) + + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %tmp11) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %tmp12) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %tmp13) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %tmp14) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #2 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 +declare %dx.types.ResRet.f32 @dx.op.sample.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #1 +declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0 +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #0 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } 
+attributes #2 = { nounwind readnone } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!18} +!dx.entryPoints = !{!19} + +!1 = !{i32 1, i32 9} +!2 = !{!"ps", i32 6, i32 9} +!3 = !{!4, !12, null, !16} +!4 = !{!5, !7, !9, !11} +!5 = !{i32 0, %"class.Texture1D >"* undef, !"", i32 0, i32 0, i32 1, i32 1, i32 0, !6} +!6 = !{i32 0, i32 9} +!7 = !{i32 1, %"class.StructuredBuffer >"* undef, !"", i32 0, i32 1, i32 1, i32 12, i32 0, !8} +!8 = !{i32 1, i32 16} +!9 = !{i32 2, %"class.StructuredBuffer"* undef, !"", i32 0, i32 2, i32 1, i32 12, i32 0, !10} +!10 = !{i32 1, i32 4} +!11 = !{i32 3, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 3, i32 1, i32 11, i32 0, null} +!12 = !{!13, !14, !15} +!13 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !8} +!14 = !{i32 1, %"class.RWStructuredBuffer"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !10} +!15 = !{i32 2, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 2, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!16 = !{!17} +!17 = !{i32 0, %struct.SamplerState* undef, !"", i32 0, i32 0, i32 1, i32 0, null} +!18 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!19 = !{void ()* @main, !"main", !20, !3, !27} +!20 = !{!21, !24, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 4, i8 0, !23, i8 1, i32 1, i8 1, i32 0, i8 0, null} +!23 = !{i32 0} +!24 = !{!25} +!25 = !{i32 0, !"SV_Target", i8 9, i8 16, !23, i8 0, i32 1, i8 4, i32 0, i8 0, !26} +!26 = !{i32 3, i32 15} +!27 = !{i32 0, i64 8589934608} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl index 42eb6b077c..54c85191da 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl @@ -3,6 +3,8 @@ #define TYPE float #define NUM 5 +StructuredBuffer > sbuf; // 
expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + struct LongVec { float4 f; vector vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl new file mode 100644 index 0000000000..722187cf43 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_3 -validator-version 1.8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +RaytracingAccelerationStructure RTAS; +// DXR entry to test that restricted flags are diagnosed. +[shader("raygeneration")] +void main(void) { + RayDesc rayDesc; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} + RayQuery rayQuery; + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl index 981788a688..5e484d193e 100644 --- a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl +++ b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl @@ -1,5 +1,5 @@ -// RUN: %dxc -T vs_6_9 -E RayQueryTests -verify %s -// RUN: %dxc -T vs_6_5 -E RayQueryTests2 -verify %s +// RUN: %dxc -T vs_6_9 -verify %s +// RUN: %dxc -T vs_6_5 -verify %s // 
validate 2nd template argument flags // expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl new file mode 100644 index 0000000000..272a46a87e --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T ps_6_0 -verify %s + +// expected-no-diagnostics +// No diagnostic is expected because this is a non-library target, +// and SubObjects are ignored on non-library targets. + +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +[shader("pixel")] +int main(int i : INDEX) : SV_Target { + return 1; +} diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index bb81c1c953..e337d2951c 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -146,12 +146,17 @@ class PixTest : public ::testing::Test { TEST_METHOD(RootSignatureUpgrade_Annotation) TEST_METHOD(DxilPIXDXRInvocationsLog_SanityTest) + TEST_METHOD(DxilPIXDXRInvocationsLog_EmbeddedRootSigs) TEST_METHOD(DebugInstrumentation_TextOutput) TEST_METHOD(DebugInstrumentation_BlockReport) TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs) + TEST_METHOD(NonUniformResourceIndex_Resource) + TEST_METHOD(NonUniformResourceIndex_DescriptorHeap) + TEST_METHOD(NonUniformResourceIndex_Raytracing) + dxc::DxcDllSupport m_dllSupport; VersionSupportInfo m_ver; @@ -443,6 +448,11 @@ class PixTest : public ::testing::Test { std::string RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob); CComPtr RunDxilPIXMeshShaderOutputPass(IDxcBlob *blob); CComPtr RunDxilPIXDXRInvocationsLog(IDxcBlob *blob); + std::vector + RunDxilNonUniformResourceIndexInstrumentation(IDxcBlob *blob, + std::string &outputText); + void 
TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult); void TestPixUAVCase(char const *hlsl, wchar_t const *model, wchar_t const *entry); std::string Disassemble(IDxcBlob *pProgram); @@ -660,7 +670,7 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { CComPtr pOptimizedModule; CComPtr pText; VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( - dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + blob, Options.data(), Options.size(), &pOptimizedModule, &pText)); std::string outputText; if (pText->GetBufferSize() != 0) { @@ -670,6 +680,29 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { return pOptimizedModule; } +std::vector PixTest::RunDxilNonUniformResourceIndexInstrumentation( + IDxcBlob *blob, std::string &outputText) { + + CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); + CComPtr pOptimizer; + VERIFY_SUCCEEDED( + m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer)); + std::array Options = { + L"-opt-mod-passes", L"-dxil-dbg-value-to-dbg-declare", + L"-dxil-annotate-with-virtual-regs", + L"-hlsl-dxil-non-uniform-resource-index-instrumentation"}; + + CComPtr pOptimizedModule; + CComPtr pText; + VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( + dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + + outputText = BlobToUtf8(pText); + + const std::string disassembly = Disassemble(pOptimizedModule); + return Tokenize(disassembly, "\n"); +} + std::string PixTest::RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob) { CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); @@ -2945,6 +2978,230 @@ void MyMiss(inout MyPayload payload) RunDxilPIXDXRInvocationsLog(compiledLib); } +TEST_F(PixTest, DxilPIXDXRInvocationsLog_EmbeddedRootSigs) { + + const char *source = R"x( + +GlobalRootSignature grs = {"CBV(b0)"}; +struct MyPayload +{ + float4 color; +}; + +[shader("raygeneration")] +void MyRayGen() +{ +} + +[shader("closesthit")] +void MyClosestHit(inout MyPayload 
payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("anyhit")] +void MyAnyHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("miss")] +void MyMiss(inout MyPayload payload) +{ +} + +)x"; + + auto compiledLib = Compile(m_dllSupport, source, L"lib_6_3", + {L"-Qstrip_reflect"}, L"RootSig"); + RunDxilPIXDXRInvocationsLog(compiledLib); +} + +uint32_t NuriGetWaveInstructionCount(const std::vector &lines) { + // This is the instruction we'll insert into the shader if we detect dynamic + // resource indexing + const char *const waveActiveAllEqual = "call i1 @dx.op.waveActiveAllEqual"; + + uint32_t instCount = 0; + for (const std::string &line : lines) { + instCount += line.find(waveActiveAllEqual) != std::string::npos; + } + return instCount; +} + +void PixTest::TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult) { + + for (const OptimizationChoice &choice : OptimizationChoices) { + const std::vector compilationOptions = {choice.Flag}; + + CComPtr compiledLib = + Compile(m_dllSupport, source, target, compilationOptions); + + std::string outputText; + const std::vector dxilLines = + RunDxilNonUniformResourceIndexInstrumentation(compiledLib, outputText); + + VERIFY_ARE_EQUAL(NuriGetWaveInstructionCount(dxilLines), expectedResult); + + bool foundDynamicIndexingNoNuri = false; + const std::vector outputTextLines = Tokenize(outputText, "\n"); + for (const std::string &line : outputTextLines) { + if (line.find("FoundDynamicIndexingNoNuri") != std::string::npos) { + foundDynamicIndexingNoNuri = true; + break; + } + } + + VERIFY_ARE_EQUAL((expectedResult != 0), foundDynamicIndexingNoNuri); + } +} + +TEST_F(PixTest, NonUniformResourceIndex_Resource) { + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint index = uv.x * uv.y; + return tex[index].Load(int3(0, 0, 0)); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D 
tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x * uv.y; + return tex[NonUniformResourceIndex(i)].Load(int3(0, 0, 0)); +})x"; + + TestNuriCase(source, L"ps_6_0", 1); + TestNuriCase(sourceWithNuri, L"ps_6_0", 0); + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + TestNuriCase(source, L"ps_6_6", 1); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_DescriptorHeap) { + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[i]; + SamplerState dynResSampler = + SamplerDescriptorHeap[i]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[NonUniformResourceIndex(i)]; + SamplerState dynResSampler = + SamplerDescriptorHeap[NonUniformResourceIndex(i)]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + TestNuriCase(source, L"ps_6_6", 2); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_Raytracing) { + + if (m_ver.SkipDxilVersion(1, 5)) { + return; + } + + const char *source = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[i][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[i][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[i1][rayIndex.xy] += c1; + + uint i2 = 
rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[i2][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + const char *sourceWithNuri = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[NonUniformResourceIndex(i1)][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[NonUniformResourceIndex(i2)][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + TestNuriCase(source, L"lib_6_5", 4); + TestNuriCase(sourceWithNuri, L"lib_6_5", 0); +} + TEST_F(PixTest, DebugInstrumentation_TextOutput) { const char *source = R"x( diff --git a/tools/clang/unittests/HLSL/PixTestUtils.cpp b/tools/clang/unittests/HLSL/PixTestUtils.cpp index 91b6c4479c..61647ff5fa 100644 --- a/tools/clang/unittests/HLSL/PixTestUtils.cpp +++ b/tools/clang/unittests/HLSL/PixTestUtils.cpp @@ -397,7 +397,7 @@ CComPtr Compile(dxc::DxcDllSupport &dllSupport, const char *hlsl, CheckOperationSucceeded(pResult, &pProgram); CComPtr pLib; - VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); + VERIFY_SUCCEEDED(dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); const hlsl::DxilContainerHeader *pContainer = hlsl::IsDxilContainerLike( pProgram->GetBufferPointer(), pProgram->GetBufferSize()); VERIFY_IS_NOT_NULL(pContainer); diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index 
f69b0be204..01f24e0227 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -1506,21 +1506,23 @@ TEST_F(ValidationTest, StructBufStrideOutOfBound) { } TEST_F(ValidationTest, StructBufLoadCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 8)", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 undef)", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 8)", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 undef)", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, StructBufStoreCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 undef", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 undef", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, TypedBufRetType) { diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 0ab6759d95..3aff8bcda8 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -5632,7 +5632,7 @@ void ExecutionTest::RunBasicShaderModelTest(CComPtr pDevice, std::shared_ptr test = 
RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test is creating the resource to run + // this callback is called when the test is creating the resource to run // the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { UNREFERENCED_PARAMETER(Name); @@ -6999,7 +6999,7 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7067,7 +7067,7 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7157,7 +7157,7 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7234,7 +7234,7 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7314,7 +7314,7 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( 
pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7424,7 +7424,7 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7494,7 +7494,7 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -7554,7 +7554,7 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -7619,7 +7619,7 @@ TEST_F(ExecutionTest, BinaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -7707,7 +7707,7 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, 
pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -7777,7 +7777,7 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { int numExpected = Validation_Expected2->size() == 0 ? 1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -7869,7 +7869,7 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -7948,7 +7948,7 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -8016,7 +8016,7 @@ TEST_F(ExecutionTest, UnaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -8091,7 +8091,7 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { 
std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -8187,7 +8187,7 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -8264,7 +8264,7 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { int numExpected = Validation_Expected2->size() == 0 ? 1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -8363,7 +8363,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -8948,7 +8948,7 @@ TEST_F(ExecutionTest, DotTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "DotOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SDotOp")); @@ -9240,7 +9240,7 @@ 
TEST_F(ExecutionTest, Msad4Test) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "Msad4", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SMsad4")); @@ -9342,7 +9342,7 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -9455,7 +9455,7 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -9883,7 +9883,7 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( ++maskIndex) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "WaveIntrinsicsOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); @@ -12612,7 +12612,7 @@ TEST_F(ExecutionTest, HelperLaneTest) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "HelperLaneTestNoWave", - // this callbacked is called when the test is creating the resource to + // this callback is called when the test is creating the resource to // run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == 
_stricmp(Name, "UAVBuffer0")); diff --git a/utils/git/requirements_formatting.txt b/utils/git/requirements_formatting.txt index 06db8176c9..6f3e07dcf2 100644 --- a/utils/git/requirements_formatting.txt +++ b/utils/git/requirements_formatting.txt @@ -18,7 +18,7 @@ charset-normalizer==3.2.0 # via requests click==8.1.7 # via black -cryptography==43.0.1 +cryptography==44.0.1 # via pyjwt darker==1.7.2 # via -r llvm/utils/git/requirements_formatting.txt.in diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 0ca5b0716b..55c3643d95 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1,6 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// // See hctdb.py for the implementation of intrinsic file processing. // // Intrinsic declarations are grouped into namespaces that @@ -393,7 +396,13 @@ void [[]] RawBufferStore(in u64 addr, in $funcT value); void [[]] RawBufferStore(in u64 addr, in $funcT value, in uint alignment); void [[]] ext_execution_mode(in uint mode, ...); void [[]] ext_execution_mode_id(in uint mode, ...); +$funcT2 [[]] static_pointer_cast(in VkBufferPointer ptr); +$funcT2 [[]] reinterpret_pointer_cast(in VkBufferPointer ptr); + +} namespace +namespace BufferPointerMethods { +$classT [[ro]] GetBufferContents(); } namespace // SPIRV Change Ends @@ -1147,4 +1156,3 @@ $classT [[]] SubpassLoad(in int sample) : subpassinputms_load; } namespace // SPIRV Change Ends - diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 8f6887b5d4..870973d400 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1,5 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. 
+# Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +# All rights reserved. ############################################################################### # DXIL information. # ############################################################################### @@ -164,7 +166,7 @@ def process_oload_types(self): # - "hf" means overloads for scalar half and float # - ending with "<" means vector overload supporting the same # components as defined for the scalar overload types. - # - In the second overload dimension " 0: for n, o in enumerate(i.oload_types.split(",")): - v = o.split("<") - scalar_masks.append(oload_to_mask(v[0] + "<" if len(v) > 1 else v[0])) - vector_masks.append(oload_to_mask(v[1]) if len(v) > 1 else 0) + if "<" in o: + v = o.split("<") + scalar_masks.append(oload_to_mask(v[0] + "<")) + vector_masks.append(oload_to_mask(v[1])) + else: + scalar_masks.append(oload_to_mask(o)) + vector_masks.append(0) print( ( " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} " diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json index 4c85069488..c4527277cd 100644 --- a/utils/hct/hlsl_intrinsic_opcodes.json +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -1,6 +1,6 @@ { "IntrinsicOpCodes": { - "Num_Intrinsics": 360, + "Num_Intrinsics": 363, "IOP_AcceptHitAndEndSearch": 0, "IOP_AddUint64": 1, "IOP_AllMemoryBarrier": 2, @@ -360,6 +360,9 @@ "MOP_InterlockedUMax": 356, "MOP_InterlockedUMin": 357, "MOP_DxHitObject_MakeNop": 358, - "IOP_DxMaybeReorderThread": 359 + "IOP_DxMaybeReorderThread": 359, + "IOP_Vkreinterpret_pointer_cast": 360, + "IOP_Vkstatic_pointer_cast": 361, + "MOP_GetBufferContents": 362 } }