From 68a284b49eeb7f38bd1f963da55a7067da40d3de Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Mon, 2 Dec 2024 19:14:45 -1000
Subject: [PATCH 01/17] Allow native vectors for LLVM operations

Disables various forms of scalarization and vector elimination to permit
vectors to pass through to final DXIL when used in native LLVM
operations and loading/storing.

Introduces a few vector manipulation llvm instructions to DXIL allowing
for them to appear in output DXIL.

Skips passes for 6.9 that scalarize, convert to arrays, or otherwise eliminate vectors.
This eliminates the element-by-element loading of the vectors.
In many cases, this required plumbing the shader model information to
passes that didn't have it before.

Many changes were needed for the MatrixBitcastLower pass related to
linking to avoid converting matrix vectors, but also to perform the
conversion if a shader was compiled for 6.9+, but then linked to an
earlier target.
This now adapts to the linker target to either preserve vectors for 6.9 or arrays for previous versions.
This requires running the DynamicIndexing VectorToArray pass during linking, since modules compiled for 6_x and 6_9+ skip it in the initial compile but still need their vectors lowered to arrays when linked to an earlier target.

Ternary conditional/select operators were element extracted in codegen.
Removing this allows 6.9 to preserve the vectors, but also maintains
behavior for previous shader models because the operations get
scalarized later anyway.

Keep groupshared variables as vectors for 6.9. They are no longer represented as individual groupshared scalars.

Adds extensive tests for these operations using different types and
sizes and testing them appropriately. Booleans produce significantly
different code, so they get their own test.

Fixes #7123
---
 include/dxc/DXIL/DxilInstructions.h           |  36 ++
 lib/DxilValidation/DxilValidation.cpp         |   2 +
 lib/HLSL/DxilLinker.cpp                       |   6 +
 lib/HLSL/HLMatrixBitcastLowerPass.cpp         |  44 +-
 lib/Transforms/Scalar/DxilEliminateVector.cpp |   6 +
 lib/Transforms/Scalar/LowerTypePasses.cpp     |  18 +-
 .../Scalar/ScalarReplAggregatesHLSL.cpp       |  18 +-
 lib/Transforms/Scalar/Scalarizer.cpp          |   6 +
 tools/clang/lib/CodeGen/CGExprScalar.cpp      |  15 +-
 tools/clang/lib/Sema/SemaHLSL.cpp             |   8 +-
 .../hlsl/types/longvec-operators-bool.hlsl    | 463 +++++++++++++++++
 .../hlsl/types/longvec-operators-int.hlsl     |  58 +++
 .../hlsl/types/longvec-operators.hlsl         | 491 ++++++++++++++++++
 tools/clang/unittests/HLSL/LinkerTest.cpp     |   5 +
 utils/hct/hctdb.py                            |  31 ++
 15 files changed, 1165 insertions(+), 42 deletions(-)
 create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl
 create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl
 create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl

diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h
index f5d8759db7..ef6483bba2 100644
--- a/include/dxc/DXIL/DxilInstructions.h
+++ b/include/dxc/DXIL/DxilInstructions.h
@@ -645,6 +645,42 @@ struct LlvmInst_VAArg {
   bool isAllowed() const { return false; }
 };
 
+/// This instruction extracts from vector
+struct LlvmInst_ExtractElement {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  LlvmInst_ExtractElement(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return Instr->getOpcode() == llvm::Instruction::ExtractElement;
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+};
+
+/// This instruction inserts into vector
+struct LlvmInst_InsertElement {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  LlvmInst_InsertElement(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return Instr->getOpcode() == llvm::Instruction::InsertElement;
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+};
+
+/// This instruction Shuffle two vectors
+struct LlvmInst_ShuffleVector {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  LlvmInst_ShuffleVector(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return Instr->getOpcode() == llvm::Instruction::ShuffleVector;
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+};
+
 /// This instruction extracts from aggregate
 struct LlvmInst_ExtractValue {
   llvm::Instruction *Instr;
diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp
index 957afb943b..f89f6fd474 100644
--- a/lib/DxilValidation/DxilValidation.cpp
+++ b/lib/DxilValidation/DxilValidation.cpp
@@ -2158,6 +2158,8 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx,
     return true;
 
   if (Ty->isVectorTy()) {
+    if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus())
+      return true;
     ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoVector);
     return false;
   }
diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp
index 68c83fc037..ca343662ab 100644
--- a/lib/HLSL/DxilLinker.cpp
+++ b/lib/HLSL/DxilLinker.cpp
@@ -1255,6 +1255,12 @@ void DxilLinkJob::RunPreparePass(Module &M) {
   // For static global handle.
   PM.add(createLowerStaticGlobalIntoAlloca());
 
+  // Change dynamic indexing vector to array where vectors aren't
+  // supported, but might be there from the initial compile.
+  if (!pSM->IsSM69Plus())
+    PM.add(
+        createDynamicIndexingVectorToArrayPass(false /* ReplaceAllVector */));
+
   // Remove MultiDimArray from function call arg.
   PM.add(createMultiDimArrayToOneDimArrayPass());
 
diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
index 93ba3b9816..b708293fca 100644
--- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp
+++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
@@ -113,13 +113,13 @@ class MatrixBitcastLowerPass : public FunctionPass {
 
     // Lower matrix first.
     for (BitCastInst *BCI : matCastSet) {
-      lowerMatrix(BCI, BCI->getOperand(0));
+      lowerMatrix(DM, BCI, BCI->getOperand(0));
     }
     return bUpdated;
   }
 
 private:
-  void lowerMatrix(Instruction *M, Value *A);
+  void lowerMatrix(DxilModule &DM, Instruction *M, Value *A);
   bool hasCallUser(Instruction *M);
 };
 
@@ -180,7 +180,8 @@ Value *CreateEltGEP(Value *A, unsigned i, Value *zeroIdx,
 }
 } // namespace
 
-void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) {
+void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
+                                         Value *A) {
   for (auto it = M->user_begin(); it != M->user_end();) {
     User *U = *(it++);
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
@@ -193,31 +194,42 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) {
         SmallVector<Value *, 2> idxList(GEP->idx_begin(), GEP->idx_end());
         DXASSERT(idxList.size() == 2,
                  "else not one dim matrix array index to matrix");
-
-        HLMatrixType MatTy = HLMatrixType::cast(EltTy);
-        Value *matSize = Builder.getInt32(MatTy.getNumElements());
-        idxList.back() = Builder.CreateMul(idxList.back(), matSize);
+        if (!DM.GetShaderModel()->IsSM69Plus()) {
+          HLMatrixType MatTy = HLMatrixType::cast(EltTy);
+          Value *matSize = Builder.getInt32(MatTy.getNumElements());
+          idxList.back() = Builder.CreateMul(idxList.back(), matSize);
+        }
         Value *NewGEP = Builder.CreateGEP(A, idxList);
-        lowerMatrix(GEP, NewGEP);
+        lowerMatrix(DM, GEP, NewGEP);
         DXASSERT(GEP->user_empty(), "else lower matrix fail");
         GEP->eraseFromParent();
       } else {
         DXASSERT(0, "invalid GEP for matrix");
       }
     } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      lowerMatrix(BCI, A);
+      lowerMatrix(DM, BCI, A);
       DXASSERT(BCI->user_empty(), "else lower matrix fail");
       BCI->eraseFromParent();
     } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
       if (VectorType *Ty = dyn_cast<VectorType>(LI->getType())) {
         IRBuilder<> Builder(LI);
-        Value *zeroIdx = Builder.getInt32(0);
-        unsigned vecSize = Ty->getNumElements();
-        Value *NewVec = UndefValue::get(LI->getType());
-        for (unsigned i = 0; i < vecSize; i++) {
-          Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder);
-          Value *Elt = Builder.CreateLoad(GEP);
-          NewVec = Builder.CreateInsertElement(NewVec, Elt, i);
+        Value *NewVec = nullptr;
+        if (DM.GetShaderModel()->IsSM69Plus()) {
+          // Just create a replacement load using the vector pointer.
+          Instruction *NewLI = LI->clone();
+          unsigned VecIdx = NewLI->getNumOperands() - 1;
+          NewLI->setOperand(VecIdx, A);
+          Builder.Insert(NewLI);
+          NewVec = NewLI;
+        } else {
+          Value *zeroIdx = Builder.getInt32(0);
+          unsigned vecSize = Ty->getNumElements();
+          NewVec = UndefValue::get(LI->getType());
+          for (unsigned i = 0; i < vecSize; i++) {
+            Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder);
+            Value *Elt = Builder.CreateLoad(GEP);
+            NewVec = Builder.CreateInsertElement(NewVec, Elt, i);
+          }
         }
         LI->replaceAllUsesWith(NewVec);
         LI->eraseFromParent();
diff --git a/lib/Transforms/Scalar/DxilEliminateVector.cpp b/lib/Transforms/Scalar/DxilEliminateVector.cpp
index 366f011dae..bb9cf43594 100644
--- a/lib/Transforms/Scalar/DxilEliminateVector.cpp
+++ b/lib/Transforms/Scalar/DxilEliminateVector.cpp
@@ -10,6 +10,8 @@
 //                                                                           //
 ///////////////////////////////////////////////////////////////////////////////
 
+#include "dxc/DXIL/DxilModule.h"
+
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Pass.h"
@@ -151,6 +153,10 @@ bool DxilEliminateVector::TryRewriteDebugInfoForVector(InsertElementInst *IE) {
 
 bool DxilEliminateVector::runOnFunction(Function &F) {
 
+  if (F.getParent()->HasDxilModule())
+    if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus())
+      return false;
+
   auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
 
diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp
index feeb23a5da..6d6b93f951 100644
--- a/lib/Transforms/Scalar/LowerTypePasses.cpp
+++ b/lib/Transforms/Scalar/LowerTypePasses.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "dxc/DXIL/DxilConstants.h"
+#include "dxc/DXIL/DxilModule.h"
 #include "dxc/DXIL/DxilOperations.h"
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/HLSL/HLModule.h"
@@ -180,10 +181,12 @@ bool LowerTypePass::runOnModule(Module &M) {
 namespace {
 class DynamicIndexingVectorToArray : public LowerTypePass {
   bool ReplaceAllVectors;
+  bool SupportsVectors;
 
 public:
   explicit DynamicIndexingVectorToArray(bool ReplaceAll = false)
-      : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll) {}
+      : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll),
+        SupportsVectors(false) {}
   static char ID; // Pass identification, replacement for typeid
   void applyOptions(PassOptions O) override;
   void dumpConfig(raw_ostream &OS) override;
@@ -194,6 +197,7 @@ class DynamicIndexingVectorToArray : public LowerTypePass {
   Type *lowerType(Type *Ty) override;
   Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
   StringRef getGlobalPrefix() override { return ".v"; }
+  void initialize(Module &M) override;
 
 private:
   bool HasVectorDynamicIndexing(Value *V);
@@ -207,6 +211,11 @@ class DynamicIndexingVectorToArray : public LowerTypePass {
   void ReplaceAddrSpaceCast(ConstantExpr *CE, Value *A, IRBuilder<> &Builder);
 };
 
+void DynamicIndexingVectorToArray::initialize(Module &M) {
+  if (M.HasHLModule())
+    SupportsVectors = M.GetHLModule().GetShaderModel()->IsSM69Plus();
+}
+
 void DynamicIndexingVectorToArray::applyOptions(PassOptions O) {
   GetPassOptionBool(O, "ReplaceAllVectors", &ReplaceAllVectors,
                     ReplaceAllVectors);
@@ -286,7 +295,7 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) {
             StoreInst *stInst = cast<StoreInst>(GEPUser);
             Value *val = stInst->getValueOperand();
             Value *ldVal = Builder.CreateLoad(V);
-            ldVal = Builder.CreateInsertElement(ldVal, val, constIdx);
+            ldVal = Builder.CreateInsertElement(ldVal, val, constIdx);
             Builder.CreateStore(ldVal, V);
             stInst->eraseFromParent();
           }
@@ -306,8 +315,11 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) {
 }
 
 bool DynamicIndexingVectorToArray::needToLower(Value *V) {
+  // Only needed where vectors aren't supported.
+  if (SupportsVectors)
+    return false;
   Type *Ty = V->getType()->getPointerElementType();
-  if (dyn_cast<VectorType>(Ty)) {
+  if (isa<VectorType>(Ty)) {
     if (isa<GlobalVariable>(V) || ReplaceAllVectors) {
       return true;
     }
diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
index 0c3e13f608..6737c9100e 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
@@ -1869,7 +1869,8 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
       // if
       // all its users can be transformed, then split up the aggregate into its
       // separate elements.
-      if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
+      if (!HLM.GetShaderModel()->IsSM69Plus() && ShouldAttemptScalarRepl(AI) &&
+          isSafeAllocaToScalarRepl(AI)) {
         std::vector<Value *> Elts;
         IRBuilder<> Builder(dxilutil::FindAllocaInsertionPt(AI));
         bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
@@ -1945,8 +1946,9 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
         continue;
       }
 
-      // Flat Global vector if no dynamic vector indexing.
-      bool bFlatVector = !hasDynamicVectorIndexing(GV);
+      // Flat Global vector if no dynamic vector indexing and pre-6.9.
+      bool bFlatVector =
+          !hasDynamicVectorIndexing(GV) && !HLM.GetShaderModel()->IsSM69Plus();
 
       if (bFlatVector) {
         GVDbgOffset &dbgOffset = GVDbgOffsetMap[GV];
@@ -1980,10 +1982,12 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
       } else {
         // SROA_Parameter_HLSL has no access to a domtree, if one is needed,
         // it'll be generated
-        SROAed = SROA_Helper::DoScalarReplacement(
-            GV, Elts, Builder, bFlatVector,
-            // TODO: set precise.
-            /*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr);
+        if (!HLM.GetShaderModel()->IsSM69Plus()) {
+          SROAed = SROA_Helper::DoScalarReplacement(
+              GV, Elts, Builder, bFlatVector,
+              // TODO: set precise.
+              /*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr);
+        }
       }
 
       if (SROAed) {
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 729771c7c7..1b07d5f14f 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -14,6 +14,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "dxc/DXIL/DxilModule.h"
+
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
@@ -290,6 +292,10 @@ bool Scalarizer::doInitialization(Module &M) {
 }
 
 bool Scalarizer::runOnFunction(Function &F) {
+  if (F.getParent()->HasDxilModule())
+    if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus())
+      return false;
+
   for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
     BasicBlock *BB = BBI;
     for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
diff --git a/tools/clang/lib/CodeGen/CGExprScalar.cpp b/tools/clang/lib/CodeGen/CGExprScalar.cpp
index 0cb993e6f4..530c791fcc 100644
--- a/tools/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/tools/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3713,20 +3713,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
       llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
       llvm::Value *LHS = Visit(lhsExpr);
       llvm::Value *RHS = Visit(rhsExpr);
-      if (llvm::VectorType *VT = dyn_cast<llvm::VectorType>(CondV->getType())) {
-        llvm::VectorType *ResultVT = cast<llvm::VectorType>(LHS->getType());
-        llvm::Value *result = llvm::UndefValue::get(ResultVT);
-        for (unsigned i = 0; i < VT->getNumElements(); i++) {
-          llvm::Value *EltCond = Builder.CreateExtractElement(CondV, i);
-          llvm::Value *EltL = Builder.CreateExtractElement(LHS, i);
-          llvm::Value *EltR = Builder.CreateExtractElement(RHS, i);
-          llvm::Value *EltSelect = Builder.CreateSelect(EltCond, EltL, EltR);
-          result = Builder.CreateInsertElement(result, EltSelect, i);
-        }
-        return result;
-      } else {
-        return Builder.CreateSelect(CondV, LHS, RHS);
-      }
+      return Builder.CreateSelect(CondV, LHS, RHS);
     }
     if (hlsl::IsHLSLMatType(E->getType())) {
       llvm::Value *Cond = CGF.EmitScalarExpr(condExpr);
diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp
index c5a30e00fa..6c602c9864 100644
--- a/tools/clang/lib/Sema/SemaHLSL.cpp
+++ b/tools/clang/lib/Sema/SemaHLSL.cpp
@@ -6487,6 +6487,9 @@ bool HLSLExternalSource::MatchArguments(
     }
   }
 
+  std::string profile = m_sema->getLangOpts().HLSLProfile;
+  const ShaderModel *SM = hlsl::ShaderModel::GetByName(profile.c_str());
+
   // Populate argTypes.
   for (size_t i = 0; i <= Args.size(); i++) {
     const HLSL_INTRINSIC_ARGUMENT *pArgument = &pIntrinsic->pArgs[i];
@@ -6657,8 +6660,9 @@ bool HLSLExternalSource::MatchArguments(
       }
 
       // Verify that the final results are in bounds.
-      CAB(uCols > 0 && uCols <= MaxVectorSize && uRows > 0 &&
-              uRows <= MaxVectorSize,
+      CAB((uCols > 0 && uRows > 0 &&
+           ((uCols <= MaxVectorSize && uRows <= MaxVectorSize) ||
+            (SM->IsSM69Plus() && uRows == 1))),
           i);
 
       // Const
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl
new file mode 100644
index 0000000000..bb2cae6756
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl
@@ -0,0 +1,463 @@
+// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=2 %s | FileCheck %s
+// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=5 %s | FileCheck %s
+// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=3 %s | FileCheck %s
+// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=9 %s | FileCheck %s
+
+// Test relevant operators on an assortment of bool vector sizes with 6.9 native vectors.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// Uses a non-vector buffer to avoid interacting with that implementation.
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]]
+RWStructuredBuffer< bool > buf;
+
+groupshared vector<bool, NUM> gs_vec1, gs_vec2;
+groupshared vector<bool, NUM+1> gs_vec3;
+
+
+// A mixed-type overload to test overload resolution and mingle different vector element types in ops
+// Test assignment operators.
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout vector<bool, NUM> things[10], bool scales[10]) {
+
+  // Another trick to capture the size.
+  // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]]
+  // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.i32 [[res]], 0
+  // CHECK: [[bscl:%[0-9]*]] = icmp ne i32 [[scl]], 0
+  bool scalar = buf.Load(NUM);
+
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add9]]
+  // CHECK: [[bvec9:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec9]], zeroinitializer
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0
+  // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec9]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]]
+  things[0] = things[9];
+
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i1> undef, i1 [[bscl]], i32 0
+  // CHECK: [[res:%[0-9]*]] = shufflevector <[[NUM]] x i1> [[spt]], <[[NUM]] x i1> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+  // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[res]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]]
+  things[5] = scalar;
+
+}
+
+// Test arithmetic operators.
+// CHECK-LABEL: define void @"\01?arithmetic
+export vector<bool, NUM> arithmetic(inout vector<bool, NUM> things[10])[10] {
+  vector<bool, NUM> res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]]
+
+  // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[svec0:%[0-9]*]] = sext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32>
+  // CHECK: [[bsvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[svec0]], zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bsvec0]] to <[[NUM]] x i32>
+  res[0] = -things[0];
+
+  // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32>
+  // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32>
+  res[1] = +things[0];
+
+  // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32>
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32>
+  // CHECK: [[res2:%[0-9]*]] = add nuw nsw <[[NUM]] x i32> [[vec2]], [[vec1]]
+  // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer
+  // CHECK: [[res2:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  res[2] = things[1] + things[2];
+
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32>
+  // CHECK: [[res3:%[0-9]*]] = sub nsw <[[NUM]] x i32> [[vec2]], [[vec3]]
+  // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer
+  // CHECK: [[res3:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32>
+  res[3] = things[2] - things[3];
+
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32>
+  // CHECK: [[res4:%[0-9]*]] = mul nuw nsw <[[NUM]] x i32> [[vec4]], [[vec3]]
+  // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer
+  // CHECK: [[res4:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32>
+  res[4] = things[3] * things[4];
+
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32>
+  // CHECK: [[res5:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec4]], [[vec5]]
+  // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer
+  // CHECK: [[res5:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32>
+  res[5] = things[4] / things[5];
+
+  // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer
+  // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32>
+  // CHECK: [[res6:%[0-9]*]] = {{[ufs]?rem( fast)?}} <[[NUM]] x i32> [[vec5]], [[vec6]]
+  res[6] = things[5] % things[6];
+
+  // Stores into res[]. Previous were for things[] inout.
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0
+  // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1
+  // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2
+  // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3
+  // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4
+  // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5
+  // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]]
+  // CHECK: ret void
+
+
+  return res;
+}
+
+// Test arithmetic operators with scalars.
+// CHECK-LABEL: define void @"\01?scarithmetic
+export vector<bool, NUM> scarithmetic(inout vector<bool, NUM> things[10], bool scales[10])[10] {
+  vector<bool, NUM> res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]]
+
+  // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32>
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 0
+  // CHECK: [[scl0:%[0-9]*]] = load i32, i32* [[add0]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl0]], i32 0
+  // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = add <[[NUM]] x i32> [[spt0]], [[vec0]]
+  // CHECK: [[bres0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res0]], zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32>
+  res[0] = things[0] + scales[0];
+
+  // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32>
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 1
+  // CHECK: [[scl1:%[0-9]*]] = load i32, i32* [[add1]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl1]], i32 0
+  // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res1:%[0-9]*]] = sub <[[NUM]] x i32> [[vec1]], [[spt1]]
+  // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer
+  // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+  res[1] = things[1] - scales[1];
+
+
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32>
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 2
+  // CHECK: [[scl2:%[0-9]*]] = load i32, i32* [[add2]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl2]], i32 0
+  // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res2:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt2]], [[vec2]]
+  // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer
+  // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  res[2] = things[2] * scales[2];
+
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32>
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 3
+  // CHECK: [[scl3:%[0-9]*]] = load i32, i32* [[add3]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl3]], i32 0
+  // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res3:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec3]], [[spt3]]
+  // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer
+  // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32>
+  res[3] = things[3] / scales[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 4
+  // CHECK: [[scl4:%[0-9]*]] = load i32, i32* [[add4]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl4]], i32 0
+  // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32>
+  // CHECK: [[res4:%[0-9]*]] = add <[[NUM]] x i32> [[spt4]], [[vec4]]
+  // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer
+  // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32>
+  res[4] = scales[4] + things[4];
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 5
+  // CHECK: [[scl5:%[0-9]*]] = load i32, i32* [[add5]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl5]], i32 0
+  // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32>
+  // CHECK: [[res5:%[0-9]*]] = sub <[[NUM]] x i32> [[spt5]], [[vec5]]
+  // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer
+  // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32>
+  res[5] = scales[5] - things[5];
+
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 6
+  // CHECK: [[scl6:%[0-9]*]] = load i32, i32* [[add6]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl6]], i32 0
+  // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer
+  // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32>
+  // CHECK: [[res6:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt6]], [[vec6]]
+  // CHECK: [[bres6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res6]], zeroinitializer
+  // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32>
+  res[6] = scales[6] * things[6];
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0
+  // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1
+  // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2
+  // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3
+  // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4
+  // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5
+  // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6
+  // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]]
+  // CHECK: ret void
+
+
+  return res;
+}
+
+// Test logic operators (!, ||, &&, ?:) and comparisons (==, !=, <, >, <=, >=) on bool vectors.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<bool, NUM> consequences[10])[10] {
+  vector<bool, NUM> res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32>
+  res[0] = !truth[0];
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]]
+  // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]]
+  // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // TODO: the ternary below has no FileCheck directives for its result yet; add them.
+
+  res[3] = truth[3] ? truth[4] : truth[5];
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[bres4:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[bvec0]], [[bvec1]]
+  // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32>
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[bres5:%[0-9]*]] = icmp {{u?}}ne <[[NUM]] x i1> [[bvec1]], [[bvec2]]
+  // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32>
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[bres6:%[0-9]*]] = icmp {{[osu]?}}lt <[[NUM]] x i1> [[bvec2]], [[bvec3]]
+  // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32>
+  res[6] = consequences[2] <  consequences[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[bres7:%[0-9]*]] = icmp {{[osu]?}}gt <[[NUM]] x i1> [[bvec3]], [[bvec4]]
+  // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32>
+  res[7] = consequences[3] >  consequences[4];
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // CHECK: [[bres8:%[0-9]*]] = icmp {{[osu]?}}le <[[NUM]] x i1> [[bvec4]], [[bvec5]]
+  // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32>
+  res[8] = consequences[4] <= consequences[5];
+
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]]
+  // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer
+  // CHECK: [[bres9:%[0-9]*]] = icmp {{[osu]?}}ge <[[NUM]] x i1> [[bvec5]], [[bvec6]]
+  // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32>
+  res[9] = consequences[5] >= consequences[6];
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0
+  // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4
+  // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5
+  // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6
+  // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]]
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7
+  // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]]
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8
+  // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]]
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9
+  // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]]
+  // CHECK: ret void
+
+  return res;
+}
+
+static const int Ix = 2; // Compile-time constant index used by index() below.
+
+// Test indexing operators with literal, dynamic (i), and static const (Ix) indices for both stores and loads.
+// CHECK-LABEL: define void @"\01?index
+export vector<bool, NUM> index(vector<bool, NUM> things[10], int i, bool val)[10] {
+  vector<bool, NUM> res[10];
+
+  // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x i32>]
+  // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 0
+  // CHECK: store <[[NUM]] x i32> zeroinitializer, <[[NUM]] x i32>* [[res0]]
+  res[0] = 0;
+
+  // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 %i
+  // CHECK: store <[[NUM]] x i32> <i32 1{{.*}}>, <[[NUM]] x i32>* [[resi]]
+  res[i] = 1;
+
+  // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 2
+  // CHECK: store <[[NUM]] x i32> <i32 1{{.*}}>, <[[NUM]] x i32>* [[res2]]
+  res[Ix] = true;
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0
+  // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[bthg0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg0]], zeroinitializer
+  // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 3
+  // CHECK: [[thg0:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg0]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[thg0]], <[[NUM]] x i32>* [[res3]]
+  res[3] = things[0];
+
+  // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 %i
+  // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[addi]]
+  // CHECK: [[bthgi:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thgi]], zeroinitializer
+  // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 4
+  // CHECK: [[thgi:%[0-9]*]] = zext <[[NUM]] x i1> [[bthgi]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[thgi]], <[[NUM]] x i32>* [[res4]]
+  res[4] = things[i];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2
+  // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[bthg2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg2]], zeroinitializer
+  // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 5
+  // CHECK: [[thg2:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg2]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[thg2]], <[[NUM]] x i32>* [[res5]]
+  res[5] = things[Ix];
+  // CHECK: ret void
+  return res;
+
+}
+
+// Test bit twiddling operators (|, &, ^ and their compound-assignment forms) on bool vectors.
+// CHECK-LABEL: define void @"\01?bittwiddlers
+export void bittwiddlers(inout vector<bool, NUM> things[10]) {
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32>
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32>
+  // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]]
+  // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1
+  // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]]
+  things[1] = things[2] | things[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec4]], [[bvec3]]
+  // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer
+  // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]]
+  things[2] = things[3] & things[4];
+
+  // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32>
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32>
+  // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer
+  // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]]
+  things[3] = things[4] ^ things[5];
+
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer
+  // CHECK: [[bres4:%[0-9]*]] = or <[[NUM]] x i1> [[bvec6]], [[bvec4]]
+  // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]]
+  things[4] |= things[6];
+
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]]
+  // CHECK: [[bvec7:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec7]], zeroinitializer
+  // CHECK: [[bres5:%[0-9]*]] = and <[[NUM]] x i1> [[bvec7]], [[bvec5]]
+  // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]]
+  things[5] &= things[7];
+
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]]
+  // CHECK: [[bvec8:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec8]], zeroinitializer
+  // CHECK: [[bres6:%[0-9]*]] = xor <[[NUM]] x i1> [[bvec6]], [[bvec8]]
+  // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]]
+  things[6] ^= things[8];
+
+  // CHECK: ret void
+}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl
new file mode 100644
index 0000000000..8c07f40af7
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl
@@ -0,0 +1,58 @@
+// RUN: %dxc -HV 2018 -T lib_6_9   -DTYPE=uint     -DNUM=5 %s | FileCheck %s
+// RUN: %dxc -HV 2018 -T lib_6_9   -DTYPE=int64_t  -DNUM=3 %s | FileCheck %s
+// RUN: %dxc -HV 2018 -T lib_6_9   -DTYPE=uint16_t -DNUM=9 -enable-16bit-types %s | FileCheck %s
+
+// Test bitwise operators on an assortment of vector sizes and integer types with 6.9 native vectors.
+
+// Test bit twiddling operators (~, |, &, ^ and their compound-assignment forms) on integer vectors.
+// CHECK-LABEL: define void @"\01?bittwiddlers
+// CHECK-SAME: ([10 x <[[NUM:[0-9][0-9]*]] x [[TYPE:[a-z0-9]*]]>]*
+export void bittwiddlers(inout vector<TYPE, NUM> things[10]) {
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1,
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add0]]
+  things[0] = ~things[1];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]]
+  things[1] = things[2] | things[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]]
+  things[2] = things[3] & things[4];
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]]
+  things[3] = things[4] ^ things[5];
+
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: [[res4:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec6]], [[vec4]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]]
+  things[4] |= things[6];
+
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]]
+  // CHECK: [[res5:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]], [[vec5]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]]
+  things[5] &= things[7];
+
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]]
+  // CHECK: [[res6:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec6]], [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]]
+  things[6] ^= things[8];
+
+  // CHECK: ret void
+}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl
new file mode 100644
index 0000000000..b617bf15b1
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl
@@ -0,0 +1,491 @@
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=8 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=9 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=10 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=11 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=12 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=13 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=14 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=15 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=16 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=18 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=128 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Less exhaustive testing for some other types.
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int      -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint     -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double   -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=9 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t   -DNUM=177 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Test relevant operators on an assortment of vector sizes and types with 6.9 native vectors.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// Uses non vector buffer to avoid interacting with that implementation.
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]]
+
+RWStructuredBuffer< TYPE > buf; // Scalar (non-vector) buffer; assignments() loads from it.
+
+export void assignments(inout vector<TYPE, NUM> things[10], TYPE scales[10]);
+export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11];
+export vector<TYPE, NUM> scarithmetic(inout vector<TYPE, NUM> things[10], TYPE scales[10])[10];
+export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[10])[10];
+export vector<TYPE, NUM> index(vector<TYPE, NUM> things[10], int i, TYPE val)[10];
+
+struct Interface {
+  vector<TYPE, NUM> assigned[10];
+  vector<TYPE, NUM> arithmeticked[11];
+  vector<TYPE, NUM> scarithmeticked[10];
+  vector<bool, NUM> logicked[10];
+  vector<TYPE, NUM> indexed[10];
+  TYPE scales[10];
+};
+
+#if 0
+// Disabled compute-shader driver that calls each exported function declared above. Requires vector loading support. Enable when available.
+RWStructuredBuffer<Interface> Input;
+RWStructuredBuffer<Interface> Output;
+
+TYPE g_val;
+
+[shader("compute")]
+[numthreads(8,1,1)]
+void main(uint GI : SV_GroupIndex) {
+  assignments(Output[GI].assigned, Input[GI].scales);
+  Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked);
+  Output[GI].scarithmeticked = scarithmetic(Input[GI].scarithmeticked, Input[GI].scales);
+  Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned);
+  Output[GI].indexed = index(Input[GI].indexed, GI, g_val);
+}
+#endif
+
+// A mixed-type overload to test overload resolution and mingle different vector element types in ops
+// Test assignment operators.
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout vector<TYPE, NUM> things[10], TYPE scales[10]) {
+
+  // Another trick to capture the size.
+  // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]]
+  // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.[[TY]] [[res]], 0
+  TYPE scalar = buf.Load(NUM);
+
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl]], i32 0
+  // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]]
+  things[0] = scalar;
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]]
+  things[1] += things[5];
+
+   // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]]
+  things[2] -= things[6];
+
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]]
+  things[3] *= things[7];
+
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]]
+  things[4] /= things[8];
+
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]]
+#ifdef DBL
+  // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float>
+  // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float>
+  // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]]
+  // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double>
+  vector<float,NUM> f9 = things[9];
+  vector<float,NUM> f5 = things[5];
+  f5 %= f9;
+  things[5] = f5;
+#else
+  // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]]
+  things[5] %= things[9];
+#endif
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]]
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1
+  // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0
+  // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]]
+  things[6] += scales[1];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2
+  // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0
+  // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]]
+  things[7] -= scales[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3
+  // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0
+  // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]]
+  things[8] *= scales[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4
+  // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0
+  // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]]
+  things[9] /= scales[4];
+
+}
+
+// Test unary, binary, and increment/decrement arithmetic operators on native vectors.
+// CHECK-LABEL: define void @"\01?arithmetic
+export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11] {
+  vector<TYPE, NUM> res[11];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[res1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]>
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  res[0] = -things[0];
+  res[1] = +things[0];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]]
+  res[2] = things[1] + things[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+  res[3] = things[2] - things[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]]
+  res[4] = things[3] * things[4];
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  res[5] = things[4] / things[5];
+
+  // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float>
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+#ifdef DBL
+  // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float>
+  // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]]
+  // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double>
+  res[6] = (vector<float,NUM>)things[5] % (vector<float,NUM>)things[6];
+#else
+  // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]]
+  res[6] = things[5] % things[6];
+#endif
+
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]]
+  // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]]
+  res[7] = things[7]++;
+
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]]
+  // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]]
+  res[8] = things[8]--;
+
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]]
+  // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]]
+  res[9] = ++things[9];
+
+  // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10
+  // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add10]]
+  // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]]
+  res[10] = --things[10];
+
+  // Stores into res[] (returned through %agg.result). The earlier stores wrote back to the things[] inout parameter.
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]]
+  // Elements 7 and 8 came from post-increment/decrement, so the originally loaded value goes into res[].
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 7
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[add7]]
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 8
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[add8]]
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 9
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]]
+  // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 10
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]]
+  // CHECK: ret void
+
+
+  return res;
+}
+
+// Test binary arithmetic operators mixing vectors with scalars, which get splatted to vectors first.
+// CHECK-LABEL: define void @"\01?scarithmetic
+export vector<TYPE, NUM> scarithmetic(inout vector<TYPE, NUM> things[10], TYPE scales[10])[10] {
+  vector<TYPE, NUM> res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 0
+  // CHECK: [[scl0:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add0]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0
+  // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]]
+  res[0] = things[0] + scales[0];
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1
+  // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0
+  // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]]
+  res[1] = things[1] - scales[1];
+
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2
+  // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0
+  // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]]
+  res[2] = things[2] * scales[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3
+  // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0
+  // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]]
+  res[3] = things[3] / scales[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4
+  // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0
+  // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]]
+  res[4] = scales[4] + things[4];
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 5
+  // CHECK: [[scl5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add5]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0
+  // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]]
+  res[5] = scales[5] - things[5];
+
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 6
+  // CHECK: [[scl6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add6]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0
+  // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]]
+  res[6] = scales[6] * things[6];
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: ret void
+
+
+  return res;
+}
+
+// Test logical (!, ||, &&, ?:) and comparison (==, !=, <, >, <=, >=) operators on native vectors.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[10])[10] {
+  vector<bool, NUM> res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32>
+  res[0] = !truth[0];
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]]
+  // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]]
+  // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+
+  // TODO: the select below is not verified yet. Select is still extracting everything, slows WAY down with over 100 elements
+
+  res[3] = truth[3] ? truth[4] : truth[5];
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]]
+  // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32>
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]]
+  // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32>
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+  // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32>
+  res[6] = consequences[2] <  consequences[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]]
+  // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to <[[NUM]] x i32>
+  res[7] = consequences[3] >  consequences[4];
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32>
+  res[8] = consequences[4] <= consequences[5];
+
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]]
+  // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32>
+  res[9] = consequences[5] >= consequences[6];
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0
+  // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4
+  // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5
+  // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6
+  // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]]
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7
+  // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]]
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8
+  // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]]
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9
+  // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]]
+  // CHECK: ret void
+
+  return res;
+}
+
+static const int Ix = 2;
+
+// Test array indexing with literal, dynamic (i), and static const (Ix) indices for stores and loads.
+// CHECK-LABEL: define void @"\01?index
+export vector<TYPE, NUM> index(vector<TYPE, NUM> things[10], int i, TYPE val)[10] {
+  vector<TYPE, NUM> res[10];
+
+  // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x [[TYPE]]>]
+  // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]]
+  res[0] = 0;
+
+  // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 %i
+  // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|0xH3C00).*}}>, <[[NUM]] x [[TYPE]]>* [[resi]]
+  res[i] = 1;
+
+  // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2
+  // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|0xH4000).*}}>, <[[NUM]] x [[TYPE]]>* [[res2]]
+  res[Ix] = 2;
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]]
+  res[3] = things[0];
+
+  // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 %i
+  // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[addi]]
+  // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]]
+  res[4] = things[i];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]]
+  res[5] = things[Ix];
+  // CHECK: ret void
+  return res;
+}
diff --git a/tools/clang/unittests/HLSL/LinkerTest.cpp b/tools/clang/unittests/HLSL/LinkerTest.cpp
index 7cafa0db06..df8bb644e1 100644
--- a/tools/clang/unittests/HLSL/LinkerTest.cpp
+++ b/tools/clang/unittests/HLSL/LinkerTest.cpp
@@ -526,6 +526,11 @@ TEST_F(LinkerTest, RunLinkMatArrayParam) {
   Link(L"main", L"ps_6_0", pLinker, {libName, libName2},
        {"alloca [24 x float]", "getelementptr [12 x float], [12 x float]*"},
        {});
+
+  Link(L"main", L"ps_6_9", pLinker, {libName, libName2},
+       {"alloca [2 x <12 x float>]",
+        "getelementptr [12 x float], [12 x float]*"},
+       {});
 }
 
 TEST_F(LinkerTest, RunLinkMatParam) {
diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py
index 2f632aceee..9e451a51c4 100644
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -1172,6 +1172,37 @@ def populate_llvm_instructions(self):
         self.add_llvm_instr(
             "OTHER", 53, "VAArg", "VAArgInst", "vaarg instruction", "", []
         )
+
+        self.add_llvm_instr(
+            "OTHER",
+            54,
+            "ExtractElement",
+            "ExtractElementInst",
+            "extracts from vector",
+            "",
+            [],
+        )
+
+        self.add_llvm_instr(
+            "OTHER",
+            55,
+            "InsertElement",
+            "InsertElementInst",
+            "inserts into vector",
+            "",
+            [],
+        )
+
+        self.add_llvm_instr(
+            "OTHER",
+            56,
+            "ShuffleVector",
+            "ShuffleVectorInst",
+            "Shuffle two vectors",
+            "",
+            [],
+        )
+
         self.add_llvm_instr(
             "OTHER",
             57,

From 556f6e6d07ef9a4fe99f93830ae9765fb0f04756 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Tue, 18 Feb 2025 11:17:17 -0700
Subject: [PATCH 02/17] fix assert for tessellation patch template args

This got lost somewhere
---
 tools/clang/lib/Sema/SemaHLSL.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp
index 6c602c9864..61571026a2 100644
--- a/tools/clang/lib/Sema/SemaHLSL.cpp
+++ b/tools/clang/lib/Sema/SemaHLSL.cpp
@@ -5185,8 +5185,8 @@ class HLSLExternalSource : public ExternalSemaSource {
       }
       return false;
     } else if (Template->getTemplatedDecl()->hasAttr<HLSLTessPatchAttr>()) {
-      DXASSERT(TemplateArgList.size() == 1,
-               "Tessellation patch has more than one template arg");
+      DXASSERT(TemplateArgList.size() > 0,
+               "Tessellation patch should have at least one template args");
       const TemplateArgumentLoc &argLoc = TemplateArgList[0];
       const TemplateArgument &arg = argLoc.getArgument();
       DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "");

From 248fe805c02fef67cc184cc25de3677ca6331db1 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Wed, 12 Mar 2025 00:46:56 -0600
Subject: [PATCH 03/17] Scalarization pass enable improvements

Determine native vector support at construction. In some passes, this is
complicated because they can be invoked from different environments
where different types of modules or information to create modules may or
may not be available. This required making the module building tolerate
modules without resources as it does when called from a dxilmodule
environment.

Re-enables lowering of single element vectors as they are not intended to pass through as native vectors.

Remove presently unneeded changes to EliminateVector pass.

Fix a few flaws in testing

fixes to bitcastlowerpass to support stores and correctly retrieve the
shader model information.
---
 lib/DxilValidation/DxilValidation.cpp         |   3 +-
 lib/HLSL/HLMatrixBitcastLowerPass.cpp         |  50 +-
 lib/HLSL/HLModule.cpp                         |   3 +
 lib/Transforms/Scalar/DxilEliminateVector.cpp |   4 -
 lib/Transforms/Scalar/LowerTypePasses.cpp     |  39 +-
 .../Scalar/ScalarReplAggregatesHLSL.cpp       |  40 +-
 lib/Transforms/Scalar/Scalarizer.cpp          |  36 +-
 .../hlsl/types/longvec-operators-bool.hlsl    |   1 +
 .../hlsl/types/longvec-operators-int.hlsl     |  89 +-
 .../hlsl/types/longvec-operators-scalars.hlsl | 342 ++++++++
 .../hlsl/types/longvec-operators-vec1s.hlsl   | 460 ++++++++++
 .../hlsl/types/longvec-operators.hlsl         |  15 +-
 .../passes/longvec-alloca-gv-dynvec2array.ll  | 304 +++++++
 .../passes/longvec-alloca-gv-sroa.ll          | 328 +++++++
 .../CodeGenDXIL/passes/longvec-alloca-gv.hlsl | 112 +++
 .../longvec-operators-vec1-scalarizer.ll      | 804 ++++++++++++++++++
 .../passes/longvec-operators-vec1.hlsl        | 425 +++++++++
 .../passes/dxil/lower_type/vec_array_param.ll |  22 +
 18 files changed, 2978 insertions(+), 99 deletions(-)
 create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl
 create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl
 create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll
 create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll
 create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl
 create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
 create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl

diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp
index a9c36b3b13..d068262674 100644
--- a/lib/DxilValidation/DxilValidation.cpp
+++ b/lib/DxilValidation/DxilValidation.cpp
@@ -2193,7 +2193,8 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx,
     return true;
 
   if (Ty->isVectorTy()) {
-    if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus())
+    if (Ty->getVectorNumElements() > 1 &&
+        ValCtx.DxilMod.GetShaderModel()->IsSM69Plus())
       return true;
     ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoVector);
     return false;
diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
index b708293fca..99784e5079 100644
--- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp
+++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
@@ -76,13 +76,15 @@ Type *TryLowerMatTy(Type *Ty) {
 }
 
 class MatrixBitcastLowerPass : public FunctionPass {
-
+  bool SupportsVectors = false;
 public:
   static char ID; // Pass identification, replacement for typeid
   explicit MatrixBitcastLowerPass() : FunctionPass(ID) {}
 
   StringRef getPassName() const override { return "Matrix Bitcast lower"; }
   bool runOnFunction(Function &F) override {
+    if (F.getParent()->HasDxilModule())
+      SupportsVectors = F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus();
     bool bUpdated = false;
     std::unordered_set<BitCastInst *> matCastSet;
     for (auto blkIt = F.begin(); blkIt != F.end(); ++blkIt) {
@@ -194,10 +196,10 @@ void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
         SmallVector<Value *, 2> idxList(GEP->idx_begin(), GEP->idx_end());
         DXASSERT(idxList.size() == 2,
                  "else not one dim matrix array index to matrix");
-        if (!DM.GetShaderModel()->IsSM69Plus()) {
-          HLMatrixType MatTy = HLMatrixType::cast(EltTy);
-          Value *matSize = Builder.getInt32(MatTy.getNumElements());
-          idxList.back() = Builder.CreateMul(idxList.back(), matSize);
+        unsigned NumElts = HLMatrixType::cast(EltTy).getNumElements();
+        if (!SupportsVectors || NumElts == 1) {
+          Value *MatSize = Builder.getInt32(NumElts);
+          idxList.back() = Builder.CreateMul(idxList.back(), MatSize);
         }
         Value *NewGEP = Builder.CreateGEP(A, idxList);
         lowerMatrix(DM, GEP, NewGEP);
@@ -214,18 +216,18 @@ void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
       if (VectorType *Ty = dyn_cast<VectorType>(LI->getType())) {
         IRBuilder<> Builder(LI);
         Value *NewVec = nullptr;
-        if (DM.GetShaderModel()->IsSM69Plus()) {
-          // Just create a replacement load using the vector pointer.
-          Instruction *NewLI = LI->clone();
-          unsigned VecIdx = NewLI->getNumOperands() - 1;
-          NewLI->setOperand(VecIdx, A);
-          Builder.Insert(NewLI);
-          NewVec = NewLI;
+        unsigned VecSize = Ty->getVectorNumElements();
+        if (SupportsVectors && VecSize > 1) {
+          // Create a replacement load using the vector pointer.
+          Instruction *NewLd = LI->clone();
+          unsigned VecIdx = NewLd->getNumOperands() - 1;
+          NewLd->setOperand(VecIdx, A);
+          Builder.Insert(NewLd);
+          NewVec = NewLd;
         } else {
           Value *zeroIdx = Builder.getInt32(0);
-          unsigned vecSize = Ty->getNumElements();
           NewVec = UndefValue::get(LI->getType());
-          for (unsigned i = 0; i < vecSize; i++) {
+          for (unsigned i = 0; i < VecSize; i++) {
             Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder);
             Value *Elt = Builder.CreateLoad(GEP);
             NewVec = Builder.CreateInsertElement(NewVec, Elt, i);
@@ -240,12 +242,20 @@ void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
       Value *V = ST->getValueOperand();
       if (VectorType *Ty = dyn_cast<VectorType>(V->getType())) {
         IRBuilder<> Builder(LI);
-        Value *zeroIdx = Builder.getInt32(0);
-        unsigned vecSize = Ty->getNumElements();
-        for (unsigned i = 0; i < vecSize; i++) {
-          Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder);
-          Value *Elt = Builder.CreateExtractElement(V, i);
-          Builder.CreateStore(Elt, GEP);
+        if (SupportsVectors && Ty->getVectorNumElements() > 1) {
+          // Create a replacement store using the vector pointer.
+          Instruction *NewSt = ST->clone();
+          unsigned VecIdx = NewSt->getNumOperands() - 1;
+          NewSt->setOperand(VecIdx, A);
+          Builder.Insert(NewSt);
+        } else {
+          Value *zeroIdx = Builder.getInt32(0);
+          unsigned vecSize = Ty->getNumElements();
+          for (unsigned i = 0; i < vecSize; i++) {
+            Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder);
+            Value *Elt = Builder.CreateExtractElement(V, i);
+            Builder.CreateStore(Elt, GEP);
+          }
         }
         ST->eraseFromParent();
       } else {
diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp
index 037885c9d8..a67877ef3e 100644
--- a/lib/HLSL/HLModule.cpp
+++ b/lib/HLSL/HLModule.cpp
@@ -604,6 +604,9 @@ MDTuple *HLModule::EmitHLResources() {
 
 void HLModule::LoadHLResources(const llvm::MDOperand &MDO) {
   const llvm::MDTuple *pSRVs, *pUAVs, *pCBuffers, *pSamplers;
+  // No resources. Nothing to do.
+  if (MDO.get() == nullptr)
+    return;
   m_pMDHelper->GetDxilResources(MDO, pSRVs, pUAVs, pCBuffers, pSamplers);
 
   // Load SRV records.
diff --git a/lib/Transforms/Scalar/DxilEliminateVector.cpp b/lib/Transforms/Scalar/DxilEliminateVector.cpp
index bb9cf43594..3ebd48e420 100644
--- a/lib/Transforms/Scalar/DxilEliminateVector.cpp
+++ b/lib/Transforms/Scalar/DxilEliminateVector.cpp
@@ -153,10 +153,6 @@ bool DxilEliminateVector::TryRewriteDebugInfoForVector(InsertElementInst *IE) {
 
 bool DxilEliminateVector::runOnFunction(Function &F) {
 
-  if (F.getParent()->HasDxilModule())
-    if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus())
-      return false;
-
   auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
 
diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp
index 6d6b93f951..7dada4277e 100644
--- a/lib/Transforms/Scalar/LowerTypePasses.cpp
+++ b/lib/Transforms/Scalar/LowerTypePasses.cpp
@@ -212,8 +212,18 @@ class DynamicIndexingVectorToArray : public LowerTypePass {
 };
 
 void DynamicIndexingVectorToArray::initialize(Module &M) {
-  if (M.HasHLModule())
-    SupportsVectors = M.GetHLModule().GetShaderModel()->IsSM69Plus();
+  // Can be invoked in a few places:
+  //  - From a standard compile before dxilgen.
+  //  - When linking, where the DXIL module is available.
+  //  - In isolated dxopt, where the HL module will need to be created.
+  // Since the HL module can't be created when linking, check for the DXIL
+  // module first. Otherwise, either retrieve or create the HL module.
+  if (M.HasDxilModule()) {
+    SupportsVectors = M.GetDxilModule().GetShaderModel()->IsSM69Plus();
+  } else {
+    HLModule &HLM = M.GetOrCreateHLModule();
+    SupportsVectors = HLM.GetShaderModel()->IsSM69Plus();
+  }
 }
 
 void DynamicIndexingVectorToArray::applyOptions(PassOptions O) {
@@ -295,7 +305,7 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) {
             StoreInst *stInst = cast<StoreInst>(GEPUser);
             Value *val = stInst->getValueOperand();
             Value *ldVal = Builder.CreateLoad(V);
-            ldVal = Builder.CreateInsertElement(ldVal, val, constIdx); // UGH
+            ldVal = Builder.CreateInsertElement(ldVal, val, constIdx);
             Builder.CreateStore(ldVal, V);
             stInst->eraseFromParent();
           }
@@ -315,12 +325,21 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) {
 }
 
 bool DynamicIndexingVectorToArray::needToLower(Value *V) {
-  // Only needed where vectors aren't supported.
-  if (SupportsVectors)
-    return false;
+  bool MustReplaceVector = ReplaceAllVectors;
   Type *Ty = V->getType()->getPointerElementType();
+
+  if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+    // Array must be replaced even without dynamic indexing to remove vector
+    // type in dxil.
+    MustReplaceVector = true;
+    Ty = dxilutil::GetArrayEltTy(AT);
+  }
+
   if (isa<VectorType>(Ty)) {
-    if (isa<GlobalVariable>(V) || ReplaceAllVectors) {
+    // Vectors of 2+ elements need no lowering where native vectors are supported.
+    if (SupportsVectors && Ty->getVectorNumElements() > 1)
+      return false;
+    if (isa<GlobalVariable>(V) || MustReplaceVector) {
       return true;
     }
     // Don't lower local vector which only static indexing.
@@ -331,12 +350,6 @@ bool DynamicIndexingVectorToArray::needToLower(Value *V) {
       ReplaceStaticIndexingOnVector(V);
       return false;
     }
-  } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
-    // Array must be replaced even without dynamic indexing to remove vector
-    // type in dxil.
-    // TODO: optimize static array index in later pass.
-    Type *EltTy = dxilutil::GetArrayEltTy(AT);
-    return isa<VectorType>(EltTy);
   }
   return false;
 }
diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
index 6737c9100e..7ec297fb32 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
@@ -81,6 +81,7 @@ class SROA_Helper {
   static bool DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
                                   Type *&BrokenUpTy, uint64_t &NumInstances,
                                   IRBuilder<> &Builder, bool bFlatVector,
+                                  bool SupportsVectors,
                                   bool hasPrecise, DxilTypeSystem &typeSys,
                                   const DataLayout &DL,
                                   SmallVector<Value *, 32> &DeadInsts,
@@ -88,7 +89,7 @@ class SROA_Helper {
 
   static bool
   DoScalarReplacement(GlobalVariable *GV, std::vector<Value *> &Elts,
-                      IRBuilder<> &Builder, bool bFlatVector, bool hasPrecise,
+                      IRBuilder<> &Builder, bool bFlatVector, bool SupportsVectors, bool hasPrecise,
                       DxilTypeSystem &typeSys, const DataLayout &DL,
                       SmallVector<Value *, 32> &DeadInsts, DominatorTree *DT);
   static unsigned GetEltAlign(unsigned ValueAlign, const DataLayout &DL,
@@ -1714,6 +1715,7 @@ bool isGroupShareOrConstStaticArray(GlobalVariable *GV) {
 
 bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
   Module &M = *HLM.GetModule();
+  bool SupportsVectors = HLM.GetShaderModel()->IsSM69Plus();
   DxilTypeSystem &typeSys = HLM.GetTypeSystem();
 
   const DataLayout &DL = M.getDataLayout();
@@ -1869,8 +1871,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
       // if
       // all its users can be transformed, then split up the aggregate into its
       // separate elements.
-      if (!HLM.GetShaderModel()->IsSM69Plus() && ShouldAttemptScalarRepl(AI) &&
-          isSafeAllocaToScalarRepl(AI)) {
+      if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
         std::vector<Value *> Elts;
         IRBuilder<> Builder(dxilutil::FindAllocaInsertionPt(AI));
         bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
@@ -1879,7 +1880,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
         uint64_t NumInstances = 1;
         bool SROAed = SROA_Helper::DoScalarReplacement(
             AI, Elts, BrokenUpTy, NumInstances, Builder,
-            /*bFlatVector*/ true, hasPrecise, typeSys, DL, DeadInsts, &DT);
+            /*bFlatVector*/ true, SupportsVectors, hasPrecise, typeSys, DL, DeadInsts, &DT);
 
         if (SROAed) {
           Type *Ty = AI->getAllocatedType();
@@ -1946,9 +1947,9 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
         continue;
       }
 
-      // Flat Global vector if no dynamic vector indexing and pre-6.9.
+      // Flatten Global vector if no dynamic vector indexing.
       bool bFlatVector =
-          !hasDynamicVectorIndexing(GV) && !HLM.GetShaderModel()->IsSM69Plus();
+        !hasDynamicVectorIndexing(GV);
 
       if (bFlatVector) {
         GVDbgOffset &dbgOffset = GVDbgOffsetMap[GV];
@@ -1982,12 +1983,10 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
       } else {
         // SROA_Parameter_HLSL has no access to a domtree, if one is needed,
         // it'll be generated
-        if (!HLM.GetShaderModel()->IsSM69Plus()) {
-          SROAed = SROA_Helper::DoScalarReplacement(
-              GV, Elts, Builder, bFlatVector,
-              // TODO: set precise.
-              /*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr);
-        }
+        SROAed = SROA_Helper::DoScalarReplacement(
+            GV, Elts, Builder, bFlatVector, SupportsVectors,
+            // TODO: set precise.
+            /*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr);
       }
 
       if (SROAed) {
@@ -2924,6 +2923,7 @@ static ArrayType *CreateNestArrayTy(Type *FinalEltTy,
 bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
                                       Type *&BrokenUpTy, uint64_t &NumInstances,
                                       IRBuilder<> &Builder, bool bFlatVector,
+                                      bool SupportsVectors,
                                       bool hasPrecise, DxilTypeSystem &typeSys,
                                       const DataLayout &DL,
                                       SmallVector<Value *, 32> &DeadInsts,
@@ -3037,6 +3037,10 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
       if (!bFlatVector)
         return false;
 
+      // Skip vector where supported if it has more than 1 element.
+      if (SupportsVectors && ElTy->getVectorNumElements() > 1)
+        return false;
+
       // for array of vector
       // split into arrays of scalar
       VectorType *ElVT = cast<VectorType>(ElTy);
@@ -3121,6 +3125,7 @@ unsigned SROA_Helper::GetEltAlign(unsigned ValueAlign, const DataLayout &DL,
 bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV,
                                       std::vector<Value *> &Elts,
                                       IRBuilder<> &Builder, bool bFlatVector,
+                                      bool SupportsVectors,
                                       bool hasPrecise, DxilTypeSystem &typeSys,
                                       const DataLayout &DL,
                                       SmallVector<Value *, 32> &DeadInsts,
@@ -3138,6 +3143,9 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV,
   // Skip basic types.
   if (Ty->isSingleValueType() && !Ty->isVectorTy())
     return false;
+  // Skip vector where supported if it has more than 1 element.
+  if (Ty->isVectorTy() && SupportsVectors && Ty->getVectorNumElements() > 1)
+    return false;
   // Skip matrix types.
   if (HLMatrixType::isa(Ty))
     return false;
@@ -3244,6 +3252,10 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV,
       if (!bFlatVector)
         return false;
 
+      // Skip vector where supported if it has more than 1 element.
+      if (SupportsVectors && ElTy->getVectorNumElements() > 1)
+        return false;
+
       // for array of vector
       // split into arrays of scalar
       VectorType *ElVT = cast<VectorType>(ElTy);
@@ -5281,6 +5293,8 @@ void SROA_Parameter_HLSL::flattenArgument(
     std::vector<DxilParameterAnnotation> &FlatAnnotationList,
     BasicBlock *EntryBlock, ArrayRef<DbgDeclareInst *> DDIs) {
   std::deque<AnnotatedValue> WorkList;
+  bool SupportsVectors = m_pHLModule->GetShaderModel()->IsSM69Plus();
+
   WorkList.push_back({Arg, paramAnnotation});
 
   unsigned startArgIndex = FlatAnnotationList.size();
@@ -5355,7 +5369,7 @@ void SROA_Parameter_HLSL::flattenArgument(
       // DomTree isn't used by arguments
       SROAed = SROA_Helper::DoScalarReplacement(
           V, Elts, BrokenUpTy, NumInstances, Builder,
-          /*bFlatVector*/ false, annotation.IsPrecise(), dxilTypeSys, DL,
+          /*bFlatVector*/ false, SupportsVectors, annotation.IsPrecise(), dxilTypeSys, DL,
           DeadInsts, /*DT*/ nullptr);
     }
 
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 1b07d5f14f..d3c6d0e7e2 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -153,6 +153,7 @@ class Scalarizer : public FunctionPass,
 
 // HLSL Change Begin
   bool AllowFolding = false;
+  bool SupportsVectors = false;
   Scalarizer(bool AllowFolding) :
     FunctionPass(ID),
     AllowFolding(AllowFolding) {
@@ -294,7 +295,7 @@ bool Scalarizer::doInitialization(Module &M) {
 bool Scalarizer::runOnFunction(Function &F) {
   if (F.getParent()->HasDxilModule())
     if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus())
-      return false;
+      SupportsVectors = true;
 
   for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
     BasicBlock *BB = BBI;
@@ -442,7 +443,8 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
 template<typename Splitter>
 bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
   VectorType *VT = dyn_cast<VectorType>(I.getType());
-  if (!VT)
+  // HLSL Change - allow > 1 vectors where supported.
+  if (!VT || (SupportsVectors && VT->getNumElements() > 1))
     return false;
 
   unsigned NumElems = VT->getNumElements();
@@ -463,7 +465,8 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
 
 bool Scalarizer::visitSelectInst(SelectInst &SI) {
   VectorType *VT = dyn_cast<VectorType>(SI.getType());
-  if (!VT)
+  // HLSL Change - allow > 1 vectors where supported.
+  if (!VT || (SupportsVectors && VT->getNumElements() > 1))
     return false;
 
   unsigned NumElems = VT->getNumElements();
@@ -506,7 +509,8 @@ bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
 
 bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
   VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
-  if (!VT)
+  // HLSL Change - allow > 1 vectors where supported.
+  if (!VT || (SupportsVectors && VT->getNumElements() > 1))
     return false;
 
   IRBuilder<> Builder(GEPI.getParent(), &GEPI);
@@ -540,7 +544,8 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
 
 bool Scalarizer::visitCastInst(CastInst &CI) {
   VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
-  if (!VT)
+  // HLSL Change - allow > 1 vectors where supported.
+  if (!VT || (SupportsVectors && VT->getNumElements() > 1))
     return false;
 
   unsigned NumElems = VT->getNumElements();
@@ -563,8 +568,15 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
   if (!DstVT || !SrcVT)
     return false;
 
+
   unsigned DstNumElems = DstVT->getNumElements();
   unsigned SrcNumElems = SrcVT->getNumElements();
+
+  // HLSL Change Begin - allow > 1 vectors where supported.
+  if (SupportsVectors &&  (DstNumElems > 1 || SrcNumElems > 1))
+    return false;
+  // HLSL Change End - allow > 1 vectors where supported.
+
   IRBuilder<> Builder(BCI.getParent(), &BCI);
   Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
@@ -615,7 +627,8 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
 
 bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   VectorType *VT = dyn_cast<VectorType>(SVI.getType());
-  if (!VT)
+  // HLSL Change - allow > 1 vectors where supported.
+  if (!VT || (SupportsVectors && VT->getNumElements() > 1))
     return false;
 
   unsigned NumElems = VT->getNumElements();
@@ -649,7 +662,8 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 
 bool Scalarizer::visitPHINode(PHINode &PHI) {
   VectorType *VT = dyn_cast<VectorType>(PHI.getType());
-  if (!VT)
+  // HLSL Change - allow > 1 vectors where supported.
+  if (!VT || (SupportsVectors && VT->getNumElements() > 1))
     return false;
 
   unsigned NumElems = VT->getNumElements();
@@ -685,6 +699,10 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
     return false;
 
   unsigned NumElems = Layout.VecTy->getNumElements();
+  // HLSL Change Begin - allow > 1 vectors where supported.
+  if (SupportsVectors && NumElems > 1)
+    return false;
+  // HLSL Change End - allow > 1 vectors where supported.
   IRBuilder<> Builder(LI.getParent(), &LI);
   Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
@@ -711,6 +729,10 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
     return false;
 
   unsigned NumElems = Layout.VecTy->getNumElements();
+  // HLSL Change Begin - allow > 1 vectors where supported.
+  if (SupportsVectors && NumElems > 1)
+    return false;
+  // HLSL Change End - allow > 1 vectors where supported.
   IRBuilder<> Builder(SI.getParent(), &SI);
   Builder.AllowFolding = this->AllowFolding; // HLSL Change
   Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl
index bb2cae6756..12955c87f9 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl
@@ -4,6 +4,7 @@
 // RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=9 %s | FileCheck %s
 
 // Test relevant operators on an assortment bool vector sizes with 6.9 native vectors.
+// Bools have a different representation in memory and a smaller set of interesting ops.
 
 // Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
 // Uses non vector buffer to avoid interacting with that implementation.
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl
index 8c07f40af7..b749a3b255 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl
@@ -1,58 +1,73 @@
-// RUN: %dxc -HV 2018 -T lib_6_9   -DTYPE=uint     -DNUM=5 %s | FileCheck %s
-// RUN: %dxc -HV 2018 -T lib_6_9   -DTYPE=int64_t  -DNUM=3 %s | FileCheck %s
-// RUN: %dxc -HV 2018 -T lib_6_9   -DTYPE=uint16_t -DNUM=9 -enable-16bit-types %s | FileCheck %s
+// RUN: %dxc -T lib_6_9   -DTYPE=uint     -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,UNSIG
+// RUN: %dxc -T lib_6_9   -DTYPE=int64_t  -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,SIG
+// RUN: %dxc -T lib_6_9   -DTYPE=uint16_t -DNUM=9 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,UNSIG
 
 // Test bitwise operators on an assortment vector sizes and integer types with 6.9 native vectors.
 
 // Test bit twiddling operators.
 // CHECK-LABEL: define void @"\01?bittwiddlers
-// CHECK-SAME: ([10 x <[[NUM:[0-9][0-9]*]] x [[TYPE:[a-z0-9]*]]>]*
-export void bittwiddlers(inout vector<TYPE, NUM> things[10]) {
-  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
-  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+// CHECK-SAME: ([11 x <[[NUM:[0-9][0-9]*]] x [[TYPE:[a-z0-9]*]]>]*
+export void bittwiddlers(inout vector<TYPE, NUM> things[11]) {
+  // CHECK: [[adr1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
   // CHECK: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1,
-  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
-  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[adr0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]]
   things[0] = ~things[1];
 
-  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
-  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[adr2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
 
-  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
-  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[adr3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
   // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]]
-  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]]
   things[1] = things[2] | things[3];
 
-  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
-  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[adr4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
   // CHECK: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]]
-  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
   things[2] = things[3] & things[4];
 
-  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
-  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[adr5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
   // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
-  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
   things[3] = things[4] ^ things[5];
 
-  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
-  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
-  // CHECK: [[res4:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec6]], [[vec4]]
-  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]]
-  things[4] |= things[6];
-
-  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
-  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]]
-  // CHECK: [[res5:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]], [[vec5]]
-  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]]
-  things[5] &= things[7];
-
-  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
-  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]]
-  // CHECK: [[res6:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec6]], [[vec8]]
-  // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]]
-  things[6] ^= things[8];
+  // CHECK: [[adr6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]]
+  // CHECK: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]], <[[TYPE]]
+  // CHECK: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+  things[4] = things[5] << things[6];
+
+  // CHECK: [[adr7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]]
+  // CHECK: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]]
+  // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]]
+  // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]]
+  things[5] = things[6] >> things[7];
+
+  // CHECK: [[adr8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]]
+  // CHECK: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]]
+  things[6] |= things[8];
+
+  // CHECK: [[adr9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]]
+  // CHECK: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]]
+  things[7] &= things[9];
+
+  // CHECK: [[adr10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10
+  // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]]
+  // CHECK: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]]
+  things[8] ^= things[10];
 
   // CHECK: ret void
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl
new file mode 100644
index 0000000000..8b12b96c80
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl
@@ -0,0 +1,342 @@
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float  %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int       %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint      %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double    -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int64_t   %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t  %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t   -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t  -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Test relevant operators on an assortment of scalar types when compiling for shader model 6.9.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z0-9_]*]]
+RWStructuredBuffer<TYPE> buf;
+
+export void assignments(inout TYPE things[10], TYPE scales[10]);
+export TYPE arithmetic(inout TYPE things[11])[11];
+export bool logic(bool truth[10], TYPE consequences[10])[10];
+export TYPE index(TYPE things[10], int i, TYPE val)[10];
+
+struct Interface { // Element type of the Input/Output buffers in the disabled main() below.
+  TYPE assigned[10]; // passed to assignments() and to logic() in main()
+  TYPE arithmeticked[11]; // argument to and result of arithmetic() in main()
+  bool logicked[10]; // argument to and result of logic() in main()
+  TYPE indexed[10]; // argument to and result of index() in main()
+  TYPE scales[10]; // second argument to assignments() in main()
+};
+
+#if 0
+// Disabled compute entry point that calls each exported test function. Requires vector loading support. Enable when available.
+RWStructuredBuffer<Interface> Input;
+RWStructuredBuffer<Interface> Output;
+
+TYPE g_val;
+
+[shader("compute")]
+[numthreads(8,1,1)]
+void main(uint GI : SV_GroupIndex) {
+  assignments(Output[GI].assigned, Input[GI].scales); // NOTE(review): two arguments, as in the forward declaration; the definition below takes one.
+  Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked);
+  Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned);
+  Output[GI].indexed = index(Input[GI].indexed, GI, g_val); // NOTE(review): three arguments, as in the forward declaration; the definition below takes two.
+}
+#endif
+
+// NOTE(review): this definition takes only `things`; the forward declaration above also takes `scales`.
+// Test assignment operators (=, +=, -=, *=, /=, %=) on scalar array elements.
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout TYPE things[10]) {
+
+  // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{(8|4|2)}})
+  // CHECK: [[res0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0
+  // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]]
+  things[0] = buf.Load(1);
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[res1:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val1]], [[val5]]
+  // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]]
+  things[1] += things[5];
+
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6
+  // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast| nsw)?]] [[TYPE]] [[val2]], [[val6]]
+  // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]]
+  things[2] -= things[6];
+
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7
+  // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast| nsw)?]] [[TYPE]] [[val3]], [[val7]]
+  // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]]
+  things[3] *= things[7];
+
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8
+  // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast| nsw)?]] [[TYPE]] [[val4]], [[val8]]
+  // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]]
+  things[4] /= things[8];
+
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9
+  // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]]
+#ifdef DBL // For double, the remainder is computed on float copies and the result is converted back.
+  // DBL: [[fvec9:%.*]] = fptrunc double [[val9]] to float
+  // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float
+  // DBL: [[fres5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] float [[fvec5]], [[fvec9]]
+  // DBL: [[res5:%.*]] = fpext float [[fres5]] to double
+  float f9 = things[9];
+  float f5 = things[5];
+  f5 %= f9;
+  things[5] = f5;
+#else
+  // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] [[TYPE]] [[val5]], [[val9]]
+  things[5] %= things[9];
+#endif
+  // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]]
+}
+
+// Test arithmetic operators: unary + and -, binary + - * / %, and pre/post increment and decrement.
+// CHECK-LABEL: define void @"\01?arithmetic
+export TYPE arithmetic(inout TYPE things[11])[11] {
+  TYPE res[11];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0
+  // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[res1:%.*]] = [[SUB]] [[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000)}}, [[res0]]
+  res[0] = +things[0];
+  res[1] = -things[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[val2]], [[val1]]
+  res[2] = things[1] + things[2];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[val2]], [[val3]]
+  res[3] = things[2] - things[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[val4]], [[val3]]
+  res[4] = things[3] * things[4];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[val4]], [[val5]]
+  res[5] = things[4] / things[5];
+
+  // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6
+  // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+#ifdef DBL // For double, both operands are cast to float before the remainder is taken.
+  // DBL: [[fvec6:%.*]] = fptrunc double [[val6]] to float
+  // DBL: [[fres6:%.*]] = [[REM]] float [[fvec5]], [[fvec6]]
+  // DBL: [[res6:%.*]] = fpext float [[fres6]] to double
+  res[6] = (float)things[5] % (float)things[6];
+#else
+  // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]]
+  res[6] = things[5] % things[6];
+#endif
+
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7
+  // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]]
+  // CHECK: [[res7:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val7]], {{(1|1\.?0*e?\+?0*|0xH3C00)}}
+  // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]]
+  res[7] = things[7]++;
+
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8
+  // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]]
+  // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[val8]]
+  // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]]
+  res[8] = things[8]--;
+
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9
+  // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]]
+  // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[val9]]
+  // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]]
+  res[9] = ++things[9];
+
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10
+  // CHECK: [[val10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]]
+  // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[val10]]
+  // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]]
+  res[10] = --things[10];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0
+  // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1
+  // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2
+  // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3
+  // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4
+  // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5
+  // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6
+  // CHECK: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]]
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7
+  // This is a post op, so the original value goes into res[].
+  // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr7]]
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8
+  // This is a post op, so the original value goes into res[].
+  // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr8]]
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9
+  // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]]
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10
+  // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]]
+  // CHECK: ret void
+  return res;
+}
+
+// Test logic operators: !, ||, && and ?: on bool elements, and ==, !=, <, >, <=, >= on TYPE elements.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export bool logic(bool truth[10], TYPE consequences[10])[10] {
+  bool res[10];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0
+  // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]]
+  // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1
+  res[0] = !truth[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]]
+  // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]]
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[bvec2:%.*]] = icmp ne i32 [[val2]], 0
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[bvec3:%.*]] = icmp ne i32 [[val3]], 0
+  // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]]
+  // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]]
+  // CHECK: [[bvec4:%.*]] = icmp ne i32 [[val4]], 0
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]]
+  // CHECK: [[bvec5:%.*]] = icmp ne i32 [[val5]], 0
+  // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]]
+  // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
+  res[3] = truth[3] ? truth[4] : truth[5];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0
+  // CHECK: [[val0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast| nsw)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]]
+  // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]]
+  // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]]
+  // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32
+  res[6] = consequences[2] <  consequences[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[TYPE]] [[val3]], [[val4]]
+  // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32
+  res[7] = consequences[3] >  consequences[4];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[TYPE]] [[val4]], [[val5]]
+  // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32
+  res[8] = consequences[4] <= consequences[5];
+
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6
+  // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+  // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]]
+  // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32
+  res[9] = consequences[5] >= consequences[6];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0
+  // CHECK: store i32 [[res0]], i32* [[adr0]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1
+  // CHECK: store i32 [[res1]], i32* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2
+  // CHECK: store i32 [[res2]], i32* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3
+  // CHECK: store i32 [[res3]], i32* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4
+  // CHECK: store i32 [[res4]], i32* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5
+  // CHECK: store i32 [[res5]], i32* [[adr5]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6
+  // CHECK: store i32 [[res6]], i32* [[adr6]]
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7
+  // CHECK: store i32 [[res7]], i32* [[adr7]]
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8
+  // CHECK: store i32 [[res8]], i32* [[adr8]]
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9
+  // CHECK: store i32 [[res9]], i32* [[adr9]]
+
+  // CHECK: ret void
+  return res;
+}
+
+static const int Ix = 2;
+
+// Test indexing operators with literal, dynamic (i), and static const (Ix) indices, for both stores and loads.
+// CHECK-LABEL: define void @"\01?index
+export TYPE index(TYPE things[10], int i)[10] {
+  // CHECK: [[res:%.*]] = alloca [10 x [[TYPE]]]
+  TYPE res[10];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0
+  // CHECK: store [[TYPE]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]]
+  res[0] = 0;
+
+  // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 %i
+  // CHECK: store [[TYPE]] {{(1|1\.?0*e?\+?0*|0xH3C00)}}, [[TYPE]]* [[adri]]
+  res[i] = 1;
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2
+  // CHECK: store [[TYPE]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[TYPE]]* [[adr2]]
+  res[Ix] = 2;
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0
+  // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3
+  // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[adr3]]
+  res[3] = things[0];
+
+  // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i
+  // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4
+  // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[adr4]]
+  res[4] = things[i];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5
+  // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[adr5]]
+  res[5] = things[Ix];
+  // CHECK: ret void
+  return res;
+}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl
new file mode 100644
index 0000000000..377c797b93
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl
@@ -0,0 +1,460 @@
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float1          %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int1      -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double1   -DDBL %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t1      -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t1  -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG
+
+// Test relevant operators on an assortment of one-element vector types with 6.9 native vectors.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[ELTY:[a-z0-9_]*]]
+// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:.*]] }
+RWStructuredBuffer<TYPE> buf;
+
+export void assignments(inout TYPE things[10], TYPE scales[10]);
+export TYPE arithmetic(inout TYPE things[11])[11];
+export bool logic(bool truth[10], TYPE consequences[10])[10];
+export TYPE index(TYPE things[10], int i, TYPE val)[10];
+
+struct Interface { // Element type of the Input/Output buffers in the disabled main() below.
+  TYPE assigned[10]; // passed to assignments() and to logic() in main()
+  TYPE arithmeticked[11]; // argument to and result of arithmetic() in main()
+  bool logicked[10]; // argument to and result of logic() in main()
+  TYPE indexed[10]; // argument to and result of index() in main()
+  TYPE scales[10]; // second argument to assignments() in main()
+};
+
+#if 0
+// Disabled compute entry point that calls each exported test function. Requires vector loading support. Enable when available.
+RWStructuredBuffer<Interface> Input;
+RWStructuredBuffer<Interface> Output;
+
+TYPE g_val;
+
+[shader("compute")]
+[numthreads(8,1,1)]
+void main(uint GI : SV_GroupIndex) {
+  assignments(Output[GI].assigned, Input[GI].scales); // NOTE(review): two arguments, as in the forward declaration; the definition below takes one.
+  Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked);
+  Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned);
+  Output[GI].indexed = index(Input[GI].indexed, GI, g_val); // three arguments, matching the forward declaration above
+}
+#endif
+
+// NOTE(review): this definition takes only `things`; the forward declaration above also takes `scales`.
+// Test assignment operators (=, +=, -=, *=, /=, %=) on elements of an array of one-element vectors.
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout TYPE things[10]) {
+
+  // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{8|4|2}})
+  // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0
+  // CHECK: [[res0:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[val0]], i64 0
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0
+  // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]]
+  things[0] = buf.Load(1);
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5
+  // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1
+  // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0
+  // CHECK: [[add1:%.*]] = [[ADD:f?add( fast)?]] [[ELTY]] [[val1]], [[val5]]
+  // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add1]], i32 0
+  // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]]
+  things[1] += things[5];
+
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6
+  // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+  // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0
+  // CHECK: [[sub2:%.*]] = [[SUB:f?sub( fast)?]] [[ELTY]] [[val2]], [[val6]]
+  // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub2]], i32 0
+  // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]]
+  things[2] -= things[6];
+
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7
+  // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]]
+  // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3
+  // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0
+  // CHECK: [[mul3:%.*]] = [[MUL:f?mul( fast)?]] [[ELTY]] [[val3]], [[val7]]
+  // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul3]], i32 0
+  // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]]
+  things[3] *= things[7];
+
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8
+  // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]]
+  // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4
+  // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0
+  // CHECK: [[div4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[ELTY]] [[val4]], [[val8]]
+  // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div4]], i32 0
+  // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]]
+  things[4] /= things[8];
+
+#ifndef DBL // The %= case is compiled out entirely when DBL is defined.
+  // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9
+  // NODBL: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]]
+  // NODBL: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]]
+  // NODBL: [[rem5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[ELTY]] [[val5]], [[val9]]
+  // NODBL: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem5]], i32 0
+  // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]]
+  things[5] %= things[9];
+#endif
+}
+
+// Test arithmetic operators: unary + and -, binary + - * / %, and pre/post increment and decrement.
+// CHECK-LABEL: define void @"\01?arithmetic
+export TYPE arithmetic(inout TYPE things[11])[11] {
+  TYPE res[11];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0
+  // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[res0]], i32 0
+  // CHECK: [[sub1:%.*]] = [[SUB]] [[ELTY]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]]
+  res[0] = +things[0];
+  res[1] = -things[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1
+  // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0
+  // CHECK: [[add2:%.*]] = [[ADD]] [[ELTY]] [[val2]], [[val1]]
+  res[2] = things[1] + things[2];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3
+  // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0
+  // CHECK: [[sub3:%.*]] = [[SUB]] [[ELTY]] [[val2]], [[val3]]
+  res[3] = things[2] - things[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4
+  // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0
+  // CHECK: [[mul4:%.*]] = [[MUL]] [[ELTY]] [[val4]], [[val3]]
+  res[4] = things[3] * things[4];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5
+  // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0
+  // CHECK: [[div5:%.*]] = [[DIV]] [[ELTY]] [[val4]], [[val5]]
+  res[5] = things[4] / things[5];
+
+#ifndef DBL // The % case is compiled out entirely when DBL is defined.
+  // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6
+  // NODBL: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+  // NODBL: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]]
+  // NODBL: [[rem6:%.*]] = [[REM]] [[ELTY]] [[val5]], [[val6]]
+  res[6] = things[5] % things[6];
+#endif
+
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7
+  // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]]
+  // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0
+  // CHECK: [[add7:%.*]] = [[ADD]] [[ELTY]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]]
+  // CHECK: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add7]], i32 0
+  // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]]
+  res[7] = things[7]++;
+
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8
+  // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]]
+  // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0
+  // CHECK: [[add8:%.*]] = [[ADD]] [[ELTY]] [[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]]
+  // CHECK: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add8]], i32 0
+  // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]]
+  res[8] = things[8]--;
+
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9
+  // CHECK: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]]
+  // CHECK: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]], i32 0
+  // CHECK: [[add9:%.*]] = [[ADD]] [[ELTY]] [[val9]], [[POS1]]
+  // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i32 0
+  // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]]
+  res[9] = ++things[9];
+
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10
+  // CHECK: [[ld10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]]
+  // CHECK: [[val10:%.*]] = extractelement [[TYPE]] [[ld10]], i32 0
+  // CHECK: [[add10:%.*]] = [[ADD]] [[ELTY]] [[val10]], [[NEG1]]
+  // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i32 0
+  // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]]
+  res[10] = --things[10];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0
+  // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1
+  // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub1]], i64 0
+  // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2
+  // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add2]], i64 0
+  // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3
+  // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub3]], i64 0
+  // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4
+  // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul4]], i64 0
+  // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5
+  // CHECK: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div5]], i64 0
+  // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]]
+  // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6
+  // NODBL: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem6]], i64 0
+  // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]]
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7
+  // This is a post op, so the original value goes into res[].
+  // CHECK: store [[TYPE]] [[ld7]], [[TYPE]]* [[adr7]]
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8
+  // This is a post op, so the original value goes into res[].
+  // CHECK: store [[TYPE]] [[ld8]], [[TYPE]]* [[adr8]]
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9
+  // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i64 0
+  // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]]
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10
+  // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i64 0
+  // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]]
+  // CHECK: ret void
+  return res;
+}
+
+// Test logic operators.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export bool logic(bool truth[10], TYPE consequences[10])[10] {
+  bool res[10];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0
+  // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]]
+  // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1
+  res[0] = !truth[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]]
+  // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]]
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[bval2:%.*]] = icmp ne i32 [[val2]], 0
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[bval3:%.*]] = icmp ne i32 [[val3]], 0
+  // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]]
+  // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]]
+  // CHECK: [[bval4:%.*]] = icmp ne i32 [[val4]], 0
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]]
+  // CHECK: [[bval5:%.*]] = icmp ne i32 [[val5]], 0
+  // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]]
+  // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
+  res[3] = truth[3] ? truth[4] : truth[5];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0
+  // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[ld0]], i32 0
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1
+  // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0
+  // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[ELTY]] [[val0]], [[val1]]
+  // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2
+  // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0
+  // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[ELTY]] [[val1]], [[val2]]
+  // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3
+  // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0
+  // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[ELTY]] [[val2]], [[val3]]
+  // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32
+  res[6] = consequences[2] <  consequences[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4
+  // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0
+  // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[ELTY]] [[val3]], [[val4]]
+  // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32
+  res[7] = consequences[3] >  consequences[4];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5
+  // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0
+  // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[ELTY]] [[val4]], [[val5]]
+  // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32
+  res[8] = consequences[4] <= consequences[5];
+
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6
+  // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+  // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0
+  // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[ELTY]] [[val5]], [[val6]]
+  // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32
+  res[9] = consequences[5] >= consequences[6];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0
+  // CHECK: store i32 [[res0]], i32* [[adr0]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1
+  // CHECK: store i32 [[res1]], i32* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2
+  // CHECK: store i32 [[res2]], i32* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3
+  // CHECK: store i32 [[res3]], i32* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4
+  // CHECK: store i32 [[res4]], i32* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5
+  // CHECK: store i32 [[res5]], i32* [[adr5]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6
+  // CHECK: store i32 [[res6]], i32* [[adr6]]
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7
+  // CHECK: store i32 [[res7]], i32* [[adr7]]
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8
+  // CHECK: store i32 [[res8]], i32* [[adr8]]
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9
+  // CHECK: store i32 [[res9]], i32* [[adr9]]
+
+  // CHECK: ret void
+  return res;
+}
+
+static const int Ix = 2;
+
+// Test indexing operators
+// CHECK-LABEL: define void @"\01?index
+export TYPE index(TYPE things[10], int i)[10] {
+  // CHECK: [[res:%.*]] = alloca [10 x [[ELTY]]]
+  TYPE res[10];
+
+  // CHECK: [[res0:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 0
+  // CHECK: store [[ELTY]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[ELTY]]* [[res0]]
+  res[0] = 0;
+
+  // CHECK: [[adri:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 %i
+  // CHECK: store [[ELTY]] [[POS1]], [[ELTY]]* [[adri]]
+  res[i] = 1;
+
+  // CHECK: [[adr2:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 2
+  // CHECK: store [[ELTY]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[ELTY]]* [[adr2]]
+  res[Ix] = 2;
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0
+  // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[adr3:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 3
+  // CHECK: [[thg0:%.*]] = extractelement [[TYPE]] [[ld0]], i64 0
+  // CHECK: store [[ELTY]] [[thg0]], [[ELTY]]* [[adr3]]
+  res[3] = things[0];
+
+  // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i
+  // CHECK: [[ldi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]]
+  // CHECK: [[adr4:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 4
+  // CHECK: [[thgi:%.*]] = extractelement [[TYPE]] [[ldi]], i64 0
+  // CHECK: store [[ELTY]] [[thgi]], [[ELTY]]* [[adr4]]
+  res[4] = things[i];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[adr5:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 5
+  // CHECK: [[thg2:%.*]] = extractelement [[TYPE]] [[ld2]], i64 0
+  // CHECK: store [[ELTY]] [[thg2]], [[ELTY]]* [[adr5]]
+  res[5] = things[Ix];
+  // CHECK: ret void
+  return res;
+}
+
+#ifdef INT
+// Test bit twiddling operators.
+// INT-LABEL: define void @"\01?bittwiddlers
+export void bittwiddlers(inout TYPE things[11]) {
+  // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1
+  // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0
+  // INT: [[xor1:%[0-9]*]] = xor [[ELTY]] [[val1]], -1
+  // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor1]], i32 0
+  // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0
+  // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr0]]
+  things[0] = ~things[1];
+
+  // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2
+  // INT: [[ld2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // INT: [[val2:%[0-9]*]] = extractelement [[TYPE]] [[ld2]], i32 0
+  // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3
+  // INT: [[ld3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // INT: [[val3:%[0-9]*]] = extractelement [[TYPE]] [[ld3]], i32 0
+  // INT: [[or1:%[0-9]*]] = or [[ELTY]] [[val3]], [[val2]]
+  // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or1]], i32 0
+  // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]]
+  things[1] = things[2] | things[3];
+
+  // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4
+  // INT: [[ld4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // INT: [[val4:%[0-9]*]] = extractelement [[TYPE]] [[ld4]], i32 0
+  // INT: [[and2:%[0-9]*]] = and [[ELTY]] [[val4]], [[val3]]
+  // INT: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and2]], i32 0
+  // INT: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]]
+  things[2] = things[3] & things[4];
+
+  // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5
+  // INT: [[ld5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // INT: [[val5:%[0-9]*]] = extractelement [[TYPE]] [[ld5]], i32 0
+  // INT: [[xor3:%[0-9]*]] = xor [[ELTY]] [[val5]], [[val4]]
+  // INT: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor3]], i32 0
+  // INT: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]]
+  things[3] = things[4] ^ things[5];
+
+  // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6
+  // INT: [[ld6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+  // INT: [[val6:%[0-9]*]] = extractelement [[TYPE]] [[ld6]], i32 0
+  // INT: [[shv6:%[0-9]*]] = and [[ELTY]] [[val6]]
+  // INT: [[shl4:%[0-9]*]] = shl [[ELTY]] [[val5]], [[shv6]]
+  // INT: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl4]], i32 0
+  // INT: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]]
+  things[4] = things[5] << things[6];
+
+  // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7
+  // INT: [[ld7:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr7]]
+  // INT: [[val7:%[0-9]*]] = extractelement [[TYPE]] [[ld7]], i32 0
+  // INT: [[shv7:%[0-9]*]] = and [[ELTY]] [[val7]]
+  // UNSIG: [[shr5:%[0-9]*]] = lshr [[ELTY]] [[val6]], [[shv7]]
+  // SIG: [[shr5:%[0-9]*]] = ashr [[ELTY]] [[val6]], [[shv7]]
+  // INT: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr5]], i32 0
+  // INT: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]]
+  things[5] = things[6] >> things[7];
+
+  // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8
+  // INT: [[ld8:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr8]]
+  // INT: [[val8:%[0-9]*]] = extractelement [[TYPE]] [[ld8]], i32 0
+  // INT: [[or6:%[0-9]*]] = or [[ELTY]] [[val8]], [[val6]]
+  // INT: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or6]], i32 0
+  // INT: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]]
+  things[6] |= things[8];
+
+  // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9
+  // INT: [[ld9:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr9]]
+  // INT: [[val9:%[0-9]*]] = extractelement [[TYPE]] [[ld9]], i32 0
+  // INT: [[and7:%[0-9]*]] = and [[ELTY]] [[val9]], [[val7]]
+  // INT: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and7]], i32 0
+  // INT: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]]
+  things[7] &= things[9];
+
+  // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10
+  // INT: [[ld10:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr10]]
+  // INT: [[val10:%[0-9]*]] = extractelement [[TYPE]] [[ld10]], i32 0
+  // INT: [[xor8:%[0-9]*]] = xor [[ELTY]] [[val10]], [[val8]]
+  // INT: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor8]], i32 0
+  // INT: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]]
+  things[8] ^= things[10];
+
+  // INT: ret void
+}
+#endif // INT
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl
index b617bf15b1..789be0091e 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl
@@ -25,7 +25,7 @@
 // RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
 // RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t   -DNUM=177 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
 
-// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors.
+// Test relevant operators on an assortment of vector sizes and types with 6.9 native vectors.
 
 // Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
 // Uses non vector buffer to avoid interacting with that implementation.
@@ -117,6 +117,7 @@ export void assignments(inout vector<TYPE, NUM> things[10], TYPE scales[10]) {
   // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
   // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]]
 #ifdef DBL
+  // DBL can't use remainder operator, do something anyway to keep the rest consistent.
   // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float>
   // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float>
   // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]]
@@ -201,6 +202,7 @@ export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11] {
   // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
   // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
 #ifdef DBL
+  // DBL can't use remainder operator, do something anyway to keep the rest consistent.
   // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float>
   // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]]
   // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double>
@@ -387,9 +389,8 @@ export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> co
   // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5
   // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
   // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
-
-  // NOT RIGHT STUFF.. Select is still extracting everything, slows WAY down with over 100 elements
-
+  // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]]
+  // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32>
   res[3] = truth[3] ? truth[4] : truth[5];
 
   // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0
@@ -432,6 +433,12 @@ export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> co
 
   // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0
   // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1
+  // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2
+  // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3
+  // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]]
   // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4
   // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]]
   // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll
new file mode 100644
index 0000000000..a811ff9f47
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll
@@ -0,0 +1,304 @@
+; RUN: %dxopt %s -hlsl-passes-resume -dynamic-vector-to-array,ReplaceAllVectors=0 -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+%struct.VectRec1 = type { <1 x float> }
+%struct.VectRec2 = type { <2 x float> }
+
+; Vec2s should be preserved.
+; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4
+; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4
+
+; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4
+; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4
+; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+
+; Dynamic Vec1s should be reduced.
+; CHECK-DAG: @dygar1.v = internal global [2 x [1 x float]] zeroinitializer, align 4
+; CHECK-DAG: @dygrec1.0.v = internal global [1 x float] zeroinitializer, align 4
+; CHECK-DAG: @dyglob1.v = internal global [1 x float] zeroinitializer, align 4
+
+; These statically accessed Vec1s were already reduced by SROA
+; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4
+; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4
+; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4
+
+@dyglob1 = internal global <1 x float> zeroinitializer, align 4
+@dyglob2 = internal global <2 x float> zeroinitializer, align 4
+@stglob2 = internal global <2 x float> zeroinitializer, align 4
+@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4
+@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+@dygrec2.0 = internal global <2 x float> zeroinitializer, align 4
+@stgrec2.0 = internal global <2 x float> zeroinitializer, align 4
+@stgar1.0 = internal global [2 x float] zeroinitializer, align 4
+@dygrec1.0 = internal global <1 x float> zeroinitializer, align 4
+@stglob1.0 = internal global float 0.000000e+00, align 4
+@stgrec1.0.0 = internal global float 0.000000e+00, align 4
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define <4 x float> @"\01?tester
+define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, [12 x float]* %vals) #0 {
+bb:
+  ; Vec2s are preserved.
+  ; CHECK-DAG: %dyloc2 = alloca <2 x float>
+  ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>]
+  ; CHECK-DAG: %dylorc2.0 = alloca <2 x float>
+
+  ; CHECK-DAG: %stloc2 = alloca <2 x float>
+  ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>]
+  ; CHECK-DAG: %stlorc2.0 = alloca <2 x float>
+
+  ; Static vec1s are unaltered by dynamic vector to array.
+  ; CHECK-DAG: %stloc1 = alloca <1 x float>
+  ; CHECK-DAG: %stlar1.0 = alloca [3 x float]
+  ; CHECK-DAG: %stlorc1.0 = alloca <1 x float>
+
+  ; Dynamic vec1s are removed and lose their names.
+  ; CHECK-DAG: alloca [1 x float]
+  ; CHECK-DAG: alloca [3 x [1 x float]]
+  ; CHECK-DAG: alloca [1 x float]
+
+  %dylorc1.0 = alloca <1 x float>
+  %stlorc1.0 = alloca <1 x float>
+  %dylorc2.0 = alloca <2 x float>
+  %stlorc2.0 = alloca <2 x float>
+  %stlar1.0 = alloca [3 x float]
+  %tmp = alloca i32, align 4, !dx.temp !14
+  %dyloc1 = alloca <1 x float>, align 4
+  %dyloc2 = alloca <2 x float>, align 4
+  %dylar1 = alloca [3 x <1 x float>], align 4
+  %dylar2 = alloca [4 x <2 x float>], align 4
+  %stloc1 = alloca <1 x float>, align 4
+  %stloc2 = alloca <2 x float>, align 4
+  %stlar2 = alloca [4 x <2 x float>], align 4
+  store i32 %ix, i32* %tmp, align 4, !tbaa !22
+
+  %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7
+  %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10
+  %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10
+  %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10
+  br i1 %tmp16, label %bb17, label %bb76 ; line:53 col:7
+
+bb17:                                             ; preds = %bb
+  %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30
+  %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30
+  %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24
+  %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17
+  store float %tmp19, float* %tmp21 ; line:54 col:28
+  %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5
+  store float %tmp19, float* %tmp22 ; line:54 col:15
+  %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30
+  %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30
+  %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24
+  %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17
+  store float %tmp24, float* %tmp26 ; line:55 col:28
+  %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5
+  store float %tmp24, float* %tmp27 ; line:55 col:15
+  %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37
+  %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37
+  %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27
+  %tmp31 = load i32, i32* %tmp, align 4 ; line:56 col:31
+  %tmp32 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30, i32 %tmp31 ; line:56 col:20
+  store float %tmp29, float* %tmp32 ; line:56 col:35
+  %tmp33 = getelementptr inbounds [3 x float], [3 x float]* %stlar1.0, i32 0, i32 1 ; line:56 col:5
+  store float %tmp29, float* %tmp33 ; line:56 col:18
+  %tmp34 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37
+  %tmp35 = load float, float* %tmp34, align 4 ; line:57 col:37
+  %tmp36 = load i32, i32* %tmp, align 4 ; line:57 col:27
+  %tmp37 = load i32, i32* %tmp, align 4 ; line:57 col:31
+  %tmp38 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp36, i32 %tmp37 ; line:57 col:20
+  store float %tmp35, float* %tmp38 ; line:57 col:35
+  %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1, i32 0 ; line:57 col:5
+  store float %tmp35, float* %tmp39 ; line:57 col:18
+  %tmp40 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36
+  %tmp41 = load float, float* %tmp40, align 4 ; line:58 col:36
+  %tmp42 = load i32, i32* %tmp, align 4 ; line:58 col:30
+  %tmp43 = getelementptr inbounds <1 x float>, <1 x float>* %dylorc1.0, i32 0, i32 %tmp42 ; line:58 col:20
+  store float %tmp41, float* %tmp43 ; line:58 col:34
+  %tmp44 = getelementptr inbounds <1 x float>, <1 x float>* %stlorc1.0, i32 0, i32 0 ; line:58 col:5
+  store float %tmp41, float* %tmp44 ; line:58 col:18
+  %tmp45 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36
+  %tmp46 = load float, float* %tmp45, align 4 ; line:59 col:36
+  %tmp47 = load i32, i32* %tmp, align 4 ; line:59 col:30
+  %tmp48 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp47 ; line:59 col:20
+  store float %tmp46, float* %tmp48 ; line:59 col:34
+  %tmp49 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:59 col:5
+  store float %tmp46, float* %tmp49 ; line:59 col:18
+  %tmp50 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32
+  %tmp51 = load float, float* %tmp50, align 4 ; line:61 col:32
+  %tmp52 = load i32, i32* %tmp, align 4 ; line:61 col:26
+  %tmp53 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp52 ; line:61 col:18
+  store float %tmp51, float* %tmp53 ; line:61 col:30
+  store float %tmp51, float* @stglob1.0 ; line:61 col:16
+  %tmp54 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32
+  %tmp55 = load float, float* %tmp54, align 4 ; line:62 col:32
+  %tmp56 = load i32, i32* %tmp, align 4 ; line:62 col:26
+  %tmp57 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp56 ; line:62 col:18
+  store float %tmp55, float* %tmp57 ; line:62 col:30
+  store float %tmp55, float* getelementptr inbounds (<2 x float>, <2 x float>* @stglob2, i32 0, i32 1) ; line:62 col:16
+  %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37
+  %tmp59 = load float, float* %tmp58, align 4 ; line:63 col:37
+  %tmp60 = load i32, i32* %tmp, align 4 ; line:63 col:27
+  %tmp61 = load i32, i32* %tmp, align 4 ; line:63 col:31
+  %tmp62 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp60, i32 %tmp61 ; line:63 col:20
+  store float %tmp59, float* %tmp62 ; line:63 col:35
+  store float %tmp59, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 1) ; line:63 col:18
+  %tmp63 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37
+  %tmp64 = load float, float* %tmp63, align 4 ; line:64 col:37
+  %tmp65 = load i32, i32* %tmp, align 4 ; line:64 col:27
+  %tmp66 = load i32, i32* %tmp, align 4 ; line:64 col:31
+  %tmp67 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp65, i32 %tmp66 ; line:64 col:20
+  store float %tmp64, float* %tmp67 ; line:64 col:35
+  store float %tmp64, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18
+  %tmp68 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36
+  %tmp69 = load float, float* %tmp68, align 4 ; line:65 col:36
+  %tmp70 = load i32, i32* %tmp, align 4 ; line:65 col:30
+  %tmp71 = getelementptr inbounds <1 x float>, <1 x float>* @dygrec1.0, i32 0, i32 %tmp70 ; line:65 col:20
+  store float %tmp69, float* %tmp71 ; line:65 col:34
+  store float %tmp69, float* @stgrec1.0.0 ; line:65 col:18
+  %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36
+  %tmp73 = load float, float* %tmp72, align 4 ; line:66 col:36
+  %tmp74 = load i32, i32* %tmp, align 4 ; line:66 col:30
+  %tmp75 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp74 ; line:66 col:20
+  store float %tmp73, float* %tmp75 ; line:66 col:34
+  store float %tmp73, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:66 col:18
+  br label %bb76 ; line:67 col:3
+
+bb76:                                             ; preds = %bb17, %bb
+  %tmp77 = load <1 x float>, <1 x float>* %dyloc1, align 4 ; line:68 col:17
+  %tmp78 = extractelement <1 x float> %tmp77, i32 0 ; line:68 col:17
+  %tmp79 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27
+  %tmp80 = extractelement <2 x float> %tmp79, i32 1 ; line:68 col:27
+  %tmp81 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37
+  %tmp82 = extractelement <1 x float> %tmp81, i32 0 ; line:68 col:37
+  %tmp83 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47
+  %tmp84 = extractelement <2 x float> %tmp83, i32 1 ; line:68 col:47
+  %tmp85 = insertelement <4 x float> undef, float %tmp78, i64 0 ; line:68 col:16
+  %tmp86 = insertelement <4 x float> %tmp85, float %tmp80, i64 1 ; line:68 col:16
+  %tmp87 = insertelement <4 x float> %tmp86, float %tmp82, i64 2 ; line:68 col:16
+  %tmp88 = insertelement <4 x float> %tmp87, float %tmp84, i64 3 ; line:68 col:16
+  %tmp89 = load i32, i32* %tmp, align 4 ; line:68 col:73
+  %tmp90 = load i32, i32* %tmp, align 4 ; line:68 col:77
+  %tmp91 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp89, i32 %tmp90 ; line:68 col:66
+  %tmp92 = load float, float* %tmp91 ; line:68 col:66
+  %tmp93 = load i32, i32* %tmp, align 4 ; line:68 col:89
+  %tmp94 = load i32, i32* %tmp, align 4 ; line:68 col:93
+  %tmp95 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp93, i32 %tmp94 ; line:68 col:82
+  %tmp96 = load float, float* %tmp95 ; line:68 col:82
+  %tmp97 = getelementptr [3 x float], [3 x float]* %stlar1.0, i32 0, i32 0 ; line:68 col:98
+  %load = load float, float* %tmp97 ; line:68 col:98
+  %insert = insertelement <1 x float> undef, float %load, i64 0 ; line:68 col:98
+  %tmp98 = extractelement <1 x float> %insert, i32 0 ; line:68 col:98
+  %tmp99 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111
+  %tmp100 = load <2 x float>, <2 x float>* %tmp99, align 4 ; line:68 col:111
+  %tmp101 = extractelement <2 x float> %tmp100, i32 1 ; line:68 col:111
+  %tmp102 = insertelement <4 x float> undef, float %tmp92, i64 0 ; line:68 col:65
+  %tmp103 = insertelement <4 x float> %tmp102, float %tmp96, i64 1 ; line:68 col:65
+  %tmp104 = insertelement <4 x float> %tmp103, float %tmp98, i64 2 ; line:68 col:65
+  %tmp105 = insertelement <4 x float> %tmp104, float %tmp101, i64 3 ; line:68 col:65
+  %tmp106 = fadd <4 x float> %tmp88, %tmp105 ; line:68 col:57
+  %tmp107 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10
+  %tmp108 = extractelement <1 x float> %tmp107, i32 0 ; line:69 col:10
+  %tmp109 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21
+  %tmp110 = extractelement <2 x float> %tmp109, i32 1 ; line:69 col:21
+  %load3 = load float, float* @stglob1.0 ; line:69 col:32
+  %insert4 = insertelement <1 x float> undef, float %load3, i64 0 ; line:69 col:32
+  %tmp111 = extractelement <1 x float> %insert4, i32 0 ; line:69 col:32
+  %tmp112 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43
+  %tmp113 = extractelement <2 x float> %tmp112, i32 1 ; line:69 col:43
+  %tmp114 = insertelement <4 x float> undef, float %tmp108, i64 0 ; line:69 col:9
+  %tmp115 = insertelement <4 x float> %tmp114, float %tmp110, i64 1 ; line:69 col:9
+  %tmp116 = insertelement <4 x float> %tmp115, float %tmp111, i64 2 ; line:69 col:9
+  %tmp117 = insertelement <4 x float> %tmp116, float %tmp113, i64 3 ; line:69 col:9
+  %tmp118 = fadd <4 x float> %tmp106, %tmp117 ; line:68 col:124
+  %tmp119 = load i32, i32* %tmp, align 4 ; line:69 col:70
+  %tmp120 = load i32, i32* %tmp, align 4 ; line:69 col:74
+  %tmp121 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp119, i32 %tmp120 ; line:69 col:63
+  %tmp122 = load float, float* %tmp121 ; line:69 col:63
+  %tmp123 = load i32, i32* %tmp, align 4 ; line:69 col:86
+  %tmp124 = load i32, i32* %tmp, align 4 ; line:69 col:90
+  %tmp125 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp123, i32 %tmp124 ; line:69 col:79
+  %tmp126 = load float, float* %tmp125 ; line:69 col:79
+  %load1 = load float, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 0) ; line:69 col:95
+  %insert2 = insertelement <1 x float> undef, float %load1, i64 0 ; line:69 col:95
+  %tmp127 = extractelement <1 x float> %insert2, i32 0 ; line:69 col:95
+  %tmp128 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108
+  %tmp129 = extractelement <2 x float> %tmp128, i32 1 ; line:69 col:108
+  %tmp130 = insertelement <4 x float> undef, float %tmp122, i64 0 ; line:69 col:62
+  %tmp131 = insertelement <4 x float> %tmp130, float %tmp126, i64 1 ; line:69 col:62
+  %tmp132 = insertelement <4 x float> %tmp131, float %tmp127, i64 2 ; line:69 col:62
+  %tmp133 = insertelement <4 x float> %tmp132, float %tmp129, i64 3 ; line:69 col:62
+  %tmp134 = fadd <4 x float> %tmp118, %tmp133 ; line:69 col:54
+  %tmp135 = load <1 x float>, <1 x float>* %stlorc1.0, align 4 ; line:70 col:20
+  %tmp136 = extractelement <1 x float> %tmp135, i64 0 ; line:70 col:11
+  %tmp137 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:70 col:23
+  %tmp138 = load float, float* %tmp137 ; line:70 col:23
+  %tmp139 = load <1 x float>, <1 x float>* %dylorc1.0, align 4 ; line:70 col:45
+  %tmp140 = extractelement <1 x float> %tmp139, i64 0 ; line:70 col:11
+  %tmp141 = load i32, i32* %tmp, align 4 ; line:70 col:58
+  %tmp142 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp141 ; line:70 col:48
+  %tmp143 = load float, float* %tmp142 ; line:70 col:48
+  %tmp144 = insertelement <4 x float> undef, float %tmp136, i64 0 ; line:70 col:11
+  %tmp145 = insertelement <4 x float> %tmp144, float %tmp138, i64 1 ; line:70 col:11
+  %tmp146 = insertelement <4 x float> %tmp145, float %tmp140, i64 2 ; line:70 col:11
+  %tmp147 = insertelement <4 x float> %tmp146, float %tmp143, i64 3 ; line:70 col:11
+  %tmp148 = fadd <4 x float> %tmp134, %tmp147 ; line:69 col:121
+  %load5 = load float, float* @stgrec1.0.0 ; line:70 col:80
+  %insert6 = insertelement <1 x float> undef, float %load5, i64 0 ; line:70 col:80
+  %tmp149 = extractelement <1 x float> %insert6, i64 0 ; line:70 col:71
+  %tmp150 = load float, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:70 col:83
+  %tmp151 = load <1 x float>, <1 x float>* @dygrec1.0, align 4 ; line:70 col:105
+  %tmp152 = extractelement <1 x float> %tmp151, i64 0 ; line:70 col:71
+  %tmp153 = load i32, i32* %tmp, align 4 ; line:70 col:118
+  %tmp154 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp153 ; line:70 col:108
+  %tmp155 = load float, float* %tmp154 ; line:70 col:108
+  %tmp156 = insertelement <4 x float> undef, float %tmp149, i64 0 ; line:70 col:71
+  %tmp157 = insertelement <4 x float> %tmp156, float %tmp150, i64 1 ; line:70 col:71
+  %tmp158 = insertelement <4 x float> %tmp157, float %tmp152, i64 2 ; line:70 col:71
+  %tmp159 = insertelement <4 x float> %tmp158, float %tmp155, i64 3 ; line:70 col:71
+  %tmp160 = fadd <4 x float> %tmp148, %tmp159 ; line:70 col:63
+  ret <4 x float> %tmp160 ; line:68 col:3
+}
+
+attributes #0 = { nounwind }
+
+!dx.version = !{!3}
+!dx.valver = !{!3}
+!dx.shaderModel = !{!4}
+!dx.typeAnnotations = !{!5, !10}
+!dx.entryPoints = !{!19}
+!dx.fnprops = !{}
+!dx.options = !{!20, !21}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
+!2 = !{!"dxc(private) 1.8.0.4845 (disable_disble_spirv, 2514104b9-dirty)"}
+!3 = !{i32 1, i32 9}
+!4 = !{!"lib", i32 6, i32 9}
+!5 = !{i32 0, %struct.VectRec1 undef, !6, %struct.VectRec2 undef, !8}
+!6 = !{i32 4, !7}
+!7 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC1", i32 7, i32 9, i32 13, i32 1}
+!8 = !{i32 8, !9}
+!9 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC2", i32 7, i32 9, i32 13, i32 2}
+!10 = !{i32 1, <4 x float> (i32, [12 x float]*)* @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z", !11}
+!11 = !{!12, !15, !17}
+!12 = !{i32 1, !13, !14}
+!13 = !{i32 7, i32 9, i32 13, i32 4}
+!14 = !{}
+!15 = !{i32 0, !16, !14}
+!16 = !{i32 4, !"IX", i32 7, i32 4}
+!17 = !{i32 0, !18, !14}
+!18 = !{i32 4, !"VAL", i32 7, i32 9}
+!19 = !{null, !"", null, null, null}
+!20 = !{i32 64}
+!21 = !{i32 -1}
+!22 = !{!23, !23, i64 0}
+!23 = !{!"int", !24, i64 0}
+!24 = !{!"omnipotent char", !25, i64 0}
+!25 = !{!"Simple C/C++ TBAA"}
+!44 = !{!45, !45, i64 0}
+!45 = !{!"float", !24, i64 0}
+!148 = !{!24, !24, i64 0}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll
new file mode 100644
index 0000000000..a3ba294c62
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll
@@ -0,0 +1,328 @@
+; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s
+
+; Test for SROA reduction of globals and allocas.
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+%struct.VectRec1 = type { <1 x float> }
+%struct.VectRec2 = type { <2 x float> }
+%ConstantBuffer = type opaque
+
+; Confirm that the dynamic globals are untouched and the statics are scalarized.
+; DAG used to preserve the convenient ordering.
+
+; Dynamic access preserves even vec1s in SROA.
+; CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4
+; CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4
+; CHECK-DAG: @dygrec1.0 = internal global <1 x float> zeroinitializer, align 4
+; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4
+; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4
+
+; Having >1 elements preserves even statically-accessed vec2s.
+; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4
+; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4
+
+; Statically-accessed vec1s should get scalarized.
+; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4
+; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4
+; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4
+
+@dyglob2 = internal global <2 x float> zeroinitializer, align 4
+@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+@dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4
+@dyglob1 = internal global <1 x float> zeroinitializer, align 4
+@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4
+@dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4
+
+@stglob2 = internal global <2 x float> zeroinitializer, align 4
+@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+@stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4
+
+@stglob1 = internal global <1 x float> zeroinitializer, align 4
+@stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4
+@stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4
+
+@"$Globals" = external constant %ConstantBuffer
+
+; Function Attrs: nounwind
+define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, [12 x float]* %vals) #0 {
+bb:
+  ; Dynamic access preserves even vec1s in SROA.
+  ; CHECK-DAG: %dyloc1 = alloca <1 x float>
+  ; CHECK-DAG: %dyloc2 = alloca <2 x float>
+  ; CHECK-DAG: %dylorc1.0 = alloca <1 x float>
+  ; CHECK-DAG: %dylorc2.0 = alloca <2 x float>
+  ; CHECK-DAG: %dylar1 = alloca [3 x <1 x float>]
+  ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>]
+
+  ; SROA doesn't reduce non-array allocas because scalarizer should get them.
+  ; CHECK-DAG: %stlorc1.0 = alloca <1 x float>
+  ; CHECK-DAG: %stlorc2.0 = alloca <2 x float>
+  ; CHECK-DAG: %stloc1 = alloca <1 x float>, align 4
+  ; CHECK-DAG: %stloc2 = alloca <2 x float>, align 4
+
+  ; Statically-accessed arrays of vec1s should get reduced; arrays of vec2s are preserved.
+  ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>]
+  ; CHECK-DAG: %stlar1.0 = alloca [3 x float]
+
+  %tmp = alloca i32, align 4, !dx.temp !14
+  %dyloc1 = alloca <1 x float>, align 4
+  %dyloc2 = alloca <2 x float>, align 4
+  %dylar1 = alloca [3 x <1 x float>], align 4
+  %dylar2 = alloca [4 x <2 x float>], align 4
+  %dylorc1 = alloca %struct.VectRec1, align 4
+  %dylorc2 = alloca %struct.VectRec2, align 4
+  %stloc1 = alloca <1 x float>, align 4
+  %stloc2 = alloca <2 x float>, align 4
+  %stlar1 = alloca [3 x <1 x float>], align 4
+  %stlar2 = alloca [4 x <2 x float>], align 4
+  %stlorc1 = alloca %struct.VectRec1, align 4
+  %stlorc2 = alloca %struct.VectRec2, align 4
+
+  store i32 %ix, i32* %tmp, align 4, !tbaa !25
+  %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7
+  %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10
+  %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10
+  %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10
+  br i1 %tmp16, label %bb17, label %bb86 ; line:53 col:7
+
+bb17:                                             ; preds = %bb
+  %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30
+  %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30
+  %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24
+  %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17
+  store float %tmp19, float* %tmp21 ; line:54 col:28
+  %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5
+  store float %tmp19, float* %tmp22 ; line:54 col:15
+  %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30
+  %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30
+  %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24
+  %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17
+  store float %tmp24, float* %tmp26 ; line:55 col:28
+  %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5
+  store float %tmp24, float* %tmp27 ; line:55 col:15
+  %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37
+  %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37
+  %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27
+  %tmp31 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30 ; line:56 col:20
+  %tmp32 = load i32, i32* %tmp, align 4 ; line:56 col:31
+  %tmp33 = getelementptr <1 x float>, <1 x float>* %tmp31, i32 0, i32 %tmp32 ; line:56 col:20
+  store float %tmp29, float* %tmp33 ; line:56 col:35
+  %tmp34 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 1 ; line:56 col:5
+  %tmp35 = getelementptr <1 x float>, <1 x float>* %tmp34, i32 0, i32 0 ; line:56 col:5
+  store float %tmp29, float* %tmp35 ; line:56 col:18
+  %tmp36 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37
+  %tmp37 = load float, float* %tmp36, align 4 ; line:57 col:37
+  %tmp38 = load i32, i32* %tmp, align 4 ; line:57 col:27
+  %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp38 ; line:57 col:20
+  %tmp40 = load i32, i32* %tmp, align 4 ; line:57 col:31
+  %tmp41 = getelementptr <2 x float>, <2 x float>* %tmp39, i32 0, i32 %tmp40 ; line:57 col:20
+  store float %tmp37, float* %tmp41 ; line:57 col:35
+  %tmp42 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1 ; line:57 col:5
+  %tmp43 = getelementptr <2 x float>, <2 x float>* %tmp42, i32 0, i32 0 ; line:57 col:5
+  store float %tmp37, float* %tmp43 ; line:57 col:18
+  %tmp44 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36
+  %tmp45 = load float, float* %tmp44, align 4 ; line:58 col:36
+  %tmp46 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:58 col:28
+  %tmp47 = load i32, i32* %tmp, align 4 ; line:58 col:30
+  %tmp48 = getelementptr <1 x float>, <1 x float>* %tmp46, i32 0, i32 %tmp47 ; line:58 col:20
+  store float %tmp45, float* %tmp48 ; line:58 col:34
+  %tmp49 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:58 col:13
+  %tmp50 = getelementptr <1 x float>, <1 x float>* %tmp49, i32 0, i32 0 ; line:58 col:5
+  store float %tmp45, float* %tmp50 ; line:58 col:18
+  %tmp51 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36
+  %tmp52 = load float, float* %tmp51, align 4 ; line:59 col:36
+  %tmp53 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:59 col:28
+  %tmp54 = load i32, i32* %tmp, align 4 ; line:59 col:30
+  %tmp55 = getelementptr <2 x float>, <2 x float>* %tmp53, i32 0, i32 %tmp54 ; line:59 col:20
+  store float %tmp52, float* %tmp55 ; line:59 col:34
+  %tmp56 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:59 col:13
+  %tmp57 = getelementptr <2 x float>, <2 x float>* %tmp56, i32 0, i32 1 ; line:59 col:5
+  store float %tmp52, float* %tmp57 ; line:59 col:18
+  %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32
+  %tmp59 = load float, float* %tmp58, align 4 ; line:61 col:32
+  %tmp60 = load i32, i32* %tmp, align 4 ; line:61 col:26
+  %tmp61 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp60 ; line:61 col:18
+  store float %tmp59, float* %tmp61 ; line:61 col:30
+  store float %tmp59, float* getelementptr inbounds (<1 x float>, <1 x float>* @stglob1, i32 0, i32 0) ; line:61 col:16
+  %tmp62 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32
+  %tmp63 = load float, float* %tmp62, align 4 ; line:62 col:32
+  %tmp64 = load i32, i32* %tmp, align 4 ; line:62 col:26
+  %tmp65 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp64 ; line:62 col:18
+  store float %tmp63, float* %tmp65 ; line:62 col:30
+  store float %tmp63, float* getelementptr inbounds (<2 x float>, <2 x float>* @stglob2, i32 0, i32 1) ; line:62 col:16
+  %tmp66 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37
+  %tmp67 = load float, float* %tmp66, align 4 ; line:63 col:37
+  %tmp68 = load i32, i32* %tmp, align 4 ; line:63 col:27
+  %tmp69 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp68 ; line:63 col:20
+  %tmp70 = load i32, i32* %tmp, align 4 ; line:63 col:31
+  %tmp71 = getelementptr <1 x float>, <1 x float>* %tmp69, i32 0, i32 %tmp70 ; line:63 col:20
+  store float %tmp67, float* %tmp71 ; line:63 col:35
+  store float %tmp67, float* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 1, i32 0) ; line:63 col:18
+  %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37
+  %tmp73 = load float, float* %tmp72, align 4 ; line:64 col:37
+  %tmp74 = load i32, i32* %tmp, align 4 ; line:64 col:27
+  %tmp75 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp74 ; line:64 col:20
+  %tmp76 = load i32, i32* %tmp, align 4 ; line:64 col:31
+  %tmp77 = getelementptr <2 x float>, <2 x float>* %tmp75, i32 0, i32 %tmp76 ; line:64 col:20
+  store float %tmp73, float* %tmp77 ; line:64 col:35
+  store float %tmp73, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18
+  %tmp78 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36
+  %tmp79 = load float, float* %tmp78, align 4 ; line:65 col:36
+  %tmp80 = load i32, i32* %tmp, align 4 ; line:65 col:30
+  %tmp81 = getelementptr <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), i32 0, i32 %tmp80 ; line:65 col:20
+  store float %tmp79, float* %tmp81 ; line:65 col:34
+  store float %tmp79, float* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0, i32 0) ; line:65 col:18
+  %tmp82 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36
+  %tmp83 = load float, float* %tmp82, align 4 ; line:66 col:36
+  %tmp84 = load i32, i32* %tmp, align 4 ; line:66 col:30
+  %tmp85 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp84 ; line:66 col:20
+  store float %tmp83, float* %tmp85 ; line:66 col:34
+  store float %tmp83, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:66 col:18
+  br label %bb86 ; line:67 col:3
+
+bb86:                                             ; preds = %bb17, %bb
+  %tmp87 = load <1 x float>, <1 x float>* %dyloc1, align 4 ; line:68 col:17
+  %tmp88 = extractelement <1 x float> %tmp87, i32 0 ; line:68 col:17
+  %tmp89 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27
+  %tmp90 = extractelement <2 x float> %tmp89, i32 1 ; line:68 col:27
+  %tmp91 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37
+  %tmp92 = extractelement <1 x float> %tmp91, i32 0 ; line:68 col:37
+  %tmp93 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47
+  %tmp94 = extractelement <2 x float> %tmp93, i32 1 ; line:68 col:47
+  %tmp95 = insertelement <4 x float> undef, float %tmp88, i64 0 ; line:68 col:16
+  %tmp96 = insertelement <4 x float> %tmp95, float %tmp90, i64 1 ; line:68 col:16
+  %tmp97 = insertelement <4 x float> %tmp96, float %tmp92, i64 2 ; line:68 col:16
+  %tmp98 = insertelement <4 x float> %tmp97, float %tmp94, i64 3 ; line:68 col:16
+  %tmp99 = load i32, i32* %tmp, align 4 ; line:68 col:73
+  %tmp100 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp99 ; line:68 col:66
+  %tmp101 = load i32, i32* %tmp, align 4 ; line:68 col:77
+  %tmp102 = getelementptr <1 x float>, <1 x float>* %tmp100, i32 0, i32 %tmp101 ; line:68 col:66
+  %tmp103 = load float, float* %tmp102 ; line:68 col:66
+  %tmp104 = load i32, i32* %tmp, align 4 ; line:68 col:89
+  %tmp105 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp104 ; line:68 col:82
+  %tmp106 = load i32, i32* %tmp, align 4 ; line:68 col:93
+  %tmp107 = getelementptr <2 x float>, <2 x float>* %tmp105, i32 0, i32 %tmp106 ; line:68 col:82
+  %tmp108 = load float, float* %tmp107 ; line:68 col:82
+  %tmp109 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 0 ; line:68 col:98
+  %tmp110 = load <1 x float>, <1 x float>* %tmp109, align 4 ; line:68 col:98
+  %tmp111 = extractelement <1 x float> %tmp110, i32 0 ; line:68 col:98
+  %tmp112 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111
+  %tmp113 = load <2 x float>, <2 x float>* %tmp112, align 4 ; line:68 col:111
+  %tmp114 = extractelement <2 x float> %tmp113, i32 1 ; line:68 col:111
+  %tmp115 = insertelement <4 x float> undef, float %tmp103, i64 0 ; line:68 col:65
+  %tmp116 = insertelement <4 x float> %tmp115, float %tmp108, i64 1 ; line:68 col:65
+  %tmp117 = insertelement <4 x float> %tmp116, float %tmp111, i64 2 ; line:68 col:65
+  %tmp118 = insertelement <4 x float> %tmp117, float %tmp114, i64 3 ; line:68 col:65
+  %tmp119 = fadd <4 x float> %tmp98, %tmp118 ; line:68 col:57
+  %tmp120 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10
+  %tmp121 = extractelement <1 x float> %tmp120, i32 0 ; line:69 col:10
+  %tmp122 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21
+  %tmp123 = extractelement <2 x float> %tmp122, i32 1 ; line:69 col:21
+  %tmp124 = load <1 x float>, <1 x float>* @stglob1, align 4 ; line:69 col:32
+  %tmp125 = extractelement <1 x float> %tmp124, i32 0 ; line:69 col:32
+  %tmp126 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43
+  %tmp127 = extractelement <2 x float> %tmp126, i32 1 ; line:69 col:43
+  %tmp128 = insertelement <4 x float> undef, float %tmp121, i64 0 ; line:69 col:9
+  %tmp129 = insertelement <4 x float> %tmp128, float %tmp123, i64 1 ; line:69 col:9
+  %tmp130 = insertelement <4 x float> %tmp129, float %tmp125, i64 2 ; line:69 col:9
+  %tmp131 = insertelement <4 x float> %tmp130, float %tmp127, i64 3 ; line:69 col:9
+  %tmp132 = fadd <4 x float> %tmp119, %tmp131 ; line:68 col:124
+  %tmp133 = load i32, i32* %tmp, align 4 ; line:69 col:70
+  %tmp134 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp133 ; line:69 col:63
+  %tmp135 = load i32, i32* %tmp, align 4 ; line:69 col:74
+  %tmp136 = getelementptr <1 x float>, <1 x float>* %tmp134, i32 0, i32 %tmp135 ; line:69 col:63
+  %tmp137 = load float, float* %tmp136 ; line:69 col:63
+  %tmp138 = load i32, i32* %tmp, align 4 ; line:69 col:86
+  %tmp139 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp138 ; line:69 col:79
+  %tmp140 = load i32, i32* %tmp, align 4 ; line:69 col:90
+  %tmp141 = getelementptr <2 x float>, <2 x float>* %tmp139, i32 0, i32 %tmp140 ; line:69 col:79
+  %tmp142 = load float, float* %tmp141 ; line:69 col:79
+  %tmp143 = load <1 x float>, <1 x float>* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 0), align 4 ; line:69 col:95
+  %tmp144 = extractelement <1 x float> %tmp143, i32 0 ; line:69 col:95
+  %tmp145 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108
+  %tmp146 = extractelement <2 x float> %tmp145, i32 1 ; line:69 col:108
+  %tmp147 = insertelement <4 x float> undef, float %tmp137, i64 0 ; line:69 col:62
+  %tmp148 = insertelement <4 x float> %tmp147, float %tmp142, i64 1 ; line:69 col:62
+  %tmp149 = insertelement <4 x float> %tmp148, float %tmp144, i64 2 ; line:69 col:62
+  %tmp150 = insertelement <4 x float> %tmp149, float %tmp146, i64 3 ; line:69 col:62
+  %tmp151 = fadd <4 x float> %tmp132, %tmp150 ; line:69 col:54
+  %tmp152 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:70 col:20
+  %tmp153 = load <1 x float>, <1 x float>* %tmp152, align 4 ; line:70 col:20
+  %tmp154 = extractelement <1 x float> %tmp153, i64 0 ; line:70 col:11
+  %tmp155 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:70 col:31
+  %tmp156 = getelementptr <2 x float>, <2 x float>* %tmp155, i32 0, i32 1 ; line:70 col:23
+  %tmp157 = load float, float* %tmp156 ; line:70 col:23
+  %tmp158 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:70 col:45
+  %tmp159 = load <1 x float>, <1 x float>* %tmp158, align 4 ; line:70 col:45
+  %tmp160 = extractelement <1 x float> %tmp159, i64 0 ; line:70 col:11
+  %tmp161 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:70 col:56
+  %tmp162 = load i32, i32* %tmp, align 4 ; line:70 col:58
+  %tmp163 = getelementptr <2 x float>, <2 x float>* %tmp161, i32 0, i32 %tmp162 ; line:70 col:48
+  %tmp164 = load float, float* %tmp163 ; line:70 col:48
+  %tmp165 = insertelement <4 x float> undef, float %tmp154, i64 0 ; line:70 col:11
+  %tmp166 = insertelement <4 x float> %tmp165, float %tmp157, i64 1 ; line:70 col:11
+  %tmp167 = insertelement <4 x float> %tmp166, float %tmp160, i64 2 ; line:70 col:11
+  %tmp168 = insertelement <4 x float> %tmp167, float %tmp164, i64 3 ; line:70 col:11
+  %tmp169 = fadd <4 x float> %tmp151, %tmp168 ; line:69 col:121
+  %tmp170 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0), align 4 ; line:70 col:80
+  %tmp171 = extractelement <1 x float> %tmp170, i64 0 ; line:70 col:71
+  %tmp172 = load float, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:70 col:83
+  %tmp173 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), align 4 ; line:70 col:105
+  %tmp174 = extractelement <1 x float> %tmp173, i64 0 ; line:70 col:71
+  %tmp175 = load i32, i32* %tmp, align 4 ; line:70 col:118
+  %tmp176 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp175 ; line:70 col:108
+  %tmp177 = load float, float* %tmp176 ; line:70 col:108
+  %tmp178 = insertelement <4 x float> undef, float %tmp171, i64 0 ; line:70 col:71
+  %tmp179 = insertelement <4 x float> %tmp178, float %tmp172, i64 1 ; line:70 col:71
+  %tmp180 = insertelement <4 x float> %tmp179, float %tmp174, i64 2 ; line:70 col:71
+  %tmp181 = insertelement <4 x float> %tmp180, float %tmp177, i64 3 ; line:70 col:71
+  %tmp182 = fadd <4 x float> %tmp169, %tmp181 ; line:70 col:63
+  ret <4 x float> %tmp182 ; line:68 col:3
+}
+
+attributes #0 = { nounwind }
+
+!dx.version = !{!3}
+!dx.valver = !{!3}
+!dx.shaderModel = !{!4}
+!dx.typeAnnotations = !{!5, !10}
+!dx.entryPoints = !{!19}
+!dx.fnprops = !{}
+!dx.options = !{!23, !24}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
+!2 = !{!"dxc(private) 1.8.0.4845 (disable_disble_spirv, 2514104b9-dirty)"}
+!3 = !{i32 1, i32 9}
+!4 = !{!"lib", i32 6, i32 9}
+!5 = !{i32 0, %struct.VectRec1 undef, !6, %struct.VectRec2 undef, !8}
+!6 = !{i32 4, !7}
+!7 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC1", i32 7, i32 9, i32 13, i32 1}
+!8 = !{i32 8, !9}
+!9 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC2", i32 7, i32 9, i32 13, i32 2}
+!10 = !{i32 1, <4 x float> (i32, [12 x float]*)* @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z", !11}
+!11 = !{!12, !15, !17}
+!12 = !{i32 1, !13, !14}
+!13 = !{i32 7, i32 9, i32 13, i32 4}
+!14 = !{}
+!15 = !{i32 0, !16, !14}
+!16 = !{i32 4, !"IX", i32 7, i32 4}
+!17 = !{i32 0, !18, !14}
+!18 = !{i32 4, !"VAL", i32 7, i32 9}
+!19 = !{null, !"", null, !20, null}
+!20 = !{null, null, !21, null}
+!21 = !{!22}
+!22 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null}
+!23 = !{i32 64}
+!24 = !{i32 -1}
+!25 = !{!26, !26, i64 0}
+!26 = !{!"int", !27, i64 0}
+!27 = !{!"omnipotent char", !28, i64 0}
+!28 = !{!"Simple C/C++ TBAA"}
+!47 = !{!48, !48, i64 0}
+!48 = !{!"float", !27, i64 0}
+!155 = !{!27, !27, i64 0}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl
new file mode 100644
index 0000000000..7641cb4f39
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl
@@ -0,0 +1,112 @@
+// RUN: %dxc -fcgl -T lib_6_9 %s | FileCheck %s
+
+// Mainly a source for the ScalarReplAggregatesHLSL (SROA)
+//  and DynamicIndexingVectorToArray(DIVA) IR tests with native vectors
+//  using allocas, static globals, and parameters.
+// Dynamically accessed 1-element vectors should get skipped by SROA,
+//  but addressed by DynamicIndexingVectorToArray (hence the name).
+// Larger vectors should be untouched.
+// Arrays of vectors get some special treatment as well.
+// Verifies that the original code is as expected for the IR tests.
+
+struct VectRec1 {
+  float1 f : REC1;
+};
+struct VectRec2 {
+  float2 f : REC2;
+};
+
+// Vec2s will be preserved.
+// CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4
+// CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+// CHECK-DAG: @dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4
+
+// Dynamic vec1s will get replaced with dynamic vector to array.
+// CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4
+// CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4
+// CHECK-DAG: @dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4
+
+// Vec2s will be preserved.
+// CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4
+// CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+// CHECK-DAG: @stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4
+
+// Static vec1s will get replaced with SROA.
+// CHECK-DAG: @stglob1 = internal global <1 x float> zeroinitializer, align 4
+// CHECK-DAG: @stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4
+// CHECK-DAG: @stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4
+
+static float1 dyglob1;
+static float2 dyglob2;
+static float1 dygar1[2];
+static float2 dygar2[3];
+static VectRec1 dygrec1;
+static VectRec2 dygrec2;
+
+static float1 stglob1;
+static float2 stglob2;
+static float1 stgar1[2];
+static float2 stgar2[3];
+static VectRec1 stgrec1;
+static VectRec2 stgrec2;
+
+// Test assignment operators.
+// Vec2s should be skipped by SROA and DIVA
+// DIVA will lower statically-indexed vectors and vectors in an array.
+// CHECK-LABEL: define <4 x float> @"\01?tester
+export float4 tester(int ix : IX, float vals[12] : VAL) {
+
+  // Vec2s will be preserved.
+  // CHECK-DAG: %dyloc2 = alloca <2 x float>, align 4
+  // CHECK-DAG: %dylar2 = alloca [4 x <2 x float>], align 4
+  // CHECK-DAG: %dylorc2 = alloca %struct.VectRec2, align 4
+
+  // Dynamic local vec1s will get replaced with dynamic vector to array.
+  // CHECK-DAG: %dyloc1 = alloca <1 x float>, align 4
+  // CHECK-DAG: %dylar1 = alloca [3 x <1 x float>], align 4
+  // CHECK-DAG: %dylorc1 = alloca %struct.VectRec1, align 4
+
+  // Vec2s will be preserved.
+  // CHECK-DAG: %stloc2 = alloca <2 x float>, align 4
+  // CHECK-DAG: %stlar2 = alloca [4 x <2 x float>], align 4
+  // CHECK-DAG: %stlorc2 = alloca %struct.VectRec2, align 4
+
+  // Static local vec1s will get replaced by various passes.
+  // CHECK-DAG: %stloc1 = alloca <1 x float>, align 4
+  // CHECK-DAG: %stlar1 = alloca [3 x <1 x float>], align 4
+  // CHECK-DAG: %stlorc1 = alloca %struct.VectRec1, align 4
+
+  float1 dyloc1;
+  float2 dyloc2;
+  float1 dylar1[3];
+  float2 dylar2[4];
+  VectRec1 dylorc1;
+  VectRec2 dylorc2;
+
+  float1 stloc1;
+  float2 stloc2;
+  float1 stlar1[3];
+  float2 stlar2[4];
+  VectRec1 stlorc1;
+  VectRec2 stlorc2;
+
+  if (ix > 0) {
+    stloc1[0] = dyloc1[ix] = vals[0];
+    stloc2[1] = dyloc2[ix] = vals[1];
+    stlar1[1][0] = dylar1[ix][ix] = vals[2];
+    stlar2[1][0] = dylar2[ix][ix] = vals[3];
+    stlorc1.f[0] = dylorc1.f[ix] = vals[4];
+    stlorc2.f[1] = dylorc2.f[ix] = vals[5];
+
+    stglob1[0] = dyglob1[ix] = vals[6];
+    stglob2[1] = dyglob2[ix] = vals[7];
+    stgar1[1][0] = dygar1[ix][ix] = vals[8];
+    stgar2[1][1] = dygar2[ix][ix] = vals[9];
+    stgrec1.f[0] = dygrec1.f[ix] = vals[10];
+    stgrec2.f[1] = dygrec2.f[ix] = vals[11];
+  }
+  return float4(dyloc1.x, dyloc2.y, stloc1.x, stloc2.y) + float4(dylar1[ix][ix], dylar2[ix][ix], stlar1[0].x, stlar2[0].y) +
+  float4(dyglob1.x, dyglob2.y, stglob1.x, stglob2.y) + float4(dygar1[ix][ix], dygar2[ix][ix], stgar1[0].x, stgar2[0].y) +
+    float4(stlorc1.f, stlorc2.f[1], dylorc1.f, dylorc2.f[ix]) + float4(stgrec1.f, stgrec2.f[1], dygrec1.f, dygrec2.f[ix]);
+}
+
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
new file mode 100644
index 0000000000..4e2852b86a
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
@@ -0,0 +1,804 @@
+; RUN: %dxopt %s -hlsl-passes-resume -scalarizer -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+%"class.RWStructuredBuffer<vector<float, 1> >" = type { <1 x float> }
+%dx.types.Handle = type { i8* }
+%dx.types.ResourceProperties = type { i32, i32 }
+%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
+
+@"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" = external global %"class.RWStructuredBuffer<vector<float, 1> >", align 4
+@llvm.used = appending global [1 x i8*] [i8* bitcast (%"class.RWStructuredBuffer<vector<float, 1> >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" to i8*)], section "llvm.metadata"
+
+; Function Attrs: nounwind. Each group loads two <1 x float> elements of %things, applies one fast-math op (fadd/fsub/fmul/fdiv/frem) and stores through the left operand's address; the FileCheck patterns expect the op on floats extracted at index 0, rebuilt with insertelement before the store.
+; CHECK-LABEL: define void @"\01?assignments
+define void @"\01?assignments@@YAXY09$$CAV?$vector@M$00@@@Z"([10 x <1 x float>]* noalias %things) #0 {
+bb:
+  %tmp = load %"class.RWStructuredBuffer<vector<float, 1> >", %"class.RWStructuredBuffer<vector<float, 1> >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A"
+  %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer<vector<float, 1> >"(i32 160, %"class.RWStructuredBuffer<vector<float, 1> >" %tmp)
+  %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 })
+  %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4)
+  %tmp3 = extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0
+  %tmp4 = insertelement <1 x float> undef, float %tmp3, i64 0
+  %tmp5 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0
+  store <1 x float> %tmp4, <1 x float>* %tmp5, align 4
+
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]]
+  ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]]
+  ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0
+  ; CHECK: [[res1:%.*]] = fadd fast float [[val1]], [[val5]]
+  ; CHECK: [[vec1:%.*]] = insertelement <1 x float> undef, float [[res1]], i32 0
+  ; CHECK: store <1 x float> [[vec1]], <1 x float>* [[adr1]], align 4
+  %tmp6 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5
+  %tmp7 = load <1 x float>, <1 x float>* %tmp6, align 4
+  %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1
+  %tmp9 = load <1 x float>, <1 x float>* %tmp8, align 4
+  %tmp10 = fadd fast <1 x float> %tmp9, %tmp7
+  store <1 x float> %tmp10, <1 x float>* %tmp8, align 4
+
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]]
+  ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]]
+  ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0
+  ; CHECK: [[res2:%.*]] = fsub fast float [[val2]], [[val6]]
+  ; CHECK: [[vec2:%.*]] = insertelement <1 x float> undef, float [[res2]], i32 0
+  ; CHECK: store <1 x float> [[vec2]], <1 x float>* [[adr2]], align 4
+  %tmp11 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6
+  %tmp12 = load <1 x float>, <1 x float>* %tmp11, align 4
+  %tmp13 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2
+  %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4
+  %tmp15 = fsub fast <1 x float> %tmp14, %tmp12
+  store <1 x float> %tmp15, <1 x float>* %tmp13, align 4
+
+  ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7
+  ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]]
+  ; CHECK: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]]
+  ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0
+  ; CHECK: [[res3:%.*]] = fmul fast float [[val3]], [[val7]]
+  ; CHECK: [[vec3:%.*]] = insertelement <1 x float> undef, float [[res3]], i32 0
+  ; CHECK: store <1 x float> [[vec3]], <1 x float>* [[adr3]], align 4
+  %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7
+  %tmp17 = load <1 x float>, <1 x float>* %tmp16, align 4
+  %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3
+  %tmp19 = load <1 x float>, <1 x float>* %tmp18, align 4
+  %tmp20 = fmul fast <1 x float> %tmp19, %tmp17
+  store <1 x float> %tmp20, <1 x float>* %tmp18, align 4
+
+  ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8
+  ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]]
+  ; CHECK: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]]
+  ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0
+  ; CHECK: [[res4:%.*]] = fdiv fast float [[val4]], [[val8]]
+  ; CHECK: [[vec4:%.*]] = insertelement <1 x float> undef, float [[res4]], i32 0
+  ; CHECK: store <1 x float> [[vec4]], <1 x float>* [[adr4]], align 4
+  %tmp21 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8
+  %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4
+  %tmp23 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4
+  %tmp24 = load <1 x float>, <1 x float>* %tmp23, align 4
+  %tmp25 = fdiv fast <1 x float> %tmp24, %tmp22
+  store <1 x float> %tmp25, <1 x float>* %tmp23, align 4
+
+  ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 9
+  ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]]
+  ; CHECK: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]]
+  ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0
+  ; CHECK: [[res5:%.*]] = frem fast float [[val5]], [[val9]]
+  ; CHECK: [[vec5:%.*]] = insertelement <1 x float> undef, float [[res5]], i32 0
+  ; CHECK: store <1 x float> [[vec5]], <1 x float>* [[adr5]], align 4
+  %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 9
+  %tmp27 = load <1 x float>, <1 x float>* %tmp26, align 4
+  %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5
+  %tmp29 = load <1 x float>, <1 x float>* %tmp28, align 4
+  %tmp30 = frem fast <1 x float> %tmp29, %tmp27
+  store <1 x float> %tmp30, <1 x float>* %tmp28, align 4
+
+  ret void
+}
+
+; Function Attrs: nounwind. Covers negate (fsub from -0.0), a plain element load, fadd/fsub/fmul/fdiv/frem on adjacent %things elements, and +1.0/-1.0 updates stored back in place; each <1 x float> op is expected as a scalar float op on index-0 extracts rebuilt with insertelement. Results are stored to %agg.result.
+; CHECK-LABEL: define void @"\01?arithmetic
+define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$00@@Y0L@$$CAV1@@Z"([11 x <1 x float>]* noalias sret %agg.result, [11 x <1 x float>]* noalias %things) #0 {
+bb:
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0
+  ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4
+  ; CHECK: [[zero:%.*]] = extractelement <1 x float> <float -0.000000e+00>, i32 0
+  ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0
+  ; CHECK: [[sub0:%.*]] = fsub fast float [[zero]], [[val0]]
+  ; CHECK: [[res0:%.*]] = insertelement <1 x float> undef, float [[sub0]], i32 0
+  %tmp = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0
+  %tmp1 = load <1 x float>, <1 x float>* %tmp, align 4
+  %tmp2 = fsub fast <1 x float> <float -0.000000e+00>, %tmp1
+  %tmp3 = extractelement <1 x float> %tmp2, i64 0
+
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0
+  ; CHECK: [[res1:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4
+  ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[res1]], i64 0
+  %tmp4 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0
+  %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4
+  %tmp6 = extractelement <1 x float> %tmp5, i64 0
+
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]], align 4
+  ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4
+  ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0
+  ; CHECK: [[add1:%.*]] = fadd fast float [[val1]], [[val2]]
+  ; CHECK: [[res1:%.*]] = insertelement <1 x float> undef, float [[add1]], i32 0
+  %tmp7 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1
+  %tmp8 = load <1 x float>, <1 x float>* %tmp7, align 4
+  %tmp9 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2
+  %tmp10 = load <1 x float>, <1 x float>* %tmp9, align 4
+  %tmp11 = fadd fast <1 x float> %tmp8, %tmp10
+  %tmp12 = extractelement <1 x float> %tmp11, i64 0
+
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4
+  ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4
+  ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0
+  ; CHECK: [[sub2:%.*]] = fsub fast float [[val2]], [[val3]]
+  ; CHECK: [[res2:%.*]] = insertelement <1 x float> undef, float [[sub2]], i32 0
+  %tmp13 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2
+  %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4
+  %tmp15 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3
+  %tmp16 = load <1 x float>, <1 x float>* %tmp15, align 4
+  %tmp17 = fsub fast <1 x float> %tmp14, %tmp16
+  %tmp18 = extractelement <1 x float> %tmp17, i64 0
+
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4
+  ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4
+  ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0
+  ; CHECK: [[mul3:%.*]] = fmul fast float [[val3]], [[val4]]
+  ; CHECK: [[res3:%.*]] = insertelement <1 x float> undef, float [[mul3]], i32 0
+  %tmp19 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3
+  %tmp20 = load <1 x float>, <1 x float>* %tmp19, align 4
+  %tmp21 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4
+  %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4
+  %tmp23 = fmul fast <1 x float> %tmp20, %tmp22
+  %tmp24 = extractelement <1 x float> %tmp23, i64 0
+
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4
+  ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4
+  ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0
+  ; CHECK: [[div4:%.*]] = fdiv fast float [[val4]], [[val5]]
+  ; CHECK: [[res4:%.*]] = insertelement <1 x float> undef, float [[div4]], i32 0
+  %tmp25 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4
+  %tmp26 = load <1 x float>, <1 x float>* %tmp25, align 4
+  %tmp27 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5
+  %tmp28 = load <1 x float>, <1 x float>* %tmp27, align 4
+  %tmp29 = fdiv fast <1 x float> %tmp26, %tmp28
+  %tmp30 = extractelement <1 x float> %tmp29, i64 0
+
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4
+  ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]], align 4
+  ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0
+  ; CHECK: [[rem5:%.*]] = frem fast float [[val5]], [[val6]]
+  ; CHECK: [[res5:%.*]] = insertelement <1 x float> undef, float [[rem5]], i32 0
+  %tmp31 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5
+  %tmp32 = load <1 x float>, <1 x float>* %tmp31, align 4
+  %tmp33 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6
+  %tmp34 = load <1 x float>, <1 x float>* %tmp33, align 4
+  %tmp35 = frem fast <1 x float> %tmp32, %tmp34
+  %tmp36 = extractelement <1 x float> %tmp35, i64 0
+
+  ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7
+  ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]], align 4
+  ; CHECK: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0
+  ; CHECK: [[pos1:%.*]] = extractelement <1 x float> <float 1.000000e+00>, i32 0
+  ; CHECK: [[add6:%.*]] = fadd fast float [[val7]], [[pos1]]
+  ; CHECK: [[res6:%.*]] = insertelement <1 x float> undef, float [[add6]], i32 0
+  %tmp37 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7
+  %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4
+  %tmp39 = fadd fast <1 x float> %tmp38, <float 1.000000e+00>
+  store <1 x float> %tmp39, <1 x float>* %tmp37, align 4
+
+  ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8
+  ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]], align 4
+  ; CHECK: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0
+  ; CHECK: [[neg1:%.*]] = extractelement <1 x float> <float -1.000000e+00>, i32 0
+  ; CHECK: [[add7:%.*]] = fadd fast float [[val8]], [[neg1]]
+  ; CHECK: [[res7:%.*]] = insertelement <1 x float> undef, float [[add7]], i32 0
+  %tmp40 = extractelement <1 x float> %tmp38, i64 0
+  %tmp41 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8
+  %tmp42 = load <1 x float>, <1 x float>* %tmp41, align 4
+  %tmp43 = fadd fast <1 x float> %tmp42, <float -1.000000e+00>
+  store <1 x float> %tmp43, <1 x float>* %tmp41, align 4
+
+  ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9
+  ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]], align 4
+  ; CHECK: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0
+  ; CHECK: [[pos1:%.*]] = extractelement <1 x float> <float 1.000000e+00>, i32 0
+  ; CHECK: [[add8:%.*]] = fadd fast float [[val9]], [[pos1]]
+  ; CHECK: [[res8:%.*]] = insertelement <1 x float> undef, float [[add8]], i32 0
+  %tmp44 = extractelement <1 x float> %tmp42, i64 0
+  %tmp45 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9
+  %tmp46 = load <1 x float>, <1 x float>* %tmp45, align 4
+  %tmp47 = fadd fast <1 x float> %tmp46, <float 1.000000e+00>
+  store <1 x float> %tmp47, <1 x float>* %tmp45, align 4
+
+  ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10
+  ; CHECK: [[ld10:%.*]] = load <1 x float>, <1 x float>* [[adr10]], align 4
+  ; CHECK: [[val10:%.*]] = extractelement <1 x float> [[ld10]], i32 0
+  ; CHECK: [[neg1:%.*]] = extractelement <1 x float> <float -1.000000e+00>, i32 0
+  ; CHECK: [[add9:%.*]] = fadd fast float [[val10]], [[neg1]]
+  ; CHECK: [[res9:%.*]] = insertelement <1 x float> undef, float [[add9]], i32 0
+  %tmp48 = extractelement <1 x float> %tmp47, i64 0
+  %tmp49 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10
+  %tmp50 = load <1 x float>, <1 x float>* %tmp49, align 4
+  %tmp51 = fadd fast <1 x float> %tmp50, <float -1.000000e+00>
+  store <1 x float> %tmp51, <1 x float>* %tmp49, align 4
+
+  %tmp52 = extractelement <1 x float> %tmp51, i64 0
+  %tmp53 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 0
+  %insert20 = insertelement <1 x float> undef, float %tmp3, i64 0
+  store <1 x float> %insert20, <1 x float>* %tmp53
+  %tmp54 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 1
+  %insert18 = insertelement <1 x float> undef, float %tmp6, i64 0
+  store <1 x float> %insert18, <1 x float>* %tmp54
+  %tmp55 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 2
+  %insert16 = insertelement <1 x float> undef, float %tmp12, i64 0
+  store <1 x float> %insert16, <1 x float>* %tmp55
+  %tmp56 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 3
+  %insert14 = insertelement <1 x float> undef, float %tmp18, i64 0
+  store <1 x float> %insert14, <1 x float>* %tmp56
+  %tmp57 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 4
+  %insert12 = insertelement <1 x float> undef, float %tmp24, i64 0
+  store <1 x float> %insert12, <1 x float>* %tmp57
+  %tmp58 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 5
+  %insert10 = insertelement <1 x float> undef, float %tmp30, i64 0
+  store <1 x float> %insert10, <1 x float>* %tmp58
+  %tmp59 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 6
+  %insert8 = insertelement <1 x float> undef, float %tmp36, i64 0
+  store <1 x float> %insert8, <1 x float>* %tmp59
+  %tmp60 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 7
+  %insert6 = insertelement <1 x float> undef, float %tmp40, i64 0
+  store <1 x float> %insert6, <1 x float>* %tmp60
+  %tmp61 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 8
+  %insert4 = insertelement <1 x float> undef, float %tmp44, i64 0
+  store <1 x float> %insert4, <1 x float>* %tmp61
+  %tmp62 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 9
+  %insert2 = insertelement <1 x float> undef, float %tmp48, i64 0
+  store <1 x float> %insert2, <1 x float>* %tmp62
+  %tmp63 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 10
+  %insert = insertelement <1 x float> undef, float %tmp52, i64 0
+  store <1 x float> %insert, <1 x float>* %tmp63
+  ret void
+}
+
+; Function Attrs: nounwind. The i32/i1 logic on %truth (xor, phi-merged branches, zext) is expected to come through unchanged, while every fcmp on <1 x float> elements of %consequences is expected as a scalar float fcmp on index-0 extracts. Results are stored to %agg.result.
+; CHECK-LABEL: define void @"\01?logic
+define void @"\01?logic@@YA$$BY09_NY09_NY09V?$vector@M$00@@@Z"([10 x i32]* noalias sret %agg.result, [10 x i32]* %truth, [10 x <1 x float>]* %consequences) #0 {
+bb:
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0
+  ; CHECK: [[ld0:%.*]] = load i32, i32* [[adr0]], align 4
+  ; CHECK: [[cmp0:%.*]] = icmp ne i32 [[ld0]], 0
+  ; CHECK: [[bres0:%.*]] = xor i1 [[cmp0]], true
+  ; CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32
+  %tmp = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = icmp ne i32 %tmp1, 0
+  %tmp3 = xor i1 %tmp2, true
+  %tmp4 = zext i1 %tmp3 to i32
+
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4
+  ; CHECK: [[cmp1:%.*]] = icmp ne i32 [[ld1]], 0
+  %tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  %tmp6 = load i32, i32* %tmp5, align 4
+  %tmp7 = icmp ne i32 %tmp6, 0
+  br i1 %tmp7, label %bb12, label %bb8
+
+bb8:                                              ; preds = %bb
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4
+  ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0
+  %tmp9 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  %tmp10 = load i32, i32* %tmp9, align 4
+  %tmp11 = icmp ne i32 %tmp10, 0
+  br label %bb12
+
+bb12:                                             ; preds = %bb8, %bb
+  ; CHECK: [[bres1:%.*]] = phi i1 [ true, %bb ], [ [[cmp2]], %bb8 ]
+  ; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4
+  ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0
+  %tmp13 = phi i1 [ true, %bb ], [ %tmp11, %bb8 ]
+  %tmp14 = zext i1 %tmp13 to i32
+  %tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  %tmp16 = load i32, i32* %tmp15, align 4
+  %tmp17 = icmp ne i32 %tmp16, 0
+  br i1 %tmp17, label %bb18, label %bb22
+
+bb18:                                             ; preds = %bb12
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
+  ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0
+  %tmp19 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  %tmp20 = load i32, i32* %tmp19, align 4
+  %tmp21 = icmp ne i32 %tmp20, 0
+  br label %bb22
+
+bb22:                                             ; preds = %bb18, %bb12
+
+  ; CHECK: [[bres2:%.*]] = phi i1 [ false, %bb12 ], [ [[cmp3]], %bb18 ]
+  ; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
+  ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0
+  %tmp23 = phi i1 [ false, %bb12 ], [ %tmp21, %bb18 ]
+  %tmp24 = zext i1 %tmp23 to i32
+  %tmp25 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  %tmp26 = load i32, i32* %tmp25, align 4
+  %tmp27 = icmp ne i32 %tmp26, 0
+  br i1 %tmp27, label %bb28, label %bb31
+
+bb28:                                             ; preds = %bb22
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4
+  %tmp29 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  %tmp30 = load i32, i32* %tmp29, align 4
+  br label %bb34
+
+bb31:                                             ; preds = %bb22
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4
+  %tmp32 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  %tmp33 = load i32, i32* %tmp32, align 4
+  br label %bb34
+
+bb34:                                             ; preds = %bb31, %bb28
+  ; CHECK: [[res3:%.*]] = phi i32 [ [[ld4]], %bb28 ], [ [[ld5]], %bb31 ]
+  ; CHECK: [[bres3:%.*]] = icmp ne i32 [[res3]], 0
+  ; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
+  %.sink = phi i32 [ %tmp30, %bb28 ], [ %tmp33, %bb31 ]
+  %tmp35 = icmp ne i32 %.sink, 0
+  %tmp36 = zext i1 %tmp35 to i32
+
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0
+  ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]]
+  ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]]
+  ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0
+  ; CHECK: [[bres4:%.*]] = fcmp fast oeq float [[val0]], [[val1]]
+  ; CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32
+  %tmp37 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0
+  %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4
+  %tmp39 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1
+  %tmp40 = load <1 x float>, <1 x float>* %tmp39, align 4
+  %tmp41 = fcmp fast oeq <1 x float> %tmp38, %tmp40
+  %tmp42 = extractelement <1 x i1> %tmp41, i64 0
+  %tmp43 = zext i1 %tmp42 to i32
+
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]]
+  ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]]
+  ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0
+  ; CHECK: [[bres5:%.*]] = fcmp fast une float [[val1]], [[val2]]
+  ; CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32
+  %tmp44 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1
+  %tmp45 = load <1 x float>, <1 x float>* %tmp44, align 4
+  %tmp46 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2
+  %tmp47 = load <1 x float>, <1 x float>* %tmp46, align 4
+  %tmp48 = fcmp fast une <1 x float> %tmp45, %tmp47
+  %tmp49 = extractelement <1 x i1> %tmp48, i64 0
+  %tmp50 = zext i1 %tmp49 to i32
+
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]]
+  ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]]
+  ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0
+  ; CHECK: [[bres6:%.*]] = fcmp fast olt float [[val2]], [[val3]]
+  ; CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32
+  %tmp51 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2
+  %tmp52 = load <1 x float>, <1 x float>* %tmp51, align 4
+  %tmp53 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3
+  %tmp54 = load <1 x float>, <1 x float>* %tmp53, align 4
+  %tmp55 = fcmp fast olt <1 x float> %tmp52, %tmp54
+  %tmp56 = extractelement <1 x i1> %tmp55, i64 0
+  %tmp57 = zext i1 %tmp56 to i32
+
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]]
+  ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]]
+  ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0
+  ; CHECK: [[bres7:%.*]] = fcmp fast ogt float [[val3]], [[val4]]
+  ; CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32
+  %tmp58 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3
+  %tmp59 = load <1 x float>, <1 x float>* %tmp58, align 4
+  %tmp60 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4
+  %tmp61 = load <1 x float>, <1 x float>* %tmp60, align 4
+  %tmp62 = fcmp fast ogt <1 x float> %tmp59, %tmp61
+  %tmp63 = extractelement <1 x i1> %tmp62, i64 0
+  %tmp64 = zext i1 %tmp63 to i32
+
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]]
+  ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]]
+  ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0
+  ; CHECK: [[bres8:%.*]] = fcmp fast ole float [[val4]], [[val5]]
+  ; CHECK: [[res8:%.*]] = zext i1 [[bres8]] to i32
+  %tmp65 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4
+  %tmp66 = load <1 x float>, <1 x float>* %tmp65, align 4
+  %tmp67 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5
+  %tmp68 = load <1 x float>, <1 x float>* %tmp67, align 4
+  %tmp69 = fcmp fast ole <1 x float> %tmp66, %tmp68
+  %tmp70 = extractelement <1 x i1> %tmp69, i64 0
+  %tmp71 = zext i1 %tmp70 to i32
+
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]]
+  ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]]
+  ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0
+  ; CHECK: [[bres9:%.*]] = fcmp fast oge float [[val5]], [[val6]]
+  ; CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32
+  %tmp72 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5
+  %tmp73 = load <1 x float>, <1 x float>* %tmp72, align 4
+  %tmp74 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6
+  %tmp75 = load <1 x float>, <1 x float>* %tmp74, align 4
+  %tmp76 = fcmp fast oge <1 x float> %tmp73, %tmp75
+  %tmp77 = extractelement <1 x i1> %tmp76, i64 0
+  %tmp78 = zext i1 %tmp77 to i32
+
+  %tmp79 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0
+  store i32 %tmp4, i32* %tmp79
+  %tmp80 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1
+  store i32 %tmp14, i32* %tmp80
+  %tmp81 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2
+  store i32 %tmp24, i32* %tmp81
+  %tmp82 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3
+  store i32 %tmp36, i32* %tmp82
+  %tmp83 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4
+  store i32 %tmp43, i32* %tmp83
+  %tmp84 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5
+  store i32 %tmp50, i32* %tmp84
+  %tmp85 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6
+  store i32 %tmp57, i32* %tmp85
+  %tmp86 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7
+  store i32 %tmp64, i32* %tmp86
+  %tmp87 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8
+  store i32 %tmp71, i32* %tmp87
+  %tmp88 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9
+  store i32 %tmp78, i32* %tmp88
+  ret void
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define void @"\01?index
+define void @"\01?index@@YA$$BY09V?$vector@M$00@@Y09V1@H@Z"([10 x <1 x float>]* noalias sret %agg.result, [10 x <1 x float>]* %things, i32 %i) #0 {
+bb:
+  ; CHECK: %res.0 = alloca [10 x float]
+  %res.0 = alloca [10 x float]
+  ; Indexing test over a local float array and the <1 x float> array %things. First: store 0.0 into res.0[0].
+  ; CHECK: [[adr0:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0
+  ; CHECK: store float 0.000000e+00, float* [[adr0]]
+  %tmp1 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0
+  store float 0.000000e+00, float* %tmp1
+  ; Store 1.0 into res.0[%i], i.e. at the dynamic index passed in as the last argument.
+  ; CHECK: [[adri:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i
+  ; CHECK: store float 1.000000e+00, float* [[adri]]
+  %tmp2 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i
+  store float 1.000000e+00, float* %tmp2
+  ; Store 2.0 into res.0[2].
+  ; CHECK: [[adr2:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2
+  ; CHECK: store float 2.000000e+00, float* [[adr2]]
+  %tmp3 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2
+  store float 2.000000e+00, float* %tmp3
+  ; Load things[0] as <1 x float>, extract its only element, and store it into res.0[3].
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0
+  ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]]
+  ; CHECK: [[adr3:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3
+  ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i64 0
+  ; CHECK: store float [[val0]], float* [[adr3]]
+  %tmp4 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0
+  %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4
+  %tmp6 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3
+  %tmp7 = extractelement <1 x float> %tmp5, i64 0
+  store float %tmp7, float* %tmp6
+  ; Same sequence with the dynamic source things[%i] and destination res.0[4].
+  ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i
+  ; CHECK: [[ldi:%.*]] = load <1 x float>, <1 x float>* [[adri]]
+  ; CHECK: [[adr4:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4
+  ; CHECK: [[vali:%.*]] = extractelement <1 x float> [[ldi]], i64 0
+  ; CHECK: store float [[vali]], float* [[adr4]]
+  %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i
+  %tmp9 = load <1 x float>, <1 x float>* %tmp8, align 4
+  %tmp10 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4
+  %tmp11 = extractelement <1 x float> %tmp9, i64 0
+  store float %tmp11, float* %tmp10
+  ; Same sequence with source things[2] and destination res.0[5].
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]]
+  ; CHECK: [[adr5:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5
+  ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i64 0
+  ; CHECK: store float [[val2]], float* [[adr5]]
+  %tmp12 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2
+  %tmp13 = load <1 x float>, <1 x float>* %tmp12, align 4
+  %tmp14 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5
+  %tmp15 = extractelement <1 x float> %tmp13, i64 0
+  store float %tmp15, float* %tmp14
+  ; Copy-out: for k = 0..9, res.0[k] is loaded, rebuilt as <1 x float> via insertelement, and stored to agg.result[k]. No patterns are matched for this part.
+  %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 0
+  %tmp17 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0
+  %load17 = load float, float* %tmp17
+  %insert18 = insertelement <1 x float> undef, float %load17, i64 0
+  store <1 x float> %insert18, <1 x float>* %tmp16
+
+  %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 1
+  %tmp19 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 1
+  %load15 = load float, float* %tmp19
+  %insert16 = insertelement <1 x float> undef, float %load15, i64 0
+  store <1 x float> %insert16, <1 x float>* %tmp18
+
+  %tmp20 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 2
+  %tmp21 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2
+  %load13 = load float, float* %tmp21
+  %insert14 = insertelement <1 x float> undef, float %load13, i64 0
+  store <1 x float> %insert14, <1 x float>* %tmp20
+
+  %tmp22 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 3
+  %tmp23 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3
+  %load11 = load float, float* %tmp23
+  %insert12 = insertelement <1 x float> undef, float %load11, i64 0
+  store <1 x float> %insert12, <1 x float>* %tmp22
+
+  %tmp24 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 4
+  %tmp25 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4
+  %load9 = load float, float* %tmp25
+  %insert10 = insertelement <1 x float> undef, float %load9, i64 0
+  store <1 x float> %insert10, <1 x float>* %tmp24
+
+  %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 5
+  %tmp27 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5
+  %load7 = load float, float* %tmp27
+  %insert8 = insertelement <1 x float> undef, float %load7, i64 0
+  store <1 x float> %insert8, <1 x float>* %tmp26
+
+  %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 6
+  %tmp29 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 6
+  %load5 = load float, float* %tmp29
+  %insert6 = insertelement <1 x float> undef, float %load5, i64 0
+  store <1 x float> %insert6, <1 x float>* %tmp28
+
+  %tmp30 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 7
+  %tmp31 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 7
+  %load3 = load float, float* %tmp31
+  %insert4 = insertelement <1 x float> undef, float %load3, i64 0
+  store <1 x float> %insert4, <1 x float>* %tmp30
+
+  %tmp32 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 8
+  %tmp33 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 8
+  %load1 = load float, float* %tmp33
+  %insert2 = insertelement <1 x float> undef, float %load1, i64 0
+  store <1 x float> %insert2, <1 x float>* %tmp32
+
+  %tmp34 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 9
+  %tmp35 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 9
+  %load = load float, float* %tmp35
+  %insert = insertelement <1 x float> undef, float %load, i64 0
+  store <1 x float> %insert, <1 x float>* %tmp34
+
+  ret void
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define void @"\01?bittwiddlers
+define void @"\01?bittwiddlers@@YAXY0L@$$CAI@Z"([11 x i32]* noalias %things) #0 {
+bb:
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4
+  ; CHECK: [[res0:%.*]] = xor i32 [[ld1]], -1
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0
+  ; CHECK: store i32 [[res0]], i32* [[adr0]], align 4
+  %tmp = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = xor i32 %tmp1, -1
+  %tmp3 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0
+  store i32 %tmp2, i32* %tmp3, align 4
+  ; The group above computes things[0] = ~things[1] (xor with -1). Next: things[1] = things[2] | things[3].
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
+  ; CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]]
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1
+  ; CHECK: store i32 [[res1]], i32* [[adr1]], align 4
+  %tmp4 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2
+  %tmp5 = load i32, i32* %tmp4, align 4
+  %tmp6 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3
+  %tmp7 = load i32, i32* %tmp6, align 4
+  %tmp8 = or i32 %tmp5, %tmp7
+  %tmp9 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1
+  store i32 %tmp8, i32* %tmp9, align 4
+  ; things[2] = things[3] & things[4]
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4
+  ; CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]]
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2
+  ; CHECK: store i32 [[res2]], i32* [[adr2]], align 4
+  %tmp10 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3
+  %tmp11 = load i32, i32* %tmp10, align 4
+  %tmp12 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4
+  %tmp13 = load i32, i32* %tmp12, align 4
+  %tmp14 = and i32 %tmp11, %tmp13
+  %tmp15 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2
+  store i32 %tmp14, i32* %tmp15, align 4
+  ; things[3] = things[4] ^ things[5]
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4
+  ; CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]]
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3
+  ; CHECK: store i32 [[res3]], i32* [[adr3]], align 4
+  %tmp16 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4
+  %tmp17 = load i32, i32* %tmp16, align 4
+  %tmp18 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5
+  %tmp19 = load i32, i32* %tmp18, align 4
+  %tmp20 = xor i32 %tmp17, %tmp19
+  %tmp21 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3
+  store i32 %tmp20, i32* %tmp21, align 4
+  ; things[4] = things[5] << (things[6] & 31); the shift amount is masked with 31 before the shl.
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4
+  ; CHECK: [[and4:%.*]] = and i32 [[ld6]], 31
+  ; CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[and4]]
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4
+  ; CHECK: store i32 [[res4]], i32* [[adr4]], align 4
+  %tmp22 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5
+  %tmp23 = load i32, i32* %tmp22, align 4
+  %tmp24 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6
+  %tmp25 = load i32, i32* %tmp24, align 4
+  %tmp26 = and i32 %tmp25, 31
+  %tmp27 = shl i32 %tmp23, %tmp26
+  %tmp28 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4
+  store i32 %tmp27, i32* %tmp28, align 4
+  ; things[5] = things[6] >> (things[7] & 31), as a logical shift (lshr).
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4
+  ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7
+  ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4
+  ; CHECK: [[and5:%.*]] = and i32 [[ld7]], 31
+  ; CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[and5]]
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5
+  ; CHECK: store i32 [[res5]], i32* [[adr5]], align 4
+  %tmp29 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6
+  %tmp30 = load i32, i32* %tmp29, align 4
+  %tmp31 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7
+  %tmp32 = load i32, i32* %tmp31, align 4
+  %tmp33 = and i32 %tmp32, 31
+  %tmp34 = lshr i32 %tmp30, %tmp33
+  %tmp35 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5
+  store i32 %tmp34, i32* %tmp35, align 4
+  ; things[6] |= things[8]; the store reuses the address computed for the load of things[6].
+  ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8
+  ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4
+  ; CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]]
+  ; CHECK: store i32 [[res6]], i32* [[adr6]], align 4
+  %tmp36 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8
+  %tmp37 = load i32, i32* %tmp36, align 4
+  %tmp38 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6
+  %tmp39 = load i32, i32* %tmp38, align 4
+  %tmp40 = or i32 %tmp39, %tmp37
+  store i32 %tmp40, i32* %tmp38, align 4
+  ; things[7] &= things[9]
+  ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9
+  ; CHECK: [[ld9:%.*]] = load i32, i32* [[adr9]], align 4
+  ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7
+  ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4
+  ; CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]]
+  ; CHECK: store i32 [[res7]], i32* [[adr7]], align 4
+  %tmp41 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9
+  %tmp42 = load i32, i32* %tmp41, align 4
+  %tmp43 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7
+  %tmp44 = load i32, i32* %tmp43, align 4
+  %tmp45 = and i32 %tmp44, %tmp42
+  store i32 %tmp45, i32* %tmp43, align 4
+  ; things[8] ^= things[10]
+  ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10
+  ; CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]], align 4
+  ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8
+  ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4
+  ; CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]]
+  ; CHECK: store i32 [[res8]], i32* [[adr8]], align 4
+  %tmp46 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10
+  %tmp47 = load i32, i32* %tmp46, align 4
+  %tmp48 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8
+  %tmp49 = load i32, i32* %tmp48, align 4
+  %tmp50 = xor i32 %tmp49, %tmp47
+  store i32 %tmp50, i32* %tmp48, align 4
+  ; Every operation above is on scalar i32 values, and each expected pattern mirrors the input instructions that follow it.
+  ret void
+}
+
+declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #2
+declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer<vector<float, 1> >"(i32, %"class.RWStructuredBuffer<vector<float, 1> >") #2
+declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1
+; Attribute groups referenced through the #0/#1/#2 suffixes on the definitions and declarations.
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
+; Named metadata; each entry points at numbered nodes defined further down.
+!dx.version = !{!3}
+!dx.valver = !{!3}
+!dx.shaderModel = !{!4}
+!dx.resources = !{!5}
+!dx.typeAnnotations = !{!9, !15}
+!dx.entryPoints = !{!35}
+; Numbered metadata nodes. !4 is {"lib", 6, 9} (presumably the lib_6_9 target profile); !15 pairs each of the five functions with a metadata node.
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
+!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"}
+!3 = !{i32 1, i32 9}
+!4 = !{!"lib", i32 6, i32 9}
+!5 = !{null, !6, null, null}
+!6 = !{!7}
+!7 = !{i32 0, %"class.RWStructuredBuffer<vector<float, 1> >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A", !"buf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !8}
+!8 = !{i32 1, i32 4}
+!9 = !{i32 0, %"class.RWStructuredBuffer<vector<float, 1> >" undef, !10}
+!10 = !{i32 4, !11, !12}
+!11 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9, i32 13, i32 1}
+!12 = !{i32 0, !13}
+!13 = !{!14}
+!14 = !{i32 0, <1 x float> undef}
+!15 = !{i32 1, void ([10 x <1 x float>]*)* @"\01?assignments@@YAXY09$$CAV?$vector@M$00@@@Z", !16, void ([11 x <1 x float>]*, [11 x <1 x float>]*)* @"\01?arithmetic@@YA$$BY0L@V?$vector@M$00@@Y0L@$$CAV1@@Z", !21, void ([10 x i32]*, [10 x i32]*, [10 x <1 x float>]*)* @"\01?logic@@YA$$BY09_NY09_NY09V?$vector@M$00@@@Z", !24, void ([10 x <1 x float>]*, [10 x <1 x float>]*, i32)* @"\01?index@@YA$$BY09V?$vector@M$00@@Y09V1@H@Z", !29, void ([11 x i32]*)* @"\01?bittwiddlers@@YAXY0L@$$CAI@Z", !32}
+!16 = !{!17, !19}
+!17 = !{i32 1, !18, !18}
+!18 = !{}
+!19 = !{i32 2, !20, !18}
+!20 = !{i32 7, i32 9, i32 13, i32 1}
+!21 = !{!22, !23, !19}
+!22 = !{i32 0, !18, !18}
+!23 = !{i32 1, !20, !18}
+!24 = !{!22, !25, !27, !28}
+!25 = !{i32 1, !26, !18}
+!26 = !{i32 7, i32 1}
+!27 = !{i32 0, !26, !18}
+!28 = !{i32 0, !20, !18}
+!29 = !{!22, !23, !28, !30}
+!30 = !{i32 0, !31, !18}
+!31 = !{i32 7, i32 4}
+!32 = !{!17, !33}
+!33 = !{i32 2, !34, !18}
+!34 = !{i32 7, i32 5}
+!35 = !{null, !"", null, !5, null}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl
new file mode 100644
index 0000000000..66382af2d5
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl
@@ -0,0 +1,425 @@
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int1      %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double1   -DDBL %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int16_t1  -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Mainly a source for the vec1 scalarizer IR test.
+// Serves to verify some codegen as well.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// Need to capture once for the full vector type, again for the element type.
+// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:<[0-9]* x [a-z0-9_]*>]] }
+// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { <{{[0-9]*}} x [[ELTY:[a-z0-9_]*]]> }
+RWStructuredBuffer<TYPE> buf;
+// NOTE(review): the assignments prototype below takes an extra scales parameter that the definition in this file does not have; confirm whether that is intentional.
+export void assignments(inout TYPE things[10], TYPE scales[10]);
+export TYPE arithmetic(inout TYPE things[11])[11];
+export bool logic(bool truth[10], TYPE consequences[10])[10];
+export TYPE index(TYPE things[10], int i, TYPE val)[10];
+
+// Test assignment operators.
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout TYPE things[10]) {
+  // Each statement is preceded by its expected IR; the vector type and opcode spellings are captured so one source serves every element type.
+  // CHECK: [[res0:%.*]] =  call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1)
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0
+  // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]]
+  things[0] = buf.Load(1);
+  // For compound assignments the expected IR loads the right-hand element first, then the left-hand one, and stores through the left-hand address.
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] [[TYPE]] [[vec1]], [[vec5]]
+  // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]]
+  things[1] += things[5];
+
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]] [[TYPE]] [[vec2]], [[vec6]]
+  // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]]
+  things[2] -= things[6];
+
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] [[TYPE]] [[vec3]], [[vec7]]
+  // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]]
+  things[3] *= things[7];
+
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[TYPE]] [[vec4]], [[vec8]]
+  // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]]
+  things[4] /= things[8];
+  // The remainder case is compiled only when DBL is undefined; the double1 invocation defines DBL and drops the NODBL prefix.
+#ifndef DBL
+  // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9
+  // NODBL: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]]
+  // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5
+  // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[vec5]], [[vec9]]
+  // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]]
+  things[5] %= things[9];
+#endif
+}
+
+// Test arithmetic operators.
+// CHECK-LABEL: define void @"\01?arithmetic
+export TYPE arithmetic(inout TYPE things[11])[11] {
+  TYPE res[11];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0
+  // CHECK: [[res1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[res0:%.*]] = [[SUB]] [[TYPE]]
+  res[0] = -things[0];
+  res[1] = +things[0];
+  // Only the negation is pattern-matched above, as a subtraction on the vector type; unary plus has no expected pattern of its own.
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[vec1]], [[vec2]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 2
+  // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]]
+  res[2] = things[1] + things[2];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[vec2]], [[vec3]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 3
+  // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]]
+  res[3] = things[2] - things[3];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[vec3]], [[vec4]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 4
+  // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]]
+  res[4] = things[3] * things[4];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[vec4]], [[vec5]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 5
+  // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]]
+  res[5] = things[4] / things[5];
+  // The remainder case is compiled only when DBL is undefined, so res[6] is never assigned in the double1 invocation.
+#ifndef DBL
+  // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5
+  // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6
+  // NODBL: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+  // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[vec5]], [[vec6]]
+  // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 6
+  // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]]
+  res[6] = things[5] % things[6];
+#endif
+
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]]
+  // CHECK: [[res7:%.*]] = [[ADD]] [[TYPE]] [[vec7]], <[[ELTY]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]]>
+  // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]]
+  // This is a post op, so the original value goes into res[].
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 7
+  // CHECK: store [[TYPE]] [[vec7]], [[TYPE]]* [[adr7]]
+  res[7] = things[7]++;
+  // Post-decrement is matched with the captured add opcode; its constant operand is left unmatched (presumably a negative one).
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]]
+  // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[vec8]]
+  // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]]
+  // This is a post op, so the original value goes into res[].
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 8
+  // CHECK: store [[TYPE]] [[vec8]], [[TYPE]]* [[adr8]]
+  res[8] = things[8]--;
+  // Pre-increment/decrement: unlike the post forms, the updated value is what gets stored into res[].
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]]
+  // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[vec9]]
+  // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]]
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 9
+  // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]]
+  res[9] = ++things[9];
+
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10
+  // CHECK: [[vec10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]]
+  // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[vec10]]
+  // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]]
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 10
+  // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]]
+  res[10] = --things[10];
+
+  // Memcpy res into return value.
+  // CHECK: [[retptr:%.*]] = bitcast [11 x [[TYPE]]]* %agg.result to i8*
+  // CHECK: [[resptr:%.*]] = bitcast [11 x [[TYPE]]]* %res to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]]
+  // CHECK: ret void
+  return res;
+}
+
+// Test logic operators.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export bool logic(bool truth[10], TYPE consequences[10])[10] {
+  bool res[10];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%.*]] = load i32, i32* [[adr0]]
+  // CHECK: [[bvec0:%.*]] = icmp ne i32 [[vec0]], 0
+  // CHECK: [[bres0:%.*]] = xor i1 [[bvec0]], true
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 0
+  // CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32
+  // CHECK: store i32 [[res0]], i32* [[adr0]]
+  res[0] = !truth[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load i32, i32* [[adr1]]
+  // CHECK: [[bvec1:%.*]] = icmp ne i32 [[vec1]], 0
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]]
+  // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0
+  // CHECK: [[bres1:%.*]] = or i1 [[bvec1]], [[bvec2]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 1
+  // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32
+  // CHECK: store i32 [[res1]], i32* [[adr1]]
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]]
+  // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0
+  // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 2
+  // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  // CHECK: store i32 [[res2]], i32* [[adr2]]
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load i32, i32* [[adr4]]
+  // CHECK: [[bvec4:%.*]] = icmp ne i32 [[vec4]], 0
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load i32, i32* [[adr5]]
+  // CHECK: [[bvec5:%.*]] = icmp ne i32 [[vec5]], 0
+  // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 3
+  // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
+  // CHECK: store i32 [[res3]], i32* [[adr3]]
+  res[3] = truth[3] ? truth[4] : truth[5];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0
+  // CHECK: [[vec0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[vec0]], [[vec1]]
+  // CHECK: [[bres4:%.*]] = extractelement <1 x i1> [[cmp4]], i64 0
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 4
+  // CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32
+  // CHECK: store i32 [[res4]], i32* [[adr4]]
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[vec1]], [[vec2]]
+  // CHECK: [[bres5:%.*]] = extractelement <1 x i1> [[cmp5]], i64 0
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 5
+  // CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32
+  // CHECK: store i32 [[res5]], i32* [[adr5]]
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[vec2]], [[vec3]]
+  // CHECK: [[bres6:%.*]] = extractelement <1 x i1> [[cmp6]], i64 0
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 6
+  // CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32
+  // CHECK: store i32 [[res6]], i32* [[adr6]]
+  res[6] = consequences[2] <  consequences[3];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[TYPE]] [[vec3]], [[vec4]]
+  // CHECK: [[bres7:%.*]] = extractelement <1 x i1> [[cmp7]], i64 0
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 7
+  // CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32
+  // CHECK: store i32 [[res7]], i32* [[adr7]]
+  res[7] = consequences[3] >  consequences[4];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[TYPE]] [[vec4]], [[vec5]]
+  // CHECK: [[bres8:%.*]] = extractelement <1 x i1> [[cmp8]], i64 0
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 8
+  // CHECK: [[res8:%.*]] = zext i1 [[bres8]] to i32
+  // CHECK: store i32 [[res8]], i32* [[adr8]]
+  res[8] = consequences[4] <= consequences[5];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6
+  // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
+  // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[vec5]], [[vec6]]
+  // CHECK: [[bres9:%.*]] = extractelement <1 x i1> [[cmp9]], i64 0
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 9
+  // CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32
+  // CHECK: store i32 [[res9]], i32* [[adr9]]
+  res[9] = consequences[5] >= consequences[6];
+
+  // Memcpy res into return value.
+  // CHECK: [[retptr:%.*]] = bitcast [10 x i32]* %agg.result to i8*
+  // CHECK: [[resptr:%.*]] = bitcast [10 x i32]* %res to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]]
+  // CHECK: ret void
+  return res;
+}
+
+static const int Ix = 2;
+
+// Test indexing operators
+// CHECK-LABEL: define void @"\01?index
+export TYPE index(TYPE things[10], int i)[10] {
+  // CHECK: [[res:%.*]] = alloca [10 x [[TYPE]]]
+  // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]]
+  TYPE res[10];
+
+  // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0
+  // CHECK: store [[TYPE]] zeroinitializer, [[TYPE]]* [[res0]]
+  res[0] = 0;
+
+  // CHECK: [[i:%.*]] = load i32, i32* [[iadd]]
+  // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 [[i]]
+  // CHECK: store [[TYPE]] <[[ELTY]] {{(1|1\.0*e\+0*|0xH3C00).*}}>, [[TYPE]]* [[adri]]
+  res[i] = 1;
+
+  // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2
+  // CHECK: store [[TYPE]] <[[ELTY]] {{(2|2\.0*e\+0*|0xH4000).*}}>, [[TYPE]]* [[res2]]
+  res[Ix] = 2;
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0
+  // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3
+  // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[res3]]
+  res[3] = things[0];
+
+  // CHECK: [[i:%.*]] = load i32, i32* [[iadd]]
+  // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 [[i]]
+  // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]]
+  // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4
+  // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[res4]]
+  res[4] = things[i];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2
+  // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5
+  // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[res5]]
+  res[5] = things[Ix];
+  // CHECK: ret void
+  return res;
+}
+
+// Test bit twiddling operators.
+// CHECK-LABEL: define void @"\01?bittwiddlers
+export void bittwiddlers(inout uint things[11]) {
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1
+  // CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]]
+  // CHECK: [[res1:%.*]] = xor i32 [[ld1]], -1
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0
+  // CHECK: store i32 [[res1]], i32* [[adr0]]
+  things[0] = ~things[1];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2
+  // CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3
+  // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]]
+  // CHECK: store i32 [[res1]], i32* [[adr1]]
+  things[1] = things[2] | things[3];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3
+  // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4
+  // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]]
+  // CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2
+  // CHECK: store i32 [[res2]], i32* [[adr2]]
+  things[2] = things[3] & things[4];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4
+  // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5
+  // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]]
+  // CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3
+  // CHECK: store i32 [[res3]], i32* [[adr3]]
+  things[3] = things[4] ^ things[5];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5
+  // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6
+  // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]]
+  // CHECK: [[shv6:%.*]] = and i32 [[ld6]], 31
+  // CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[shv6]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4
+  // CHECK: store i32 [[res4]], i32* [[adr4]]
+  things[4] = things[5] << things[6];
+
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6
+  // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]]
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7
+  // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]]
+  // CHECK: [[shv7:%.*]] = and i32 [[ld7]], 31
+  // CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[shv7]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5
+  // CHECK: store i32 [[res5]], i32* [[adr5]]
+  things[5] = things[6] >> things[7];
+
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8
+  // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6
+  // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]]
+  // CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]]
+  // CHECK: store i32 [[res6]], i32* [[adr6]]
+  things[6] |= things[8];
+
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9
+  // CHECK: [[ld9:%.*]] = load i32, i32* [[adr9]]
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7
+  // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]]
+  // CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]]
+  // CHECK: store i32 [[res7]], i32* [[adr7]]
+  things[7] &= things[9];
+
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10
+  // CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]]
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8
+  // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]]
+  // CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]]
+  // CHECK: store i32 [[res8]], i32* [[adr8]]
+  things[8] ^= things[10];
+
+  // CHECK: ret void
+}
diff --git a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
index 35fd0d6b1d..a7b7a90012 100644
--- a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
+++ b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
@@ -31,3 +31,25 @@ declare float @"\01?foo@@YAMY02V?$vector@M$02@@@Z"([3 x <3 x float>]*)
 
 attributes #0 = { nounwind }
 
+!dx.version = !{!3}
+!dx.valver = !{!4}
+!dx.shaderModel = !{!5}
+!dx.typeAnnotations = !{!6}
+!dx.entryPoints = !{!12}
+!dx.fnprops = !{}
+!dx.options = !{!13, !14}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
+!3 = !{i32 1, i32 6}
+!4 = !{i32 1, i32 6}
+!5 = !{!"lib", i32 6, i32 6}
+!6 = !{i32 1, float ([3 x <3 x float>]*)* @"\01?bar@@YAMY02V?$vector@M$02@@@Z", !7, float ([3 x <3 x float>]*)* @"\01?foo@@YAMY02V?$vector@M$02@@@Z", !7}
+!7 = !{!8, !11}
+!8 = !{i32 1, !9, !10}
+!9 = !{i32 7, i32 9}
+!10 = !{}
+!11 = !{i32 0, !9, !10}
+!12 = !{null, !"", null, null, null}
+!13 = !{i32 64}
+!14 = !{i32 -1}

From f1de6172a5cf13c3bc1235d4e7e68d5c4136b8bb Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Thu, 20 Mar 2025 14:57:06 -0600
Subject: [PATCH 04/17] clang-format

---
 lib/HLSL/HLMatrixBitcastLowerPass.cpp         |  4 +-
 .../Scalar/ScalarReplAggregatesHLSL.cpp       | 44 +++++++++----------
 lib/Transforms/Scalar/Scalarizer.cpp          |  3 +-
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
index 99784e5079..adf35c1538 100644
--- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp
+++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
@@ -77,6 +77,7 @@ Type *TryLowerMatTy(Type *Ty) {
 
 class MatrixBitcastLowerPass : public FunctionPass {
   bool SupportsVectors = false;
+
 public:
   static char ID; // Pass identification, replacement for typeid
   explicit MatrixBitcastLowerPass() : FunctionPass(ID) {}
@@ -84,7 +85,8 @@ class MatrixBitcastLowerPass : public FunctionPass {
   StringRef getPassName() const override { return "Matrix Bitcast lower"; }
   bool runOnFunction(Function &F) override {
     if (F.getParent()->HasDxilModule())
-      SupportsVectors = F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus();
+      SupportsVectors =
+          F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus();
     bool bUpdated = false;
     std::unordered_set<BitCastInst *> matCastSet;
     for (auto blkIt = F.begin(); blkIt != F.end(); ++blkIt) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
index 7ec297fb32..2c852e6c2f 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
@@ -81,17 +81,18 @@ class SROA_Helper {
   static bool DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
                                   Type *&BrokenUpTy, uint64_t &NumInstances,
                                   IRBuilder<> &Builder, bool bFlatVector,
-                                  bool SupportsVectors,
-                                  bool hasPrecise, DxilTypeSystem &typeSys,
-                                  const DataLayout &DL,
+                                  bool SupportsVectors, bool hasPrecise,
+                                  DxilTypeSystem &typeSys, const DataLayout &DL,
                                   SmallVector<Value *, 32> &DeadInsts,
                                   DominatorTree *DT);
 
-  static bool
-  DoScalarReplacement(GlobalVariable *GV, std::vector<Value *> &Elts,
-                      IRBuilder<> &Builder, bool bFlatVector, bool SupportsVectors, bool hasPrecise,
-                      DxilTypeSystem &typeSys, const DataLayout &DL,
-                      SmallVector<Value *, 32> &DeadInsts, DominatorTree *DT);
+  static bool DoScalarReplacement(GlobalVariable *GV,
+                                  std::vector<Value *> &Elts,
+                                  IRBuilder<> &Builder, bool bFlatVector,
+                                  bool SupportsVectors, bool hasPrecise,
+                                  DxilTypeSystem &typeSys, const DataLayout &DL,
+                                  SmallVector<Value *, 32> &DeadInsts,
+                                  DominatorTree *DT);
   static unsigned GetEltAlign(unsigned ValueAlign, const DataLayout &DL,
                               Type *EltTy, unsigned Offset);
   // Lower memcpy related to V.
@@ -1880,7 +1881,8 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
         uint64_t NumInstances = 1;
         bool SROAed = SROA_Helper::DoScalarReplacement(
             AI, Elts, BrokenUpTy, NumInstances, Builder,
-            /*bFlatVector*/ true, SupportsVectors, hasPrecise, typeSys, DL, DeadInsts, &DT);
+            /*bFlatVector*/ true, SupportsVectors, hasPrecise, typeSys, DL,
+            DeadInsts, &DT);
 
         if (SROAed) {
           Type *Ty = AI->getAllocatedType();
@@ -1948,8 +1950,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
       }
 
       // Flatten Global vector if no dynamic vector indexing.
-      bool bFlatVector =
-        !hasDynamicVectorIndexing(GV);
+      bool bFlatVector = !hasDynamicVectorIndexing(GV);
 
       if (bFlatVector) {
         GVDbgOffset &dbgOffset = GVDbgOffsetMap[GV];
@@ -2923,8 +2924,8 @@ static ArrayType *CreateNestArrayTy(Type *FinalEltTy,
 bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
                                       Type *&BrokenUpTy, uint64_t &NumInstances,
                                       IRBuilder<> &Builder, bool bFlatVector,
-                                      bool SupportsVectors,
-                                      bool hasPrecise, DxilTypeSystem &typeSys,
+                                      bool SupportsVectors, bool hasPrecise,
+                                      DxilTypeSystem &typeSys,
                                       const DataLayout &DL,
                                       SmallVector<Value *, 32> &DeadInsts,
                                       DominatorTree *DT) {
@@ -3122,14 +3123,11 @@ unsigned SROA_Helper::GetEltAlign(unsigned ValueAlign, const DataLayout &DL,
 
 /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new
 /// AllocaInsts into Elts. Then do SROA on V.
-bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV,
-                                      std::vector<Value *> &Elts,
-                                      IRBuilder<> &Builder, bool bFlatVector,
-                                      bool SupportsVectors,
-                                      bool hasPrecise, DxilTypeSystem &typeSys,
-                                      const DataLayout &DL,
-                                      SmallVector<Value *, 32> &DeadInsts,
-                                      DominatorTree *DT) {
+bool SROA_Helper::DoScalarReplacement(
+    GlobalVariable *GV, std::vector<Value *> &Elts, IRBuilder<> &Builder,
+    bool bFlatVector, bool SupportsVectors, bool hasPrecise,
+    DxilTypeSystem &typeSys, const DataLayout &DL,
+    SmallVector<Value *, 32> &DeadInsts, DominatorTree *DT) {
   DEBUG(dbgs() << "Found inst to SROA: " << *GV << '\n');
   Type *Ty = GV->getType();
   // Skip none pointer types.
@@ -5369,8 +5367,8 @@ void SROA_Parameter_HLSL::flattenArgument(
       // DomTree isn't used by arguments
       SROAed = SROA_Helper::DoScalarReplacement(
           V, Elts, BrokenUpTy, NumInstances, Builder,
-          /*bFlatVector*/ false, SupportsVectors, annotation.IsPrecise(), dxilTypeSys, DL,
-          DeadInsts, /*DT*/ nullptr);
+          /*bFlatVector*/ false, SupportsVectors, annotation.IsPrecise(),
+          dxilTypeSys, DL, DeadInsts, /*DT*/ nullptr);
     }
 
     if (SROAed) {
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index d3c6d0e7e2..d936b17be9 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -568,12 +568,11 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
   if (!DstVT || !SrcVT)
     return false;
 
-
   unsigned DstNumElems = DstVT->getNumElements();
   unsigned SrcNumElems = SrcVT->getNumElements();
 
   // HLSL Change Begin - allow > 1 vectors where supported.
-  if (SupportsVectors &&  (DstNumElems > 1 || SrcNumElems > 1))
+  if (SupportsVectors && (DstNumElems > 1 || SrcNumElems > 1))
     return false;
   // HLSL Change End - allow > 1 vectors where supported.
 

From 47d42f014bed2172a51bd8122c535b3a762bf159 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Thu, 20 Mar 2025 15:40:39 -0600
Subject: [PATCH 05/17] simplify bitcast module retrieval

---
 lib/HLSL/HLMatrixBitcastLowerPass.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
index adf35c1538..3d71ba2ba2 100644
--- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp
+++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
@@ -84,9 +84,9 @@ class MatrixBitcastLowerPass : public FunctionPass {
 
   StringRef getPassName() const override { return "Matrix Bitcast lower"; }
   bool runOnFunction(Function &F) override {
-    if (F.getParent()->HasDxilModule())
-      SupportsVectors =
-          F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus();
+    DxilModule &DM = F.getParent()->GetOrCreateDxilModule();
+    SupportsVectors = DM.GetShaderModel()->IsSM69Plus();
+
     bool bUpdated = false;
     std::unordered_set<BitCastInst *> matCastSet;
     for (auto blkIt = F.begin(); blkIt != F.end(); ++blkIt) {
@@ -104,7 +104,6 @@ class MatrixBitcastLowerPass : public FunctionPass {
       }
     }
 
-    DxilModule &DM = F.getParent()->GetOrCreateDxilModule();
     // Remove bitcast which has CallInst user.
     if (DM.GetShaderModel()->IsLib()) {
       for (auto it = matCastSet.begin(); it != matCastSet.end();) {

From d480caa32e86411e967ef0d567f4673c799b5074 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Thu, 20 Mar 2025 21:46:11 -0600
Subject: [PATCH 06/17] Add long vec scalarizer test

The long vector scalarizer test was left out of the original commit.

Also makes a couple of additions to the existing long vector operator tests.
---
 .../hlsl/types/longvec-operators-vec1s.hlsl   |  43 +-
 .../hlsl/types/longvec-operators.hlsl         |  91 ++-
 .../passes/longvec-operators-scalarizer.ll    | 703 ++++++++++++++++++
 .../CodeGenDXIL/passes/longvec-operators.hlsl | 420 +++++++++++
 4 files changed, 1241 insertions(+), 16 deletions(-)
 create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
 create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl

diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl
index 377c797b93..c366261406 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl
@@ -375,20 +375,20 @@ export TYPE index(TYPE things[10], int i)[10] {
 #ifdef INT
 // Test bit twiddling operators.
 // INT-LABEL: define void @"\01?bittwiddlers
-export void bittwiddlers(inout TYPE things[11]) {
-  // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1
+export void bittwiddlers(inout TYPE things[13]) {
+  // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 1
   // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
   // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0
   // INT: [[xor1:%[0-9]*]] = xor [[ELTY]] [[val1]], -1
   // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor1]], i32 0
-  // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0
+  // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 0
   // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr0]]
   things[0] = ~things[1];
 
-  // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2
+  // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 2
   // INT: [[ld2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
   // INT: [[val2:%[0-9]*]] = extractelement [[TYPE]] [[ld2]], i32 0
-  // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3
+  // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 3
   // INT: [[ld3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
   // INT: [[val3:%[0-9]*]] = extractelement [[TYPE]] [[ld3]], i32 0
   // INT: [[or1:%[0-9]*]] = or [[ELTY]] [[val3]], [[val2]]
@@ -396,7 +396,7 @@ export void bittwiddlers(inout TYPE things[11]) {
   // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]]
   things[1] = things[2] | things[3];
 
-  // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4
+  // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 4
   // INT: [[ld4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
   // INT: [[val4:%[0-9]*]] = extractelement [[TYPE]] [[ld4]], i32 0
   // INT: [[and2:%[0-9]*]] = and [[ELTY]] [[val4]], [[val3]]
@@ -404,7 +404,7 @@ export void bittwiddlers(inout TYPE things[11]) {
   // INT: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]]
   things[2] = things[3] & things[4];
 
-  // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5
+  // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 5
   // INT: [[ld5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
   // INT: [[val5:%[0-9]*]] = extractelement [[TYPE]] [[ld5]], i32 0
   // INT: [[xor3:%[0-9]*]] = xor [[ELTY]] [[val5]], [[val4]]
@@ -412,7 +412,7 @@ export void bittwiddlers(inout TYPE things[11]) {
   // INT: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]]
   things[3] = things[4] ^ things[5];
 
-  // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6
+  // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 6
   // INT: [[ld6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr6]]
   // INT: [[val6:%[0-9]*]] = extractelement [[TYPE]] [[ld6]], i32 0
   // INT: [[shv6:%[0-9]*]] = and [[ELTY]] [[val6]]
@@ -421,7 +421,7 @@ export void bittwiddlers(inout TYPE things[11]) {
   // INT: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]]
   things[4] = things[5] << things[6];
 
-  // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7
+  // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 7
   // INT: [[ld7:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr7]]
   // INT: [[val7:%[0-9]*]] = extractelement [[TYPE]] [[ld7]], i32 0
   // INT: [[shv7:%[0-9]*]] = and [[ELTY]] [[val7]]
@@ -431,7 +431,7 @@ export void bittwiddlers(inout TYPE things[11]) {
   // INT: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]]
   things[5] = things[6] >> things[7];
 
-  // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8
+  // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 8
   // INT: [[ld8:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr8]]
   // INT: [[val8:%[0-9]*]] = extractelement [[TYPE]] [[ld8]], i32 0
   // INT: [[or6:%[0-9]*]] = or [[ELTY]] [[val8]], [[val6]]
@@ -439,7 +439,7 @@ export void bittwiddlers(inout TYPE things[11]) {
   // INT: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]]
   things[6] |= things[8];
 
-  // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9
+  // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 9
   // INT: [[ld9:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr9]]
   // INT: [[val9:%[0-9]*]] = extractelement [[TYPE]] [[ld9]], i32 0
   // INT: [[and7:%[0-9]*]] = and [[ELTY]] [[val9]], [[val7]]
@@ -447,7 +447,7 @@ export void bittwiddlers(inout TYPE things[11]) {
   // INT: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]]
   things[7] &= things[9];
 
-  // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10
+  // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 10
   // INT: [[ld10:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr10]]
   // INT: [[val10:%[0-9]*]] = extractelement [[TYPE]] [[ld10]], i32 0
   // INT: [[xor8:%[0-9]*]] = xor [[ELTY]] [[val10]], [[val8]]
@@ -455,6 +455,25 @@ export void bittwiddlers(inout TYPE things[11]) {
   // INT: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]]
   things[8] ^= things[10];
 
+  // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 11
+  // INT: [[ld11:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr11]]
+  // INT: [[val11:%[0-9]*]] = extractelement [[TYPE]] [[ld11]], i32 0
+  // INT: [[shv11:%[0-9]*]] = and [[ELTY]] [[val11]]
+  // INT: [[shl9:%[0-9]*]] = shl [[ELTY]] [[val9]], [[shv11]]
+  // INT: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl9]], i32 0
+  // INT: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]]
+  things[9] <<= things[11];
+
+  // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 12
+  // INT: [[ld12:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr12]]
+  // INT: [[val12:%[0-9]*]] = extractelement [[TYPE]] [[ld12]], i32 0
+  // INT: [[shv12:%[0-9]*]] = and [[ELTY]] [[val12]]
+  // UNSIG: [[shr10:%[0-9]*]] = lshr [[ELTY]] [[val10]], [[shv12]]
+  // SIG: [[shr10:%[0-9]*]] = ashr [[ELTY]] [[val10]], [[shv12]]
+  // INT: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr10]], i32 0
+  // INT: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]]
+  things[10] >>= things[12];
+
   // INT: ret void
 }
 #endif // INT
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl
index 789be0091e..ed7a2bff25 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl
@@ -18,12 +18,12 @@
 // RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=128 %s | FileCheck %s --check-prefixes=CHECK,NODBL
 
 // Less exhaustive testing for some other types.
-// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int      -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL
-// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint     -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int      -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint     -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG
 // RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double   -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL
-// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=9 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG
 // RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
-// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t   -DNUM=177 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t   -DNUM=177 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG
 
 // Test relevant operators on an assortment vector sizes and types with 6.9 native vectors.
 
@@ -496,3 +496,86 @@ export vector<TYPE, NUM> index(vector<TYPE, NUM> things[10], int i, TYPE val)[10
   // CHECK: ret void
   return res;
 }
+
+#ifdef INT
+// Test bit twiddling operators.
+// INT-LABEL: define void @"\01?bittwiddlers
+export void bittwiddlers(inout vector<TYPE, NUM> things[13]) {
+  // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // INT: [[ld1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+  // INT: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld1]], <[[TYPE]] -1
+  // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]]
+  things[0] = ~things[1];
+
+  // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // INT: [[ld2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+  // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // INT: [[ld3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+  // INT: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld3]], [[ld2]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]]
+  things[1] = things[2] | things[3];
+
+  // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // INT: [[ld4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+  // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld4]], [[ld3]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
+  things[2] = things[3] & things[4];
+
+  // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // INT: [[ld5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+  // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld4]], [[ld5]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
+  things[3] = things[4] ^ things[5];
+
+  // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // INT: [[ld6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]]
+  // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld6]]
+  // INT: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld5]], [[shv6]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+  things[4] = things[5] << things[6];
+
+  // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // INT: [[ld7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]]
+  // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld7]]
+  // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]]
+  // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]]
+  things[5] = things[6] >> things[7];
+
+  // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // INT: [[ld8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]]
+  // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld8]], [[ld6]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]]
+  things[6] |= things[8];
+
+  // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // INT: [[ld9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]]
+  // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld9]], [[ld7]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]]
+  things[7] &= things[9];
+
+  // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10
+  // INT: [[ld10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]]
+  // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld8]], [[ld10]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]]
+  things[8] ^= things[10];
+
+  // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 11
+  // INT: [[ld11:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr11]]
+  // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld11]]
+  // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld9]], [[shv11]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]]
+  things[9] <<= things[11];
+
+  // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 12
+  // INT: [[ld12:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr12]]
+  // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld12]]
+  // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]]
+  // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]]
+  things[10] >>= things[12];
+
+  // INT: ret void
+}
+#endif // INT
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
new file mode 100644
index 0000000000..77a5c0681c
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
@@ -0,0 +1,703 @@
+; RUN: %dxopt %s -hlsl-passes-resume -scalarizer -S | FileCheck %s
+
+; Vectors of length greater than 1 should get no changes from scalarizer,
+; so this unusual test verifies that the pass makes no changes at all.
+; Still justified because prior to 6.9, many changes would result.
+; Compiled mostly for float7 vectors with int7 for the integer specific parts.
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+%"class.RWStructuredBuffer<float>" = type { float }
+%dx.types.Handle = type { i8* }
+%dx.types.ResourceProperties = type { i32, i32 }
+%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
+
+@"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" = external global [7 x %"class.RWStructuredBuffer<float>"], align 4
+@llvm.used = appending global [1 x i8*] [i8* bitcast ([7 x %"class.RWStructuredBuffer<float>"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" to i8*)], section "llvm.metadata"
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define void @"\01?assignments
+define void @"\01?assignments@@YAXY09$$CAV?$vector@M$06@@@Z"([10 x <7 x float>]* noalias %things) #0 {
+bb:
+  %tmp = load %"class.RWStructuredBuffer<float>", %"class.RWStructuredBuffer<float>"* getelementptr inbounds ([7 x %"class.RWStructuredBuffer<float>"], [7 x %"class.RWStructuredBuffer<float>"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A", i32 0, i32 0)
+  %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer<float>"(i32 160, %"class.RWStructuredBuffer<float>" %tmp)
+  %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 })
+
+  ; CHECK: [[buf:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 1, i32 0, i8 1, i32 4)
+  ; CHECK: [[val:%.*]] = extractvalue %dx.types.ResRet.f32 [[buf]], 0
+  ; CHECK: [[vec:%.*]] = insertelement <7 x float> undef, float [[val]], i32 0
+  ; CHECK: [[res0:%.*]] = shufflevector <7 x float> [[vec]], <7 x float> undef, <7 x i32> zeroinitializer
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0
+  ; CHECK: store <7 x float> [[res0]], <7 x float>* [[adr0]], align 4
+  %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4)
+  %tmp3 = extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0
+  %tmp4 = insertelement <7 x float> undef, float %tmp3, i32 0
+  %tmp5 = shufflevector <7 x float> %tmp4, <7 x float> undef, <7 x i32> zeroinitializer
+  %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0
+  store <7 x float> %tmp5, <7 x float>* %tmp6, align 4
+
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4
+  ; CHECK: [[res1:%.*]] = fadd fast <7 x float> [[ld1]], [[ld5]]
+  ; CHECK: store <7 x float> [[res1]], <7 x float>* [[adr1]], align 4
+  %tmp7 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5
+  %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4
+  %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1
+  %tmp10 = load <7 x float>, <7 x float>* %tmp9, align 4
+  %tmp11 = fadd fast <7 x float> %tmp10, %tmp8
+  store <7 x float> %tmp11, <7 x float>* %tmp9, align 4
+
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4
+  ; CHECK: [[res2:%.*]] = fsub fast <7 x float> [[ld2]], [[ld6]]
+  ; CHECK: store <7 x float> [[res2]], <7 x float>* [[adr2]], align 4
+  %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6
+  %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4
+  %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2
+  %tmp15 = load <7 x float>, <7 x float>* %tmp14, align 4
+  %tmp16 = fsub fast <7 x float> %tmp15, %tmp13
+  store <7 x float> %tmp16, <7 x float>* %tmp14, align 4
+
+  ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7
+  ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4
+  ; CHECK: [[res3:%.*]] = fmul fast <7 x float> [[ld3]], [[ld7]]
+  ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4
+  %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7
+  %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4
+  %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3
+  %tmp20 = load <7 x float>, <7 x float>* %tmp19, align 4
+  %tmp21 = fmul fast <7 x float> %tmp20, %tmp18
+  store <7 x float> %tmp21, <7 x float>* %tmp19, align 4
+
+  ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8
+  ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4
+  ; CHECK: [[res4:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld8]]
+  ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4
+  %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8
+  %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4
+  %tmp24 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4
+  %tmp25 = load <7 x float>, <7 x float>* %tmp24, align 4
+  %tmp26 = fdiv fast <7 x float> %tmp25, %tmp23
+  store <7 x float> %tmp26, <7 x float>* %tmp24, align 4
+
+  ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9
+  ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4
+  ; CHECK: [[res5:%.*]] = frem fast <7 x float> [[ld5]], [[ld9]]
+  ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4
+  %tmp27 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9
+  %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4
+  %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5
+  %tmp30 = load <7 x float>, <7 x float>* %tmp29, align 4
+  %tmp31 = frem fast <7 x float> %tmp30, %tmp28
+  store <7 x float> %tmp31, <7 x float>* %tmp29, align 4
+
+  ret void
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define void @"\01?arithmetic
+define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$06@@Y0L@$$CAV1@@Z"([11 x <7 x float>]* noalias sret %agg.result, [11 x <7 x float>]* noalias %things) #0 {
+bb:
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0
+  ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4
+  ; CHECK: [[res0:%.*]] = fsub fast <7 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ld0]]
+  %tmp = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0
+  %tmp1 = load <7 x float>, <7 x float>* %tmp, align 4
+  %tmp2 = fsub fast <7 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp1
+
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0
+  ; CHECK: [[res1:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4
+  %tmp3 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0
+  %tmp4 = load <7 x float>, <7 x float>* %tmp3, align 4
+
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4
+  ; CHECK: [[res2:%.*]] = fadd fast <7 x float> [[ld1]], [[ld2]]
+  %tmp5 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1
+  %tmp6 = load <7 x float>, <7 x float>* %tmp5, align 4
+  %tmp7 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2
+  %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4
+  %tmp9 = fadd fast <7 x float> %tmp6, %tmp8
+
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4
+  ; CHECK: [[res3:%.*]] = fsub fast <7 x float> [[ld2]], [[ld3]]
+  %tmp10 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2
+  %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4
+  %tmp12 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3
+  %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4
+  %tmp14 = fsub fast <7 x float> %tmp11, %tmp13
+
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4
+  ; CHECK: [[res4:%.*]] = fmul fast <7 x float> [[ld3]], [[ld4]]
+  %tmp15 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3
+  %tmp16 = load <7 x float>, <7 x float>* %tmp15, align 4
+  %tmp17 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4
+  %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4
+  %tmp19 = fmul fast <7 x float> %tmp16, %tmp18
+
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4
+  ; CHECK: [[res5:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld5]]
+  %tmp20 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4
+  %tmp21 = load <7 x float>, <7 x float>* %tmp20, align 4
+  %tmp22 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5
+  %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4
+  %tmp24 = fdiv fast <7 x float> %tmp21, %tmp23
+
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4
+  ; CHECK: [[res6:%.*]] = frem fast <7 x float> [[ld5]], [[ld6]]
+  %tmp25 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5
+  %tmp26 = load <7 x float>, <7 x float>* %tmp25, align 4
+  %tmp27 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6
+  %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4
+  %tmp29 = frem fast <7 x float> %tmp26, %tmp28
+
+  ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 7
+  ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4
+  ; CHECK: [[res7:%.*]] = fadd fast <7 x float> [[ld7]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  ; CHECK: store <7 x float> [[res7]], <7 x float>* [[adr7]], align 4
+  %tmp30 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 7
+  %tmp31 = load <7 x float>, <7 x float>* %tmp30, align 4
+  %tmp32 = fadd fast <7 x float> %tmp31, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  store <7 x float> %tmp32, <7 x float>* %tmp30, align 4
+
+  ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8
+  ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4
+  ; CHECK: [[res8:%.*]] = fadd fast <7 x float> [[ld8]], <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  ; CHECK: store <7 x float> [[res8]], <7 x float>* [[adr8]], align 4
+  %tmp33 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8
+  %tmp34 = load <7 x float>, <7 x float>* %tmp33, align 4
+  %tmp35 = fadd fast <7 x float> %tmp34, <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  store <7 x float> %tmp35, <7 x float>* %tmp33, align 4
+
+  ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9
+  ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4
+  ; CHECK: [[res9:%.*]] = fadd fast <7 x float> [[ld9]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  ; CHECK: store <7 x float> [[res9]], <7 x float>* [[adr9]], align 4
+  %tmp36 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9
+  %tmp37 = load <7 x float>, <7 x float>* %tmp36, align 4
+  %tmp38 = fadd fast <7 x float> %tmp37, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  store <7 x float> %tmp38, <7 x float>* %tmp36, align 4
+
+  ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10
+  ; CHECK: [[ld10:%.*]] = load <7 x float>, <7 x float>* [[adr10]], align 4
+  ; CHECK: [[res10:%.*]] = fadd fast <7 x float> [[ld10]], <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  ; CHECK: store <7 x float> [[res10]], <7 x float>* [[adr10]], align 4
+  %tmp39 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10
+  %tmp40 = load <7 x float>, <7 x float>* %tmp39, align 4
+  %tmp41 = fadd fast <7 x float> %tmp40, <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  store <7 x float> %tmp41, <7 x float>* %tmp39, align 4
+
+  %tmp42 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 0
+  store <7 x float> %tmp2, <7 x float>* %tmp42
+  %tmp43 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 1
+  store <7 x float> %tmp4, <7 x float>* %tmp43
+  %tmp44 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 2
+  store <7 x float> %tmp9, <7 x float>* %tmp44
+  %tmp45 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 3
+  store <7 x float> %tmp14, <7 x float>* %tmp45
+  %tmp46 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 4
+  store <7 x float> %tmp19, <7 x float>* %tmp46
+  %tmp47 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 5
+  store <7 x float> %tmp24, <7 x float>* %tmp47
+  %tmp48 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 6
+  store <7 x float> %tmp29, <7 x float>* %tmp48
+  %tmp49 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 7
+  store <7 x float> %tmp31, <7 x float>* %tmp49
+  %tmp50 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 8
+  store <7 x float> %tmp34, <7 x float>* %tmp50
+  %tmp51 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 9
+  store <7 x float> %tmp38, <7 x float>* %tmp51
+  %tmp52 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 10
+  store <7 x float> %tmp41, <7 x float>* %tmp52
+  ret void
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define void @"\01?logic
+define void @"\01?logic@@YA$$BY09V?$vector@_N$06@@Y09V1@Y09V?$vector@M$06@@@Z"([10 x <7 x i32>]* noalias sret %agg.result, [10 x <7 x i32>]* %truth, [10 x <7 x float>]* %consequences) #0 {
+bb:
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0
+  ; CHECK: [[ld0:%.*]] = load <7 x i32>, <7 x i32>* [[adr0]], align 4
+  ; CHECK: [[nres0:%.*]] = icmp ne <7 x i32> [[ld0]], zeroinitializer
+  ; CHECK: [[bres0:%.*]] = icmp eq <7 x i1> [[nres0]], zeroinitializer
+  ; CHECK: [[res0:%.*]] = zext <7 x i1> [[bres0]] to <7 x i32>
+  %tmp = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0
+  %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4
+  %tmp2 = icmp ne <7 x i32> %tmp1, zeroinitializer
+  %tmp3 = icmp eq <7 x i1> %tmp2, zeroinitializer
+  %tmp4 = zext <7 x i1> %tmp3 to <7 x i32>
+
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4
+  ; CHECK: [[bld1:%.*]] = icmp ne <7 x i32> [[ld1]], zeroinitializer
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4
+  ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer
+  ; CHECK: [[bres1:%.*]] = or <7 x i1> [[bld1]], [[bld2]]
+  ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32>
+  %tmp5 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1
+  %tmp6 = load <7 x i32>, <7 x i32>* %tmp5, align 4
+  %tmp7 = icmp ne <7 x i32> %tmp6, zeroinitializer
+  %tmp8 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2
+  %tmp9 = load <7 x i32>, <7 x i32>* %tmp8, align 4
+  %tmp10 = icmp ne <7 x i32> %tmp9, zeroinitializer
+  %tmp11 = or <7 x i1> %tmp7, %tmp10
+  %tmp12 = zext <7 x i1> %tmp11 to <7 x i32>
+
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4
+  ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4
+  ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer
+  ; CHECK: [[bres2:%.*]] = and <7 x i1> [[bld2]], [[bld3]]
+  ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32>
+  %tmp13 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2
+  %tmp14 = load <7 x i32>, <7 x i32>* %tmp13, align 4
+  %tmp15 = icmp ne <7 x i32> %tmp14, zeroinitializer
+  %tmp16 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3
+  %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4
+  %tmp18 = icmp ne <7 x i32> %tmp17, zeroinitializer
+  %tmp19 = and <7 x i1> %tmp15, %tmp18
+  %tmp20 = zext <7 x i1> %tmp19 to <7 x i32>
+
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4
+  ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4
+  ; CHECK: [[bld4:%.*]] = icmp ne <7 x i32> [[ld4]], zeroinitializer
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4
+  ; CHECK: [[bld5:%.*]] = icmp ne <7 x i32> [[ld5]], zeroinitializer
+  ; CHECK: [[bres3:%.*]] = select <7 x i1> [[bld3]], <7 x i1> [[bld4]], <7 x i1> [[bld5]]
+  ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32>
+  %tmp21 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3
+  %tmp22 = load <7 x i32>, <7 x i32>* %tmp21, align 4
+  %tmp23 = icmp ne <7 x i32> %tmp22, zeroinitializer
+  %tmp24 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4
+  %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4
+  %tmp26 = icmp ne <7 x i32> %tmp25, zeroinitializer
+  %tmp27 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5
+  %tmp28 = load <7 x i32>, <7 x i32>* %tmp27, align 4
+  %tmp29 = icmp ne <7 x i32> %tmp28, zeroinitializer
+  %tmp30 = select <7 x i1> %tmp23, <7 x i1> %tmp26, <7 x i1> %tmp29
+  %tmp31 = zext <7 x i1> %tmp30 to <7 x i32>
+
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0
+  ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4
+  ; CHECK: [[bres1:%.*]] = fcmp fast oeq <7 x float> [[ld0]], [[ld1]]
+  ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32>
+  %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0
+  %tmp33 = load <7 x float>, <7 x float>* %tmp32, align 4
+  %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1
+  %tmp35 = load <7 x float>, <7 x float>* %tmp34, align 4
+  %tmp36 = fcmp fast oeq <7 x float> %tmp33, %tmp35
+  %tmp37 = zext <7 x i1> %tmp36 to <7 x i32>
+
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4
+  ; CHECK: [[bres2:%.*]] = fcmp fast une <7 x float> [[ld1]], [[ld2]]
+  ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32>
+  %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1
+  %tmp39 = load <7 x float>, <7 x float>* %tmp38, align 4
+  %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2
+  %tmp41 = load <7 x float>, <7 x float>* %tmp40, align 4
+  %tmp42 = fcmp fast une <7 x float> %tmp39, %tmp41
+  %tmp43 = zext <7 x i1> %tmp42 to <7 x i32>
+
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4
+  ; CHECK: [[bres3:%.*]] = fcmp fast olt <7 x float> [[ld2]], [[ld3]]
+  ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32>
+  %tmp44 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2
+  %tmp45 = load <7 x float>, <7 x float>* %tmp44, align 4
+  %tmp46 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3
+  %tmp47 = load <7 x float>, <7 x float>* %tmp46, align 4
+  %tmp48 = fcmp fast olt <7 x float> %tmp45, %tmp47
+  %tmp49 = zext <7 x i1> %tmp48 to <7 x i32>
+
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4
+  ; CHECK: [[bres4:%.*]] = fcmp fast ogt <7 x float> [[ld3]], [[ld4]]
+  ; CHECK: [[res4:%.*]] = zext <7 x i1> [[bres4]] to <7 x i32>
+  %tmp50 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3
+  %tmp51 = load <7 x float>, <7 x float>* %tmp50, align 4
+  %tmp52 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4
+  %tmp53 = load <7 x float>, <7 x float>* %tmp52, align 4
+  %tmp54 = fcmp fast ogt <7 x float> %tmp51, %tmp53
+  %tmp55 = zext <7 x i1> %tmp54 to <7 x i32>
+
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4
+  ; CHECK: [[bres5:%.*]] = fcmp fast ole <7 x float> [[ld4]], [[ld5]]
+  ; CHECK: [[res5:%.*]] = zext <7 x i1> [[bres5]] to <7 x i32>
+  %tmp56 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4
+  %tmp57 = load <7 x float>, <7 x float>* %tmp56, align 4
+  %tmp58 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5
+  %tmp59 = load <7 x float>, <7 x float>* %tmp58, align 4
+  %tmp60 = fcmp fast ole <7 x float> %tmp57, %tmp59
+  %tmp61 = zext <7 x i1> %tmp60 to <7 x i32>
+
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4
+  ; CHECK: [[bres6:%.*]] = fcmp fast oge <7 x float> [[ld5]], [[ld6]]
+  ; CHECK: [[res6:%.*]] = zext <7 x i1> [[bres6]] to <7 x i32>
+  %tmp62 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5
+  %tmp63 = load <7 x float>, <7 x float>* %tmp62, align 4
+  %tmp64 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6
+  %tmp65 = load <7 x float>, <7 x float>* %tmp64, align 4
+  %tmp66 = fcmp fast oge <7 x float> %tmp63, %tmp65
+  %tmp67 = zext <7 x i1> %tmp66 to <7 x i32>
+
+  %tmp68 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 0
+  store <7 x i32> %tmp4, <7 x i32>* %tmp68
+  %tmp69 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 1
+  store <7 x i32> %tmp12, <7 x i32>* %tmp69
+  %tmp70 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 2
+  store <7 x i32> %tmp20, <7 x i32>* %tmp70
+  %tmp71 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 3
+  store <7 x i32> %tmp31, <7 x i32>* %tmp71
+  %tmp72 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 4
+  store <7 x i32> %tmp37, <7 x i32>* %tmp72
+  %tmp73 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 5
+  store <7 x i32> %tmp43, <7 x i32>* %tmp73
+  %tmp74 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 6
+  store <7 x i32> %tmp49, <7 x i32>* %tmp74
+  %tmp75 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 7
+  store <7 x i32> %tmp55, <7 x i32>* %tmp75
+  %tmp76 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 8
+  store <7 x i32> %tmp61, <7 x i32>* %tmp76
+  %tmp77 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 9
+  store <7 x i32> %tmp67, <7 x i32>* %tmp77
+  ret void
+}
+
+; Function Attrs: nounwind -- index(): stores and loads whole <7 x float> vectors at constant and dynamic (%i) array indices.
+; CHECK-LABEL: define void @"\01?index
+define void @"\01?index@@YA$$BY09V?$vector@M$06@@Y09V1@H@Z"([10 x <7 x float>]* noalias sret %agg.result, [10 x <7 x float>]* %things, i32 %i) #0 {
+bb:
+  %res = alloca [10 x <7 x float>], align 4
+
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0
+  ; CHECK: store <7 x float> zeroinitializer, <7 x float>* [[adr0]], align 4
+  %tmp1 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0
+  store <7 x float> zeroinitializer, <7 x float>* %tmp1, align 4
+
+  ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 %i
+  ; CHECK: store <7 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <7 x float>* [[adri]], align 4
+  %tmp2 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 %i
+  store <7 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <7 x float>* %tmp2, align 4
+
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2
+  ; CHECK: store <7 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <7 x float>* [[adr2]], align 4
+  %tmp3 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2
+  store <7 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <7 x float>* %tmp3, align 4
+  ; The next three cases copy things[0], things[%i] and things[2] into res[3], res[4] and res[5].
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0
+  ; CHECK: [[res3:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3
+  ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4
+  %tmp4 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0
+  %tmp5 = load <7 x float>, <7 x float>* %tmp4, align 4
+  %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3
+  store <7 x float> %tmp5, <7 x float>* %tmp6, align 4
+
+  ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 %i
+  ; CHECK: [[res4:%.*]] = load <7 x float>, <7 x float>* [[adri]], align 4
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4
+  ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4
+  %tmp7 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 %i
+  %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4
+  %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4
+  store <7 x float> %tmp8, <7 x float>* %tmp9, align 4
+
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2
+  ; CHECK: [[res5:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5
+  ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4
+  %tmp10 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2
+  %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4
+  %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5
+  store <7 x float> %tmp11, <7 x float>* %tmp12, align 4
+  ; Copy all ten elements of %res to %agg.result; res[1] and res[6..9] are only written above if %i selects them.
+  %tmp13 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 0
+  %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0
+  %tmp15 = load <7 x float>, <7 x float>* %tmp14
+  store <7 x float> %tmp15, <7 x float>* %tmp13
+
+  %tmp16 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 1
+  %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 1
+  %tmp18 = load <7 x float>, <7 x float>* %tmp17
+  store <7 x float> %tmp18, <7 x float>* %tmp16
+
+  %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 2
+  %tmp20 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2
+  %tmp21 = load <7 x float>, <7 x float>* %tmp20
+  store <7 x float> %tmp21, <7 x float>* %tmp19
+
+  %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 3
+  %tmp23 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3
+  %tmp24 = load <7 x float>, <7 x float>* %tmp23
+  store <7 x float> %tmp24, <7 x float>* %tmp22
+
+  %tmp25 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 4
+  %tmp26 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4
+  %tmp27 = load <7 x float>, <7 x float>* %tmp26
+  store <7 x float> %tmp27, <7 x float>* %tmp25
+
+  %tmp28 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 5
+  %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5
+  %tmp30 = load <7 x float>, <7 x float>* %tmp29
+  store <7 x float> %tmp30, <7 x float>* %tmp28
+
+  %tmp31 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 6
+  %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 6
+  %tmp33 = load <7 x float>, <7 x float>* %tmp32
+  store <7 x float> %tmp33, <7 x float>* %tmp31
+
+  %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 7
+  %tmp35 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 7
+  %tmp36 = load <7 x float>, <7 x float>* %tmp35
+  store <7 x float> %tmp36, <7 x float>* %tmp34
+
+  %tmp37 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 8
+  %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 8
+  %tmp39 = load <7 x float>, <7 x float>* %tmp38
+  store <7 x float> %tmp39, <7 x float>* %tmp37
+
+  %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 9
+  %tmp41 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 9
+  %tmp42 = load <7 x float>, <7 x float>* %tmp41
+  store <7 x float> %tmp42, <7 x float>* %tmp40
+
+  ret void
+}
+
+; Function Attrs: nounwind -- bittwiddlers(): bitwise and shift operations on <7 x i32> elements of %things.
+; CHECK-LABEL: define void @"\01?bittwiddlers
+define void @"\01?bittwiddlers@@YAXY0L@$$CAV?$vector@I$06@@@Z"([11 x <7 x i32>]* noalias %things) #0 {
+bb:
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1
+  ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4
+  ; CHECK: [[res0:%.*]] = xor <7 x i32> [[ld1]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0
+  ; CHECK: store <7 x i32> [[res0]], <7 x i32>* [[adr0]], align 4
+  %tmp = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1
+  %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4
+  %tmp2 = xor <7 x i32> %tmp1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %tmp3 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0
+  store <7 x i32> %tmp2, <7 x i32>* %tmp3, align 4
+
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2
+  ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4
+  ; CHECK: [[res1:%.*]] = or <7 x i32> [[ld2]], [[ld3]]
+  ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1
+  ; CHECK: store <7 x i32> [[res1]], <7 x i32>* [[adr1]], align 4
+  %tmp4 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2
+  %tmp5 = load <7 x i32>, <7 x i32>* %tmp4, align 4
+  %tmp6 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3
+  %tmp7 = load <7 x i32>, <7 x i32>* %tmp6, align 4
+  %tmp8 = or <7 x i32> %tmp5, %tmp7
+  %tmp9 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1
+  store <7 x i32> %tmp8, <7 x i32>* %tmp9, align 4
+
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3
+  ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4
+  ; CHECK: [[res2:%.*]] = and <7 x i32> [[ld3]], [[ld4]]
+  ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2
+  ; CHECK: store <7 x i32> [[res2]], <7 x i32>* [[adr2]], align 4
+  %tmp10 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3
+  %tmp11 = load <7 x i32>, <7 x i32>* %tmp10, align 4
+  %tmp12 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4
+  %tmp13 = load <7 x i32>, <7 x i32>* %tmp12, align 4
+  %tmp14 = and <7 x i32> %tmp11, %tmp13
+  %tmp15 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2
+  store <7 x i32> %tmp14, <7 x i32>* %tmp15, align 4
+
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4
+  ; CHECK: [[res3:%.*]] = xor <7 x i32> [[ld4]], [[ld5]]
+  ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3
+  ; CHECK: store <7 x i32> [[res3]], <7 x i32>* [[adr3]], align 4
+  %tmp16 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4
+  %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4
+  %tmp18 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5
+  %tmp19 = load <7 x i32>, <7 x i32>* %tmp18, align 4
+  %tmp20 = xor <7 x i32> %tmp17, %tmp19
+  %tmp21 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3
+  store <7 x i32> %tmp20, <7 x i32>* %tmp21, align 4
+  ; Both shift cases below mask the shift amount with 31 before shifting.
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4
+  ; CHECK: [[shv6:%.*]] = and <7 x i32> [[ld6]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  ; CHECK: [[res4:%.*]] = shl <7 x i32> [[ld5]], [[shv6]]
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4
+  ; CHECK: store <7 x i32> [[res4]], <7 x i32>* [[adr4]], align 4
+  %tmp22 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5
+  %tmp23 = load <7 x i32>, <7 x i32>* %tmp22, align 4
+  %tmp24 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6
+  %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4
+  %tmp26 = and <7 x i32> %tmp25, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  %tmp27 = shl <7 x i32> %tmp23, %tmp26
+  %tmp28 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4
+  store <7 x i32> %tmp27, <7 x i32>* %tmp28, align 4
+
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4
+  ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7
+  ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4
+  ; CHECK: [[shv7:%.*]] = and <7 x i32> [[ld7]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  ; CHECK: [[res5:%.*]] = lshr <7 x i32> [[ld6]], [[shv7]]
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5
+  ; CHECK: store <7 x i32> [[res5]], <7 x i32>* [[adr5]], align 4
+  %tmp29 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6
+  %tmp30 = load <7 x i32>, <7 x i32>* %tmp29, align 4
+  %tmp31 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7
+  %tmp32 = load <7 x i32>, <7 x i32>* %tmp31, align 4
+  %tmp33 = and <7 x i32> %tmp32, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  %tmp34 = lshr <7 x i32> %tmp30, %tmp33
+  %tmp35 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5
+  store <7 x i32> %tmp34, <7 x i32>* %tmp35, align 4
+  ; The last three cases load and store through a single GEP (elements 6, 7 and 8) instead of storing to a different element.
+  ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8
+  ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4
+  ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6
+  ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4
+  ; CHECK: [[res6:%.*]] = or <7 x i32> [[ld6]], [[ld8]]
+  ; CHECK: store <7 x i32> [[res6]], <7 x i32>* [[adr6]], align 4
+  %tmp36 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8
+  %tmp37 = load <7 x i32>, <7 x i32>* %tmp36, align 4
+  %tmp38 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6
+  %tmp39 = load <7 x i32>, <7 x i32>* %tmp38, align 4
+  %tmp40 = or <7 x i32> %tmp39, %tmp37
+  store <7 x i32> %tmp40, <7 x i32>* %tmp38, align 4
+
+  ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9
+  ; CHECK: [[ld9:%.*]] = load <7 x i32>, <7 x i32>* [[adr9]], align 4
+  ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7
+  ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4
+  ; CHECK: [[res7:%.*]] = and <7 x i32> [[ld7]], [[ld9]]
+  ; CHECK: store <7 x i32> [[res7]], <7 x i32>* [[adr7]], align 4
+  %tmp41 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9
+  %tmp42 = load <7 x i32>, <7 x i32>* %tmp41, align 4
+  %tmp43 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7
+  %tmp44 = load <7 x i32>, <7 x i32>* %tmp43, align 4
+  %tmp45 = and <7 x i32> %tmp44, %tmp42
+  store <7 x i32> %tmp45, <7 x i32>* %tmp43, align 4
+
+  ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10
+  ; CHECK: [[ld10:%.*]] = load <7 x i32>, <7 x i32>* [[adr10]], align 4
+  ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8
+  ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4
+  ; CHECK: [[res8:%.*]] = xor <7 x i32> [[ld8]], [[ld10]]
+  ; CHECK: store <7 x i32> [[res8]], <7 x i32>* [[adr8]], align 4
+  %tmp46 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10
+  %tmp47 = load <7 x i32>, <7 x i32>* %tmp46, align 4
+  %tmp48 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8
+  %tmp49 = load <7 x i32>, <7 x i32>* %tmp48, align 4
+  %tmp50 = xor <7 x i32> %tmp49, %tmp47
+  store <7 x i32> %tmp50, <7 x i32>* %tmp48, align 4
+
+  ret void
+}
+
+declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
+declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer<float>"(i32, %"class.RWStructuredBuffer<float>") #1
+declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2
+; Attribute groups: #1 and #2 are used by the dx.op declarations above, #0 by function definitions such as index and bittwiddlers.
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
+; Named metadata: !dx.shaderModel points at !4 ("lib", 6, 9); !dx.version and !dx.valver both point at !3 (1, 9).
+!llvm.module.flags = !{!0}
+!pauseresume = !{!1}
+!llvm.ident = !{!2}
+!dx.version = !{!3}
+!dx.valver = !{!3}
+!dx.shaderModel = !{!4}
+!dx.resources = !{!5}
+!dx.typeAnnotations = !{!9, !15}
+!dx.entryPoints = !{!35}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
+!2 = !{!"dxc(private) 1.8.0.4845 (disable_disble_spirv, 2514104b9-dirty)"}
+!3 = !{i32 1, i32 9}
+!4 = !{!"lib", i32 6, i32 9}
+!5 = !{null, !6, null, null}
+!6 = !{!7}
+!7 = !{i32 0, [7 x %"class.RWStructuredBuffer<float>"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A", !"buf", i32 -1, i32 -1, i32 7, i32 12, i1 false, i1 false, i1 false, !8}
+!8 = !{i32 1, i32 4}
+!9 = !{i32 0, %"class.RWStructuredBuffer<float>" undef, !10}
+!10 = !{i32 4, !11, !12}
+!11 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9}
+!12 = !{i32 0, !13}
+!13 = !{!14}
+!14 = !{i32 0, float undef}
+!15 = !{i32 1, void ([10 x <7 x float>]*)* @"\01?assignments@@YAXY09$$CAV?$vector@M$06@@@Z", !16, void ([11 x <7 x float>]*, [11 x <7 x float>]*)* @"\01?arithmetic@@YA$$BY0L@V?$vector@M$06@@Y0L@$$CAV1@@Z", !21, void ([10 x <7 x i32>]*, [10 x <7 x i32>]*, [10 x <7 x float>]*)* @"\01?logic@@YA$$BY09V?$vector@_N$06@@Y09V1@Y09V?$vector@M$06@@@Z", !24, void ([10 x <7 x float>]*, [10 x <7 x float>]*, i32)* @"\01?index@@YA$$BY09V?$vector@M$06@@Y09V1@H@Z", !29, void ([11 x <7 x i32>]*)* @"\01?bittwiddlers@@YAXY0L@$$CAV?$vector@I$06@@@Z", !32}
+!16 = !{!17, !19}
+!17 = !{i32 1, !18, !18}
+!18 = !{}
+!19 = !{i32 2, !20, !18}
+!20 = !{i32 7, i32 9, i32 13, i32 7}
+!21 = !{!22, !23, !19}
+!22 = !{i32 0, !18, !18}
+!23 = !{i32 1, !20, !18}
+!24 = !{!22, !25, !27, !28}
+!25 = !{i32 1, !26, !18}
+!26 = !{i32 7, i32 1, i32 13, i32 7}
+!27 = !{i32 0, !26, !18}
+!28 = !{i32 0, !20, !18}
+!29 = !{!22, !23, !28, !30}
+!30 = !{i32 0, !31, !18}
+!31 = !{i32 7, i32 4}
+!32 = !{!17, !33}
+!33 = !{i32 2, !34, !18}
+!34 = !{i32 7, i32 5, i32 13, i32 7}
+!35 = !{null, !"", null, !5, null}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl
new file mode 100644
index 0000000000..c1ec22cdcb
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl
@@ -0,0 +1,420 @@
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float     -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int       -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double    -DNUM=16 -DDBL %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t  -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=34 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int16_t   -DNUM=129 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Mainly a source for the longvec scalarizer IR test.
+// Serves to verify some codegen as well.
+
+// Capture the IR spellings of TYPE and NUM from the buffer's struct type and global declaration, since the DXC version of FileCheck can't be given them explicitly.
+// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:[a-z0-9]*]] }
+// CHECK: external global {{\[}}[[NUM:[0-9]*]] x %"class.RWStructuredBuffer
+RWStructuredBuffer<TYPE> buf[NUM];
+
+
+// Test assignment operators: =, +=, -=, *=, /= and, when DBL is not defined, %=.
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout vector<TYPE, NUM> things[10]) {
+  // The scalar loaded from buf[0] is splatted to all NUM lanes (insertelement followed by a zeroinitializer-mask shufflevector).
+  // CHECK: [[res0:%.*]] =  call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1)
+  // CHECK: [[vec0:%.*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[res0]], i32 0
+  // CHECK: [[res0:%.*]] = shufflevector <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[adr0]]
+  things[0] = buf[0].Load(1);
+  // The compound assignments below capture opcode spellings ([[ADD]], [[SUB]], [[MUL]], [[DIV]], [[REM]]) that arithmetic() reuses.
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+  // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]]
+  things[1] += things[5];
+
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+  // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
+  things[2] -= things[6];
+
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+  // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
+  things[3] *= things[7];
+
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+  // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+  things[4] /= things[8];
+
+#ifndef DBL
+  // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // NODBL: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]]
+  // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+  // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]]
+  // NODBL: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]]
+  things[5] %= things[9];
+#endif
+}
+
+// Test arithmetic operators.
+// CHECK-LABEL: define void @"\01?arithmetic
+export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11] {
+  vector<TYPE, NUM> res[11];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[res1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]]
+  // CHECK: [[res0:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]>
+  res[0] = -things[0];
+  res[1] = +things[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+  // CHECK: [[res2:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 2
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
+  res[2] = things[1] + things[2];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+  // CHECK: [[res3:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 3
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
+  res[3] = things[2] - things[3];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+  // CHECK: [[res4:%.*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 4
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+  res[4] = things[3] * things[4];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+  // CHECK: [[res5:%.*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 5
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]]
+  res[5] = things[4] / things[5];
+
+#ifndef DBL
+  // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+  // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // NODBL: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]]
+  // NODBL: [[res6:%.*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]]
+  // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 6
+  // NODBL: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]]
+  res[6] = things[5] % things[6];
+#endif
+
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]]
+  // CHECK: [[res7:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]]
+  // This is a post op, so the original value goes into res[].
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 7
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[adr7]]
+  res[7] = things[7]++;
+
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]]
+  // CHECK: [[res8:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]]
+  // This is a post op, so the original value goes into res[].
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 8
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr8]]
+  res[8] = things[8]--;
+
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]]
+  // CHECK: [[res9:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]]
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 9
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]]
+  res[9] = ++things[9];
+
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10
+  // CHECK: [[vec10:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]]
+  // CHECK: [[res10:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]]
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 10
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]]
+  res[10] = --things[10];
+
+  // Memcpy res into return value.
+  // CHECK: [[retptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %agg.result to i8*
+  // CHECK: [[resptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %res to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]]
+  // CHECK: ret void
+  return res;
+}
+
+// Test logic operators.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export vector<bool,NUM> logic(vector<bool,NUM> truth[10], vector<TYPE, NUM> consequences[10])[10] {
+  vector<bool, NUM> res[10];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr0]]
+  // CHECK: [[bvec0:%.*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[bres0:%.*]] = icmp eq <[[NUM]] x i1> %4, zeroinitializer
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 0
+  // CHECK: [[res0:%.*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[adr0]]
+  res[0] = !truth[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]]
+  // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]]
+  // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[val1:%.*]] = icmp ne <[[NUM]] x i1> [[bvec1]], zeroinitializer
+  // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer
+  // CHECK: [[bres1:%.*]] = or <[[NUM]] x i1> [[val1]], [[val2]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 1
+  // CHECK: [[res1:%.*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]]
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]]
+  // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]]
+  // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer
+  // CHECK: [[val3:%.*]] = icmp ne <[[NUM]] x i1> [[bvec3]], zeroinitializer
+  // CHECK: [[bres2:%.*]] = and <[[NUM]] x i1> [[val2]], [[val3]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 2
+  // CHECK: [[res2:%.*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]]
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]]
+  // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]]
+  // CHECK: [[bvec4:%.*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]]
+  // CHECK: [[bvec5:%.*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // CHECK: [[bres3:%.*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 3
+  // CHECK: [[res3:%.*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]]
+  res[3] = truth[3] ? truth[4] : truth[5];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0
+  // CHECK: [[vec0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+  // CHECK: [[bres4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 4
+  // CHECK: [[res4:%.*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]]
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+  // CHECK: [[bres5:%.*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 5
+  // CHECK: [[res5:%.*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]]
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+  // CHECK: [[bres6:%.*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 6
+  // CHECK: [[res6:%.*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]]
+  res[6] = consequences[2] <  consequences[3];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+  // CHECK: [[bres7:%.*]] = [[CMP]] {{[osu]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]]
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 7
+  // CHECK: [[res7:%.*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]]
+  res[7] = consequences[3] >  consequences[4];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4
+  // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+  // CHECK: [[bres8:%.*]] = [[CMP]] {{[osu]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 8
+  // CHECK: [[res8:%.*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]]
+  res[8] = consequences[4] <= consequences[5];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5
+  // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6
+  // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]]
+  // CHECK: [[bres9:%.*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]]
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 9
+  // CHECK: [[res9:%.*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32>
+  // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[adr9]]
+  res[9] = consequences[5] >= consequences[6];
+
+  // Memcpy res into return value.
+  // CHECK: [[retptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %agg.result to i8*
+  // CHECK: [[resptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %res to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]]
+  // CHECK: ret void
+  return res;
+}
+
+static const int Ix = 2;
+
+// Test indexing operators
+// CHECK-LABEL: define void @"\01?index
+export vector<TYPE, NUM> index(vector<TYPE, NUM> things[10], int i)[10] {
+  // CHECK: [[res:%.*]] = alloca [10 x <[[NUM]] x [[TYPE]]>]
+  // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]]
+  vector<TYPE, NUM> res[10];
+
+  // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]]
+  res[0] = 0;
+
+  // CHECK: [[i:%.*]] = load i32, i32* [[iadd]]
+  // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 [[i]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|1\.0*e\+0*|0xH3C00).*}}, <[[NUM]] x [[TYPE]]>* [[adri]]
+  res[i] = 1;
+
+  // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2
+  // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|2\.0*e\+0*|0xH4000).*}}, <[[NUM]] x [[TYPE]]>* [[res2]]
+  res[Ix] = 2;
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[thg0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]]
+  // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]]
+  res[3] = things[0];
+
+  // CHECK: [[i:%.*]] = load i32, i32* [[iadd]]
+  // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 [[i]]
+  // CHECK: [[thgi:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adri]]
+  // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]]
+  res[4] = things[i];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[thg2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+  // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]]
+  res[5] = things[Ix];
+  // CHECK: ret void
+  return res;
+}
+
+// Test bit twiddling operators.
+// INT-LABEL: define void @"\01?bittwiddlers
+export void bittwiddlers(inout vector<uint, NUM> things[11]) {
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 1
+  // CHECK: [[ld1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]]
+  // CHECK: [[res1:%.*]] = xor <[[NUM]] x i32> [[ld1]], <i32 -1
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 0
+  // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr0]]
+  things[0] = ~things[1];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2
+  // CHECK: [[ld2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+  // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]]
+  // CHECK: [[res1:%.*]] = or <[[NUM]] x i32> [[ld2]], [[ld3]]
+  // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]]
+  things[1] = things[2] | things[3];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+  // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+  // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]]
+  // CHECK: [[res2:%.*]] = and <[[NUM]] x i32> [[ld3]], [[ld4]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2
+  // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]]
+  things[2] = things[3] & things[4];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+  // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+  // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]]
+  // CHECK: [[res3:%.*]] = xor <[[NUM]] x i32> [[ld4]], [[ld5]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+  // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]]
+  things[3] = things[4] ^ things[5];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+  // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+  // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]]
+  // CHECK: [[shv6:%.*]] = and <[[NUM]] x i32> [[ld6]], <i32 31
+  // CHECK: [[res4:%.*]] = shl <[[NUM]] x i32> [[ld5]], [[shv6]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+  // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]]
+  things[4] = things[5] << things[6];
+
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+  // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]]
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7
+  // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]]
+  // CHECK: [[shv7:%.*]] = and <[[NUM]] x i32> [[ld7]], <i32 31
+  // CHECK: [[res5:%.*]] = lshr <[[NUM]] x i32> [[ld6]], [[shv7]]
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+  // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]]
+  things[5] = things[6] >> things[7];
+
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8
+  // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]]
+  // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+  // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]]
+  // CHECK: [[res6:%.*]] = or <[[NUM]] x i32> [[ld6]], [[ld8]]
+  // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]]
+  things[6] |= things[8];
+
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 9
+  // CHECK: [[ld9:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr9]]
+  // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7
+  // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]]
+  // CHECK: [[res7:%.*]] = and <[[NUM]] x i32> [[ld7]], [[ld9]]
+  // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]]
+  things[7] &= things[9];
+
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 10
+  // CHECK: [[ld10:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr10]]
+  // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8
+  // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]]
+  // CHECK: [[res8:%.*]] = xor <[[NUM]] x i32> [[ld8]], [[ld10]]
+  // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]]
+  things[8] ^= things[10];
+
+  // CHECK: ret void
+}

From c2ee61b823c3965ccfff7defbf0c0a3261abd8bf Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Fri, 21 Mar 2025 15:46:05 -0600
Subject: [PATCH 07/17] Clean up test mistakes that failed on other platforms

I don't know why the order of the extractelements varies, but it does
even for the same build depending on whether a debugger is attached.
It would seem to be an unordered container object, but there is no such.
It just generates them as it goes and never touches them again which
would suggest the opposite order in a few cases.

They are constant expressions, that's probably why they move around, but
I can't find where to make them consistent.
---
 .../longvec-operators-vec1-scalarizer.ll       | 18 +++++++++---------
 .../CodeGenDXIL/passes/longvec-operators.hlsl  |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
index 4e2852b86a..90c88d3cf7 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
@@ -114,7 +114,7 @@ bb:
   ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0
   ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4
   ; CHECK: [[zero:%.*]] = extractelement <1 x float> <float -0.000000e+00>, i32 0
-  ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0:%.*]], i32 0
+  ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0
   ; CHECK: [[sub0:%.*]] = fsub fast float [[zero]], [[val0]]
   ; CHECK: [[res0:%.*]] = insertelement <1 x float> undef, float [[sub0]], i32 0
   %tmp = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0
@@ -206,8 +206,8 @@ bb:
 
   ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7
   ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]], align 4
-  ; CHECK: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0
-  ; CHECK: [[pos1:%.*]] = extractelement <1 x float> <float 1.000000e+00>, i32 0
+  ; CHECK-DAG: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0
+  ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> <float 1.000000e+00>, i32 0
   ; CHECK: [[add6:%.*]] = fadd fast float [[val7]], [[pos1]]
   ; CHECK: [[res6:%.*]] = insertelement <1 x float> undef, float [[add6]], i32 0
   %tmp37 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7
@@ -217,8 +217,8 @@ bb:
 
   ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8
   ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]], align 4
-  ; CHECK: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0
-  ; CHECK: [[neg1:%.*]] = extractelement <1 x float> <float -1.000000e+00>, i32 0
+  ; CHECK-DAG: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0
+  ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> <float -1.000000e+00>, i32 0
   ; CHECK: [[add7:%.*]] = fadd fast float [[val8]], [[neg1]]
   ; CHECK: [[res7:%.*]] = insertelement <1 x float> undef, float [[add7]], i32 0
   %tmp40 = extractelement <1 x float> %tmp38, i64 0
@@ -229,8 +229,8 @@ bb:
 
   ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9
   ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]], align 4
-  ; CHECK: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0
-  ; CHECK: [[pos1:%.*]] = extractelement <1 x float> <float 1.000000e+00>, i32 0
+  ; CHECK-DAG: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0
+  ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> <float 1.000000e+00>, i32 0
   ; CHECK: [[add8:%.*]] = fadd fast float [[val9]], [[pos1]]
   ; CHECK: [[res8:%.*]] = insertelement <1 x float> undef, float [[add8]], i32 0
   %tmp44 = extractelement <1 x float> %tmp42, i64 0
@@ -241,8 +241,8 @@ bb:
 
   ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10
   ; CHECK: [[ld10:%.*]] = load <1 x float>, <1 x float>* [[adr10]], align 4
-  ; CHECK: [[val10:%.*]] = extractelement <1 x float> [[ld10]], i32 0
-  ; CHECK: [[neg1:%.*]] = extractelement <1 x float> <float -1.000000e+00>, i32 0
+  ; CHECK-DAG: [[val10:%.*]] = extractelement <1 x float> [[ld10]], i32 0
+  ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> <float -1.000000e+00>, i32 0
   ; CHECK: [[add9:%.*]] = fadd fast float [[val10]], [[neg1]]
   ; CHECK: [[res9:%.*]] = insertelement <1 x float> undef, float [[add9]], i32 0
   %tmp48 = extractelement <1 x float> %tmp47, i64 0
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl
index c1ec22cdcb..2c2ef01b8a 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl
@@ -175,7 +175,7 @@ export vector<bool,NUM> logic(vector<bool,NUM> truth[10], vector<TYPE, NUM> cons
   // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
   // CHECK: [[vec0:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr0]]
   // CHECK: [[bvec0:%.*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
-  // CHECK: [[bres0:%.*]] = icmp eq <[[NUM]] x i1> %4, zeroinitializer
+  // CHECK: [[bres0:%.*]] = icmp eq <[[NUM]] x i1> [[bvec0]], zeroinitializer
   // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 0
   // CHECK: [[res0:%.*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32>
   // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[adr0]]

From a3a39b819831e753ddded2aef4ab4075811e4ad4 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Fri, 21 Mar 2025 15:52:09 -0600
Subject: [PATCH 08/17] another constant case I caught just in case

---
 .../CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
index 90c88d3cf7..cead71acb0 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
@@ -113,8 +113,8 @@ define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$00@@Y0L@$$CAV1@@Z"([11 x <1 x
 bb:
   ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0
   ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4
-  ; CHECK: [[zero:%.*]] = extractelement <1 x float> <float -0.000000e+00>, i32 0
-  ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0
+  ; CHECK-DAG: [[zero:%.*]] = extractelement <1 x float> <float -0.000000e+00>, i32 0
+  ; CHECK-DAG: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0
   ; CHECK: [[sub0:%.*]] = fsub fast float [[zero]], [[val0]]
   ; CHECK: [[res0:%.*]] = insertelement <1 x float> undef, float [[sub0]], i32 0
   %tmp = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0

From 6f7f9ec824759544219ba2d5f151d4a13e782732 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Mon, 24 Mar 2025 04:20:49 -0600
Subject: [PATCH 09/17] Respond to feedback

Initialize mattype with check for matrixness

remove leftover include

reword comment

refactor SM69 conditional to avoid double parent retrieval

Add test that confirms no short-circuiting with native vector logic ops and HLSL
2018.

Revise vec1 scalarizer test that was mistakenly generated with HLSL 2021
which included short-circuiting.

Add validation check for vector operations in pre-6.9
---
 lib/DxilValidation/DxilValidation.cpp         | 15 ++++
 lib/HLSL/HLMatrixBitcastLowerPass.cpp         |  4 +-
 lib/Transforms/Scalar/DxilEliminateVector.cpp |  2 -
 .../Scalar/ScalarReplAggregatesHLSL.cpp       |  2 +-
 lib/Transforms/Scalar/Scalarizer.cpp          |  4 +-
 .../types/longvec-operators-shortcircuit.hlsl | 57 ++++++++++++++++
 .../longvec-operators-vec1-scalarizer.ll      | 68 +++++++------------
 7 files changed, 102 insertions(+), 50 deletions(-)
 create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl

diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp
index d068262674..d9f48cc7e3 100644
--- a/lib/DxilValidation/DxilValidation.cpp
+++ b/lib/DxilValidation/DxilValidation.cpp
@@ -2672,6 +2672,21 @@ static bool IsLLVMInstructionAllowedForLib(Instruction &I,
   }
 }
 
+// Shader model specific checks for valid LLVM instructions.
+// Currently only checks for pre 6.9 usage of vector operations.
+// Returns false if shader model is pre 6.9 and I is an insertelement,
+// extractelement, or shufflevector instruction. Returns true otherwise.
+static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I,
+                                                   ValidationContext &ValCtx) {
+  if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus())
+    return true;
+  // Opcodes are plain unsigned values; only the vector ops are rejected.
+  const unsigned OpCode = I.getOpcode();
+  return OpCode != Instruction::InsertElement &&
+         OpCode != Instruction::ExtractElement &&
+         OpCode != Instruction::ShuffleVector;
+}
+
 static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
   bool SupportsMinPrecision =
       ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision;
diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
index 3d71ba2ba2..2cb3824740 100644
--- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp
+++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
@@ -189,7 +189,7 @@ void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
     User *U = *(it++);
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
       Type *EltTy = GEP->getType()->getPointerElementType();
-      if (HLMatrixType::isa(EltTy)) {
+      if (HLMatrixType MatTy =  HLMatrixType::dyn_cast(EltTy)) {
         // Change gep matrixArray, 0, index
         // into
         //   gep oneDimArray, 0, index * matSize
@@ -197,7 +197,7 @@ void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
         SmallVector<Value *, 2> idxList(GEP->idx_begin(), GEP->idx_end());
         DXASSERT(idxList.size() == 2,
                  "else not one dim matrix array index to matrix");
-        unsigned NumElts = HLMatrixType::cast(EltTy).getNumElements();
+        unsigned NumElts = MatTy.getNumElements();
         if (!SupportsVectors || NumElts == 1) {
           Value *MatSize = Builder.getInt32(NumElts);
           idxList.back() = Builder.CreateMul(idxList.back(), MatSize);
diff --git a/lib/Transforms/Scalar/DxilEliminateVector.cpp b/lib/Transforms/Scalar/DxilEliminateVector.cpp
index 3ebd48e420..366f011dae 100644
--- a/lib/Transforms/Scalar/DxilEliminateVector.cpp
+++ b/lib/Transforms/Scalar/DxilEliminateVector.cpp
@@ -10,8 +10,6 @@
 //                                                                           //
 ///////////////////////////////////////////////////////////////////////////////
 
-#include "dxc/DXIL/DxilModule.h"
-
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Pass.h"
diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
index 2c852e6c2f..ec17fce9c8 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
@@ -1949,7 +1949,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
         continue;
       }
 
-      // Flatten Global vector if no dynamic vector indexing.
+      // Flatten global vector if it has no dynamic vector indexing.
       bool bFlatVector = !hasDynamicVectorIndexing(GV);
 
       if (bFlatVector) {
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index d936b17be9..83a63d105e 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -293,8 +293,8 @@ bool Scalarizer::doInitialization(Module &M) {
 }
 
 bool Scalarizer::runOnFunction(Function &F) {
-  if (F.getParent()->HasDxilModule())
-    if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus())
+  Module *M = F.getParent();
+  if (M->HasDxilModule() && M->GetDxilModule().GetShaderModel()->IsSM69Plus())
       SupportsVectors = true;
 
   for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl
new file mode 100644
index 0000000000..cb2fd5f781
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl
@@ -0,0 +1,57 @@
+// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s
+// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s --check-prefix=NOBR
+
+// Test that no short-circuiting takes place for logic ops with native vectors.
+// First run verifies that side effects result in stores.
+// Second runline just makes sure there are no branches nor phis at all.
+
+// NOBR-NOT: br i1
+// NOBR-NOT: = phi
+
+export int4 logic(inout bool4 truth[5], inout int4 consequences[4]) {
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]]
+  // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]]
+  // CHECK: [[add:%.*]] = add <4 x i32> [[vec1]], <i32 1, i32 1, i32 1, i32 1>
+  // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr1]]
+  // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer
+  // CHECK: [[bres3:%.*]] = or <4 x i1> [[bvec1]], [[bvec0]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[res3:%.*]] = zext <4 x i1> [[bres3]] to <4 x i32>
+  // CHECK: store <4 x i32> [[res3]], <4 x i32>* [[adr3]]
+  truth[3] = truth[0] || consequences[1]++;
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]]
+  // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 0
+  // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]]
+  // CHECK: [[sub:%.*]] = add <4 x i32> [[vec0]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr0]]
+  // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer
+  // CHECK: [[bres4:%.*]] = and <4 x i1> [[bvec0]], [[bvec1]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 4
+  // CHECK: [[res4:%.*]] = zext <4 x i1> [[bres4]] to <4 x i32>
+  // CHECK: store <4 x i32> [[res4]], <4 x i32>* [[adr4]]
+  truth[4] = truth[1] && consequences[0]--;
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]]
+  // CHECK: [[bcond:%.*]] = icmp ne <4 x i32> [[vec2]], zeroinitializer
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]]
+  // CHECK: [[add:%.*]] = add <4 x i32> [[vec2]], <i32 1, i32 1, i32 1, i32 1>
+  // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load <4 x i32>, <4 x i32>* [[adr3]]
+  // CHECK: [[sub:%.*]] = add <4 x i32> [[vec3]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr3]]
+  // CHECK: [[res:%.*]] = select <4 x i1> [[bcond]], <4 x i32> [[vec2]], <4 x i32> [[vec3]]
+  int4 res = truth[2] ? consequences[2]++ : consequences[3]--;
+
+  // CHECK: ret <4 x i32> [[res]]
+  return res;
+}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
index cead71acb0..e5b6cf2dda 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
@@ -306,76 +306,58 @@ bb:
   ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
   ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4
   ; CHECK: [[cmp1:%.*]] = icmp ne i32 [[ld1]], 0
-  %tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
-  %tmp6 = load i32, i32* %tmp5, align 4
-  %tmp7 = icmp ne i32 %tmp6, 0
-  br i1 %tmp7, label %bb12, label %bb8
-
-bb8:                                              ; preds = %bb
   ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
   ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4
   ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0
+  ; CHECK: [[bres1:%.*]] = or i1 [[cmp1]], [[cmp2]]
+  ; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32
+  %tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  %tmp6 = load i32, i32* %tmp5, align 4
+  %tmp7 = icmp ne i32 %tmp6, 0
   %tmp9 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
   %tmp10 = load i32, i32* %tmp9, align 4
   %tmp11 = icmp ne i32 %tmp10, 0
-  br label %bb12
+  %tmp13 = or i1 %tmp7, %tmp11
+  %tmp14 = zext i1 %tmp13 to i32
 
-bb12:                                             ; preds = %bb8, %bb
-  ; CHECK: [[bres1:%.*]] = phi i1 [ true, %bb ], [ [[cmp2]], %bb8 ]
-  ; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32
   ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
   ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4
   ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0
-  %tmp13 = phi i1 [ true, %bb ], [ %tmp11, %bb8 ]
-  %tmp14 = zext i1 %tmp13 to i32
-  %tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
-  %tmp16 = load i32, i32* %tmp15, align 4
-  %tmp17 = icmp ne i32 %tmp16, 0
-  br i1 %tmp17, label %bb18, label %bb22
-
-bb18:                                             ; preds = %bb12
   ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
   ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
   ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0
+  ; CHECK: [[bres2:%.*]] = and i1 [[cmp2]], [[cmp3]]
+  ; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  %tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  %tmp16 = load i32, i32* %tmp15, align 4
+  %tmp17 = icmp ne i32 %tmp16, 0
   %tmp19 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
   %tmp20 = load i32, i32* %tmp19, align 4
   %tmp21 = icmp ne i32 %tmp20, 0
-  br label %bb22
-
-bb22:                                             ; preds = %bb18, %bb12
+  %tmp23 = and i1 %tmp17, %tmp21
+  %tmp24 = zext i1 %tmp23 to i32
 
-  ; CHECK: [[bres2:%.*]] = phi i1 [ false, %bb12 ], [ [[cmp3]], %bb18 ]
-  ; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
   ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
   ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
   ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0
-  %tmp23 = phi i1 [ false, %bb12 ], [ %tmp21, %bb18 ]
-  %tmp24 = zext i1 %tmp23 to i32
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4
+  ; CHECK: [[cmp4:%.*]] = icmp ne i32 [[ld4]], 0
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4
+  ; CHECK: [[cmp5:%.*]] = icmp ne i32 [[ld5]], 0
+  ; CHECK: [[bres3:%.*]] = select i1 [[cmp3]], i1 [[cmp4]], i1 [[cmp5]]
+  ; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
   %tmp25 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
   %tmp26 = load i32, i32* %tmp25, align 4
   %tmp27 = icmp ne i32 %tmp26, 0
-  br i1 %tmp27, label %bb28, label %bb31
-
-bb28:                                             ; preds = %bb22
-  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
-  ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4
   %tmp29 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
   %tmp30 = load i32, i32* %tmp29, align 4
-  br label %bb34
-
-bb31:                                             ; preds = %bb22
-  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
-  ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4
+  %tmp31 = icmp ne i32 %tmp30, 0
   %tmp32 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
   %tmp33 = load i32, i32* %tmp32, align 4
-  br label %bb34
-
-bb34:                                             ; preds = %bb31, %bb28
-  ; CHECK: [[res3:%.*]] = phi i32 [ [[ld4]], %bb28 ], [ [[ld5]], %bb31 ]
-  ; CHECK: [[bres3:%.*]] = icmp ne i32 [[res3]], 0
-  ; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
-  %.sink = phi i32 [ %tmp30, %bb28 ], [ %tmp33, %bb31 ]
-  %tmp35 = icmp ne i32 %.sink, 0
+  %tmp34 = icmp ne i32 %tmp33, 0
+  %tmp35 = select i1 %tmp27, i1 %tmp31, i1 %tmp34
   %tmp36 = zext i1 %tmp35 to i32
 
   ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0

From 1596d99a8a9d52093a6944b68b639be62aa943ea Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Mon, 24 Mar 2025 04:38:40 -0600
Subject: [PATCH 10/17] clang-format

---
 lib/HLSL/HLMatrixBitcastLowerPass.cpp | 2 +-
 lib/Transforms/Scalar/Scalarizer.cpp  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
index 2cb3824740..90cdd73cdd 100644
--- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp
+++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
@@ -189,7 +189,7 @@ void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
     User *U = *(it++);
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
       Type *EltTy = GEP->getType()->getPointerElementType();
-      if (HLMatrixType MatTy =  HLMatrixType::dyn_cast(EltTy)) {
+      if (HLMatrixType MatTy = HLMatrixType::dyn_cast(EltTy)) {
         // Change gep matrixArray, 0, index
         // into
         //   gep oneDimArray, 0, index * matSize
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 83a63d105e..df33a5f570 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -295,7 +295,7 @@ bool Scalarizer::doInitialization(Module &M) {
 bool Scalarizer::runOnFunction(Function &F) {
   Module *M = F.getParent();
   if (M->HasDxilModule() && M->GetDxilModule().GetShaderModel()->IsSM69Plus())
-      SupportsVectors = true;
+    SupportsVectors = true;
 
   for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
     BasicBlock *BB = BBI;

From 411b921b28ab43688ae5ef23c387073ddd182cd6 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Mon, 24 Mar 2025 05:10:04 -0600
Subject: [PATCH 11/17] fixup mistakenly submitted validation code

---
 lib/DxilValidation/DxilValidation.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp
index d9f48cc7e3..9ef5e17f80 100644
--- a/lib/DxilValidation/DxilValidation.cpp
+++ b/lib/DxilValidation/DxilValidation.cpp
@@ -2680,11 +2680,13 @@ static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I,
                                                    ValidationContext &ValCtx) {
   if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus())
     return true;
-  Instruction OpCode = I.getOpcode();
+  unsigned OpCode = I.getOpcode();
   if (OpCode == Instruction::InsertElement ||
       OpCode == Instruction::ExtractElement ||
       OpCode == Instruction::ShuffleVector)
     return false;
+
+  return true;
 }
 
 static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
@@ -2709,7 +2711,7 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
       }
 
       // Instructions must be allowed.
-      if (!IsLLVMInstructionAllowed(I)) {
+      if (!IsLLVMInstructionAllowed(I) || !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) {
         if (!IsLLVMInstructionAllowedForLib(I, ValCtx)) {
           ValCtx.EmitInstrError(&I, ValidationRule::InstrAllowed);
           continue;

From 8dc2a3e657d2ebf4a85842ecb6d8eee43a2a48c4 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Mon, 24 Mar 2025 05:11:21 -0600
Subject: [PATCH 12/17] clang-format

---
 lib/DxilValidation/DxilValidation.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp
index 9ef5e17f80..4622256dfe 100644
--- a/lib/DxilValidation/DxilValidation.cpp
+++ b/lib/DxilValidation/DxilValidation.cpp
@@ -2711,7 +2711,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
       }
 
       // Instructions must be allowed.
-      if (!IsLLVMInstructionAllowed(I) || !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) {
+      if (!IsLLVMInstructionAllowed(I) ||
+          !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) {
         if (!IsLLVMInstructionAllowedForLib(I, ValCtx)) {
           ValCtx.EmitInstrError(&I, ValidationRule::InstrAllowed);
           continue;

From 46829dd9aaf32dac0c41f092bb0087ea2bbb2a28 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Tue, 25 Mar 2025 11:21:03 -0600
Subject: [PATCH 13/17] Respond to feedback

Remove some unneeded parts of IR tests. Add some back. I discovered that
the pauseresume line is at least sometimes needed to construct the
module with the shader model information. I fear some of the tests were
passing before because they expected no changes, but were operating in
non-6.9 mode. So they passed, but for the wrong reason.

Remove unused DM param to matrixbitcastlowerpass lower matrix call.

reword confusing dynindexingvectortoarray comment
---
 lib/HLSL/HLMatrixBitcastLowerPass.cpp                | 11 +++++------
 lib/Transforms/Scalar/LowerTypePasses.cpp            |  5 +++--
 .../passes/longvec-alloca-gv-dynvec2array.ll         | 12 ++----------
 .../CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll     |  8 ++------
 .../passes/longvec-operators-scalarizer.ll           |  4 ----
 .../passes/longvec-operators-vec1-scalarizer.ll      |  3 +--
 .../passes/dxil/lower_type/vec_array_param.ll        |  2 +-
 7 files changed, 14 insertions(+), 31 deletions(-)

diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
index 90cdd73cdd..db20d8a324 100644
--- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp
+++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
@@ -116,13 +116,13 @@ class MatrixBitcastLowerPass : public FunctionPass {
 
     // Lower matrix first.
     for (BitCastInst *BCI : matCastSet) {
-      lowerMatrix(DM, BCI, BCI->getOperand(0));
+      lowerMatrix(BCI, BCI->getOperand(0));
     }
     return bUpdated;
   }
 
 private:
-  void lowerMatrix(DxilModule &DM, Instruction *M, Value *A);
+  void lowerMatrix(Instruction *M, Value *A);
   bool hasCallUser(Instruction *M);
 };
 
@@ -183,8 +183,7 @@ Value *CreateEltGEP(Value *A, unsigned i, Value *zeroIdx,
 }
 } // namespace
 
-void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
-                                         Value *A) {
+void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) {
   for (auto it = M->user_begin(); it != M->user_end();) {
     User *U = *(it++);
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
@@ -203,14 +202,14 @@ void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
           idxList.back() = Builder.CreateMul(idxList.back(), MatSize);
         }
         Value *NewGEP = Builder.CreateGEP(A, idxList);
-        lowerMatrix(DM, GEP, NewGEP);
+        lowerMatrix(GEP, NewGEP);
         DXASSERT(GEP->user_empty(), "else lower matrix fail");
         GEP->eraseFromParent();
       } else {
         DXASSERT(0, "invalid GEP for matrix");
       }
     } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      lowerMatrix(DM, BCI, A);
+      lowerMatrix(BCI, A);
       DXASSERT(BCI->user_empty(), "else lower matrix fail");
       BCI->eraseFromParent();
     } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp
index 7dada4277e..6ce7ba0431 100644
--- a/lib/Transforms/Scalar/LowerTypePasses.cpp
+++ b/lib/Transforms/Scalar/LowerTypePasses.cpp
@@ -216,10 +216,11 @@ void DynamicIndexingVectorToArray::initialize(Module &M) {
   //  - From standard compile before dxilgen.
   //  - When linking, where dxmodule is available.
   //  - In isolated dxopt, where the module will need to be created.
-  // Since HL module can't be created when linking, check for that first.
+  // When linking, we expect a dxil module and can't create an HL module,
+  //  so we try for the dxil module first.
   // Otherwise, either retrieve or generate the HL module.
   if (M.HasDxilModule()) {
-    SupportsVectors = M.GetDxilModule().GetShaderModel()->IsSM69Plus();
+    SupportsVectors = M.GetOrCreateDxilModule().GetShaderModel()->IsSM69Plus();
   } else {
     HLModule &HLM = M.GetOrCreateHLModule();
     SupportsVectors = HLM.GetShaderModel()->IsSM69Plus();
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll
index a811ff9f47..5245f1e223 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll
@@ -74,7 +74,7 @@ bb:
   %stloc1 = alloca <1 x float>, align 4
   %stloc2 = alloca <2 x float>, align 4
   %stlar2 = alloca [4 x <2 x float>], align 4
-  store i32 %ix, i32* %tmp, align 4, !tbaa !22
+  store i32 %ix, i32* %tmp, align 4
 
   %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7
   %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10
@@ -265,6 +265,7 @@ bb76:                                             ; preds = %bb17, %bb
 
 attributes #0 = { nounwind }
 
+!pauseresume = !{!1}
 !dx.version = !{!3}
 !dx.valver = !{!3}
 !dx.shaderModel = !{!4}
@@ -273,9 +274,7 @@ attributes #0 = { nounwind }
 !dx.fnprops = !{}
 !dx.options = !{!20, !21}
 
-!0 = !{i32 2, !"Debug Info Version", i32 3}
 !1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
-!2 = !{!"dxc(private) 1.8.0.4845 (disable_disble_spirv, 2514104b9-dirty)"}
 !3 = !{i32 1, i32 9}
 !4 = !{!"lib", i32 6, i32 9}
 !5 = !{i32 0, %struct.VectRec1 undef, !6, %struct.VectRec2 undef, !8}
@@ -295,10 +294,3 @@ attributes #0 = { nounwind }
 !19 = !{null, !"", null, null, null}
 !20 = !{i32 64}
 !21 = !{i32 -1}
-!22 = !{!23, !23, i64 0}
-!23 = !{!"int", !24, i64 0}
-!24 = !{!"omnipotent char", !25, i64 0}
-!25 = !{!"Simple C/C++ TBAA"}
-!44 = !{!45, !45, i64 0}
-!45 = !{!"float", !24, i64 0}
-!148 = !{!24, !24, i64 0}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll
index a3ba294c62..95a64a17d4 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll
@@ -82,7 +82,7 @@ bb:
   %stlorc1 = alloca %struct.VectRec1, align 4
   %stlorc2 = alloca %struct.VectRec2, align 4
 
-  store i32 %ix, i32* %tmp, align 4, !tbaa !25
+  store i32 %ix, i32* %tmp, align 4
   %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7
   %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10
   %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10
@@ -286,6 +286,7 @@ bb86:                                             ; preds = %bb17, %bb
 
 attributes #0 = { nounwind }
 
+!pauseresume = !{!1}
 !dx.version = !{!3}
 !dx.valver = !{!3}
 !dx.shaderModel = !{!4}
@@ -294,9 +295,7 @@ attributes #0 = { nounwind }
 !dx.fnprops = !{}
 !dx.options = !{!23, !24}
 
-!0 = !{i32 2, !"Debug Info Version", i32 3}
 !1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
-!2 = !{!"dxc(private) 1.8.0.4845 (disable_disble_spirv, 2514104b9-dirty)"}
 !3 = !{i32 1, i32 9}
 !4 = !{!"lib", i32 6, i32 9}
 !5 = !{i32 0, %struct.VectRec1 undef, !6, %struct.VectRec2 undef, !8}
@@ -323,6 +322,3 @@ attributes #0 = { nounwind }
 !26 = !{!"int", !27, i64 0}
 !27 = !{!"omnipotent char", !28, i64 0}
 !28 = !{!"Simple C/C++ TBAA"}
-!47 = !{!48, !48, i64 0}
-!48 = !{!"float", !27, i64 0}
-!155 = !{!27, !27, i64 0}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
index 77a5c0681c..4da59671bc 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
@@ -655,9 +655,7 @@ attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
 
-!llvm.module.flags = !{!0}
 !pauseresume = !{!1}
-!llvm.ident = !{!2}
 !dx.version = !{!3}
 !dx.valver = !{!3}
 !dx.shaderModel = !{!4}
@@ -665,9 +663,7 @@ attributes #2 = { nounwind readnone }
 !dx.typeAnnotations = !{!9, !15}
 !dx.entryPoints = !{!35}
 
-!0 = !{i32 2, !"Debug Info Version", i32 3}
 !1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
-!2 = !{!"dxc(private) 1.8.0.4845 (disable_disble_spirv, 2514104b9-dirty)"}
 !3 = !{i32 1, i32 9}
 !4 = !{!"lib", i32 6, i32 9}
 !5 = !{null, !6, null, null}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
index e5b6cf2dda..8ef8f34bcb 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
@@ -741,6 +741,7 @@ attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
 
+!pauseresume = !{!1}
 !dx.version = !{!3}
 !dx.valver = !{!3}
 !dx.shaderModel = !{!4}
@@ -748,9 +749,7 @@ attributes #2 = { nounwind readonly }
 !dx.typeAnnotations = !{!9, !15}
 !dx.entryPoints = !{!35}
 
-!0 = !{i32 2, !"Debug Info Version", i32 3}
 !1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
-!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"}
 !3 = !{i32 1, i32 9}
 !4 = !{!"lib", i32 6, i32 9}
 !5 = !{null, !6, null, null}
diff --git a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
index a7b7a90012..cd182b1dfd 100644
--- a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
+++ b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
@@ -31,6 +31,7 @@ declare float @"\01?foo@@YAMY02V?$vector@M$02@@@Z"([3 x <3 x float>]*)
 
 attributes #0 = { nounwind }
 
+!pauseresume = !{!1}
 !dx.version = !{!3}
 !dx.valver = !{!4}
 !dx.shaderModel = !{!5}
@@ -39,7 +40,6 @@ attributes #0 = { nounwind }
 !dx.fnprops = !{}
 !dx.options = !{!13, !14}
 
-!0 = !{i32 2, !"Debug Info Version", i32 3}
 !1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
 !3 = !{i32 1, i32 6}
 !4 = !{i32 1, i32 6}

From 9002beeb96fb7a5e164e571fa2a5a594252b42b5 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Tue, 25 Mar 2025 17:03:04 -0600
Subject: [PATCH 14/17] Allow loading DXIL shader model without module

Adds a dxilutil function to retrieve shader model from metadata. To
enable this, the metadata helper implementation is split into a few
utility functions which will continue to throw the exceptions as they
did before, but allows passes to call them without fear of crashes as a
result of that. Passes will assume the default shader model where it
can't retrieve it for one reason or another.

Changes scalarizer to try to retrieve the shader model when no module
is found. Changes dynamicvector to array to use the utility function and
never try to recreate the hlmodule. Instead it will try the dxilmodule
and if that isn't present, it will use the metadata for shader model
only.
---
 include/dxc/DXIL/DxilMetadataHelper.h     |  3 ++
 include/dxc/DXIL/DxilUtil.h               |  4 +++
 lib/DXIL/DxilMetadataHelper.cpp           | 44 ++++++++++++++++-------
 lib/DXIL/DxilUtil.cpp                     | 11 ++++++
 lib/Transforms/Scalar/LowerTypePasses.cpp | 22 +++++-------
 lib/Transforms/Scalar/Scalarizer.cpp      |  7 ++--
 6 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h
index fa13f6d766..f15c5084fc 100644
--- a/include/dxc/DXIL/DxilMetadataHelper.h
+++ b/include/dxc/DXIL/DxilMetadataHelper.h
@@ -435,6 +435,7 @@ class DxilMDHelper {
   // Shader model.
   void EmitDxilShaderModel(const ShaderModel *pSM);
   void LoadDxilShaderModel(const ShaderModel *&pSM);
+  static const ShaderModel *LoadDxilShaderModel(const llvm::Module *pModule);
 
   // Intermediate flags
   void EmitDxilIntermediateOptions(uint32_t flags);
@@ -687,6 +688,8 @@ class DxilMDHelper {
   static void
   CopyMetadata(llvm::Instruction &I, llvm::Instruction &SrcInst,
                llvm::ArrayRef<unsigned> WL = llvm::ArrayRef<unsigned>());
+  static bool
+  LoadShaderModelName(const llvm::Module *pModule, std::string &str);
 
 private:
   llvm::LLVMContext &m_Ctx;
diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h
index 490f335db5..d026ce41bc 100644
--- a/include/dxc/DXIL/DxilUtil.h
+++ b/include/dxc/DXIL/DxilUtil.h
@@ -221,6 +221,10 @@ bool DeleteDeadAllocas(llvm::Function &F);
 llvm::Value *GEPIdxToOffset(llvm::GetElementPtrInst *GEP,
                             llvm::IRBuilder<> &Builder, hlsl::OP *OP,
                             const llvm::DataLayout &DL);
+
+// Returns shader model appropriate to given module.
+const ShaderModel *LoadShaderModel(const llvm::Module &M);
+
 } // namespace dxilutil
 
 } // namespace hlsl
diff --git a/lib/DXIL/DxilMetadataHelper.cpp b/lib/DXIL/DxilMetadataHelper.cpp
index fdd6d6b946..dbb4dd4d05 100644
--- a/lib/DXIL/DxilMetadataHelper.cpp
+++ b/lib/DXIL/DxilMetadataHelper.cpp
@@ -261,31 +261,49 @@ void DxilMDHelper::EmitDxilShaderModel(const ShaderModel *pSM) {
   SetShaderModel(pSM);
 }
 
-void DxilMDHelper::LoadDxilShaderModel(const ShaderModel *&pSM) {
+// Retrieve the name string of the shader model for the given module.
+// Returns true and passes shader model target string through str if valid.
+// Returns false if metadata is missing or invalid.
+bool DxilMDHelper::LoadShaderModelName(const Module *pModule, string &Name) {
   NamedMDNode *pShaderModelNamedMD =
-      m_pModule->getNamedMetadata(kDxilShaderModelMDName);
-  IFTBOOL(pShaderModelNamedMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
-  IFTBOOL(pShaderModelNamedMD->getNumOperands() == 1,
-          DXC_E_INCORRECT_DXIL_METADATA);
+      pModule->getNamedMetadata(kDxilShaderModelMDName);
+  IFRBOOL(pShaderModelNamedMD != nullptr, false);
+  IFRBOOL(pShaderModelNamedMD->getNumOperands() == 1, false);
 
   MDNode *pShaderModelMD = pShaderModelNamedMD->getOperand(0);
-  IFTBOOL(pShaderModelMD->getNumOperands() == kDxilShaderModelNumFields,
-          DXC_E_INCORRECT_DXIL_METADATA);
+  IFRBOOL(pShaderModelMD->getNumOperands() == kDxilShaderModelNumFields, false);
 
   MDString *pShaderTypeMD =
       dyn_cast<MDString>(pShaderModelMD->getOperand(kDxilShaderModelTypeIdx));
-  IFTBOOL(pShaderTypeMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  IFRBOOL(pShaderTypeMD != nullptr, false);
   unsigned Major =
       ConstMDToUint32(pShaderModelMD->getOperand(kDxilShaderModelMajorIdx));
   unsigned Minor =
       ConstMDToUint32(pShaderModelMD->getOperand(kDxilShaderModelMinorIdx));
-  string ShaderModelName = pShaderTypeMD->getString().str();
-  ShaderModelName +=
-      "_" + std::to_string(Major) + "_" +
-      (Minor == ShaderModel::kOfflineMinor ? "x" : std::to_string(Minor));
-  pSM = ShaderModel::GetByName(ShaderModelName.c_str());
+  Name = pShaderTypeMD->getString().str();
+  Name += "_" + std::to_string(Major) + "_" +
+          (Minor == ShaderModel::kOfflineMinor ? "x" : std::to_string(Minor));
+  return true;
+}
+
+// Load shader model object from metadata contained in pModule.
+// Returns nullptr when LoadShaderModelName reports failure; otherwise
+// returns whatever ShaderModel::GetByName yields for the assembled name.
+const ShaderModel *DxilMDHelper::LoadDxilShaderModel(const Module *pModule) {
+  string ShaderModelName;
+  IFRBOOL(LoadShaderModelName(pModule, ShaderModelName), nullptr);
+  return ShaderModel::GetByName(ShaderModelName.c_str());
+}
+
+// Load shader model object from metadata MDHelper's module
+// and set it as current for MDHelper.
+void DxilMDHelper::LoadDxilShaderModel(const ShaderModel *&pSM) {
+  pSM = LoadDxilShaderModel(m_pModule);
+  IFTBOOL(pSM != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
   if (!pSM->IsValidForDxil()) {
     char ErrorMsgTxt[40];
+    string ShaderModelName;
+    LoadShaderModelName(m_pModule, ShaderModelName);
     StringCchPrintfA(ErrorMsgTxt, _countof(ErrorMsgTxt),
                      "Unknown shader model '%s'", ShaderModelName.c_str());
     string ErrorMsg(ErrorMsgTxt);
diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp
index 865fad487c..415a585644 100644
--- a/lib/DXIL/DxilUtil.cpp
+++ b/lib/DXIL/DxilUtil.cpp
@@ -1394,5 +1394,16 @@ bool DeleteDeadAllocas(llvm::Function &F) {
   return Changed;
 }
 
+// Retrieve the shader model for the given module.
+// Uses the DxilModule's shader model when the module has one; otherwise
+// loads the shader model from the module metadata (HL module not consulted).
+// May return nullptr if the metadata lookup fails, so callers should check.
+const ShaderModel *LoadShaderModel(const llvm::Module &M) {
+  if (M.HasDxilModule())
+    return M.GetDxilModule().GetShaderModel();
+
+  return DxilMDHelper::LoadDxilShaderModel(&M);
+}
+
 } // namespace dxilutil
 } // namespace hlsl
diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp
index 6ce7ba0431..c3f1f3b8e9 100644
--- a/lib/Transforms/Scalar/LowerTypePasses.cpp
+++ b/lib/Transforms/Scalar/LowerTypePasses.cpp
@@ -212,19 +212,15 @@ class DynamicIndexingVectorToArray : public LowerTypePass {
 };
 
 void DynamicIndexingVectorToArray::initialize(Module &M) {
-  // Can be invoked in a few places:
-  //  - From standard compile before dxilgen.
-  //  - When linking, where dxmodule is available.
-  //  - In isolated dxopt, where the module will need to be created.
-  // When linking, we expect a dxil module and can't create an HL module,
-  //  so we try for the dxil module first.
-  // Otherwise, either retrieve or generate the HL module.
-  if (M.HasDxilModule()) {
-    SupportsVectors = M.GetOrCreateDxilModule().GetShaderModel()->IsSM69Plus();
-  } else {
-    HLModule &HLM = M.GetOrCreateHLModule();
-    SupportsVectors = HLM.GetShaderModel()->IsSM69Plus();
-  }
+  // Set vector support according to available shader model.
+  // Use HLModule shader model if present.
+  // Otherwise retrieve from dxil module or metadata.
+  const ShaderModel *SM = nullptr;
+  if (M.HasHLModule())
+    SM = M.GetHLModule().GetShaderModel();
+  else
+    SM = dxilutil::LoadShaderModel(M);
+  SupportsVectors = SM && SM->IsSM69Plus();
 }
 
 void DynamicIndexingVectorToArray::applyOptions(PassOptions O) {
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index df33a5f570..36b31f0de4 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "dxc/DXIL/DxilModule.h"
+#include "dxc/DXIL/DxilUtil.h"
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/IRBuilder.h"
@@ -293,9 +294,9 @@ bool Scalarizer::doInitialization(Module &M) {
 }
 
 bool Scalarizer::runOnFunction(Function &F) {
-  Module *M = F.getParent();
-  if (M->HasDxilModule() && M->GetDxilModule().GetShaderModel()->IsSM69Plus())
-    SupportsVectors = true;
+  const Module *M = F.getParent();
+  const hlsl::ShaderModel *SM = hlsl::dxilutil::LoadShaderModel(*M);
+  SupportsVectors = SM && SM->IsSM69Plus();
 
   for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
     BasicBlock *BB = BBI;

From ba831c07d1788f9b74dc17b91029c9d0f9c020e5 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Tue, 25 Mar 2025 17:12:13 -0600
Subject: [PATCH 15/17] small clang-format change

---
 include/dxc/DXIL/DxilMetadataHelper.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h
index f15c5084fc..f7d78f346a 100644
--- a/include/dxc/DXIL/DxilMetadataHelper.h
+++ b/include/dxc/DXIL/DxilMetadataHelper.h
@@ -688,8 +688,8 @@ class DxilMDHelper {
   static void
   CopyMetadata(llvm::Instruction &I, llvm::Instruction &SrcInst,
                llvm::ArrayRef<unsigned> WL = llvm::ArrayRef<unsigned>());
-  static bool
-  LoadShaderModelName(const llvm::Module *pModule, std::string &str);
+  static bool LoadShaderModelName(const llvm::Module *pModule,
+                                  std::string &str);
 
 private:
   llvm::LLVMContext &m_Ctx;

From 7292edba3eb7cc56e08289e25090f50616e8ac0a Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Tue, 25 Mar 2025 18:42:03 -0600
Subject: [PATCH 16/17] Retrieve dxil version instead of shader model from
 metadata

---
 include/dxc/DXIL/DxilMetadataHelper.h     |  5 +-
 include/dxc/DXIL/DxilUtil.h               |  4 +-
 lib/DXIL/DxilMetadataHelper.cpp           | 67 ++++++++++-------------
 lib/DXIL/DxilUtil.cpp                     | 20 ++++---
 lib/Transforms/Scalar/LowerTypePasses.cpp | 12 ++--
 lib/Transforms/Scalar/Scalarizer.cpp      |  7 ++-
 6 files changed, 56 insertions(+), 59 deletions(-)

diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h
index f7d78f346a..9df155e6e7 100644
--- a/include/dxc/DXIL/DxilMetadataHelper.h
+++ b/include/dxc/DXIL/DxilMetadataHelper.h
@@ -427,6 +427,8 @@ class DxilMDHelper {
   // Dxil version.
   void EmitDxilVersion(unsigned Major, unsigned Minor);
   void LoadDxilVersion(unsigned &Major, unsigned &Minor);
+  static bool LoadDxilVersion(const llvm::Module *pModule, unsigned &Major,
+                              unsigned &Minor);
 
   // Validator version.
   void EmitValidatorVersion(unsigned Major, unsigned Minor);
@@ -435,7 +437,6 @@ class DxilMDHelper {
   // Shader model.
   void EmitDxilShaderModel(const ShaderModel *pSM);
   void LoadDxilShaderModel(const ShaderModel *&pSM);
-  static const ShaderModel *LoadDxilShaderModel(const llvm::Module *pModule);
 
   // Intermediate flags
   void EmitDxilIntermediateOptions(uint32_t flags);
@@ -688,8 +689,6 @@ class DxilMDHelper {
   static void
   CopyMetadata(llvm::Instruction &I, llvm::Instruction &SrcInst,
                llvm::ArrayRef<unsigned> WL = llvm::ArrayRef<unsigned>());
-  static bool LoadShaderModelName(const llvm::Module *pModule,
-                                  std::string &str);
 
 private:
   llvm::LLVMContext &m_Ctx;
diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h
index d026ce41bc..bffc79d6bf 100644
--- a/include/dxc/DXIL/DxilUtil.h
+++ b/include/dxc/DXIL/DxilUtil.h
@@ -222,8 +222,8 @@ llvm::Value *GEPIdxToOffset(llvm::GetElementPtrInst *GEP,
                             llvm::IRBuilder<> &Builder, hlsl::OP *OP,
                             const llvm::DataLayout &DL);
 
-// Returns shader model appropriate to given module.
-const ShaderModel *LoadShaderModel(const llvm::Module &M);
+// Retrieves the Dxil version of module M. Major/Minor are only set on true.
+bool LoadDxilVersion(const llvm::Module *M, unsigned &Major, unsigned &Minor);
 
 } // namespace dxilutil
 
diff --git a/lib/DXIL/DxilMetadataHelper.cpp b/lib/DXIL/DxilMetadataHelper.cpp
index dbb4dd4d05..19d199ee29 100644
--- a/lib/DXIL/DxilMetadataHelper.cpp
+++ b/lib/DXIL/DxilMetadataHelper.cpp
@@ -177,17 +177,28 @@ void DxilMDHelper::EmitDxilVersion(unsigned Major, unsigned Minor) {
   pDxilVersionMD->addOperand(MDNode::get(m_Ctx, MDVals));
 }
 
-void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) {
-  NamedMDNode *pDxilVersionMD = m_pModule->getNamedMetadata(kDxilVersionMDName);
-  IFTBOOL(pDxilVersionMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
-  IFTBOOL(pDxilVersionMD->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA);
+// Load dxil version from metadata contained in pModule.
+// Returns true and passes result through
+// the dxil major/minor version params if valid.
+// Returns false if metadata is missing or invalid.
+bool DxilMDHelper::LoadDxilVersion(const Module *pModule, unsigned &Major,
+                                   unsigned &Minor) {
+  NamedMDNode *pDxilVersionMD = pModule->getNamedMetadata(kDxilVersionMDName);
+  IFRBOOL(pDxilVersionMD != nullptr, false);
+  IFRBOOL(pDxilVersionMD->getNumOperands() == 1, false);
 
   MDNode *pVersionMD = pDxilVersionMD->getOperand(0);
-  IFTBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields,
-          DXC_E_INCORRECT_DXIL_METADATA);
+  IFRBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields, false);
 
   Major = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMajorIdx));
   Minor = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMinorIdx));
+
+  return true;
+}
+
+void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) {
+  IFTBOOL(LoadDxilVersion(m_pModule, Major, Minor),
+          DXC_E_INCORRECT_DXIL_METADATA);
 }
 
 //
@@ -261,49 +272,31 @@ void DxilMDHelper::EmitDxilShaderModel(const ShaderModel *pSM) {
   SetShaderModel(pSM);
 }
 
-// Retrieve the name string of the shader model for the given module.
-// Returns true and passes shader model target string through str if valid.
-// Returns false if metadata is missing or invalid.
-bool DxilMDHelper::LoadShaderModelName(const Module *pModule, string &Name) {
+void DxilMDHelper::LoadDxilShaderModel(const ShaderModel *&pSM) {
   NamedMDNode *pShaderModelNamedMD =
-      pModule->getNamedMetadata(kDxilShaderModelMDName);
-  IFRBOOL(pShaderModelNamedMD != nullptr, false);
-  IFRBOOL(pShaderModelNamedMD->getNumOperands() == 1, false);
+      m_pModule->getNamedMetadata(kDxilShaderModelMDName);
+  IFTBOOL(pShaderModelNamedMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  IFTBOOL(pShaderModelNamedMD->getNumOperands() == 1,
+          DXC_E_INCORRECT_DXIL_METADATA);
 
   MDNode *pShaderModelMD = pShaderModelNamedMD->getOperand(0);
-  IFRBOOL(pShaderModelMD->getNumOperands() == kDxilShaderModelNumFields, false);
+  IFTBOOL(pShaderModelMD->getNumOperands() == kDxilShaderModelNumFields,
+          DXC_E_INCORRECT_DXIL_METADATA);
 
   MDString *pShaderTypeMD =
       dyn_cast<MDString>(pShaderModelMD->getOperand(kDxilShaderModelTypeIdx));
-  IFRBOOL(pShaderTypeMD != nullptr, false);
+  IFTBOOL(pShaderTypeMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
   unsigned Major =
       ConstMDToUint32(pShaderModelMD->getOperand(kDxilShaderModelMajorIdx));
   unsigned Minor =
       ConstMDToUint32(pShaderModelMD->getOperand(kDxilShaderModelMinorIdx));
-  Name = pShaderTypeMD->getString().str();
-  Name += "_" + std::to_string(Major) + "_" +
-          (Minor == ShaderModel::kOfflineMinor ? "x" : std::to_string(Minor));
-  return true;
-}
-
-// Load shader model object from metadata contained in pModule.
-// Throws exceptions if any metadata is invalid or the values
-// of the shader model are invalid.
-const ShaderModel *DxilMDHelper::LoadDxilShaderModel(const Module *pModule) {
-  string ShaderModelName;
-  IFRBOOL(LoadShaderModelName(pModule, ShaderModelName), nullptr);
-  return ShaderModel::GetByName(ShaderModelName.c_str());
-}
-
-// Load shader model object from metadata MDHelper's module
-// and set it as current for MDHelper.
-void DxilMDHelper::LoadDxilShaderModel(const ShaderModel *&pSM) {
-  pSM = LoadDxilShaderModel(m_pModule);
-  IFTBOOL(pSM != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  string ShaderModelName = pShaderTypeMD->getString().str();
+  ShaderModelName +=
+      "_" + std::to_string(Major) + "_" +
+      (Minor == ShaderModel::kOfflineMinor ? "x" : std::to_string(Minor));
+  pSM = ShaderModel::GetByName(ShaderModelName.c_str());
   if (!pSM->IsValidForDxil()) {
     char ErrorMsgTxt[40];
-    string ShaderModelName;
-    LoadShaderModelName(m_pModule, ShaderModelName);
     StringCchPrintfA(ErrorMsgTxt, _countof(ErrorMsgTxt),
                      "Unknown shader model '%s'", ShaderModelName.c_str());
     string ErrorMsg(ErrorMsgTxt);
diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp
index 415a585644..7d57895491 100644
--- a/lib/DXIL/DxilUtil.cpp
+++ b/lib/DXIL/DxilUtil.cpp
@@ -1394,15 +1394,17 @@ bool DeleteDeadAllocas(llvm::Function &F) {
   return Changed;
 }
 
-// Retrieve stored shader model in the given module.
-// Where the module doesn't have HL nor Dxil modules,
-// it identifies and returns the shader model from the module metatdata.
-// Returns nullptr where none of that works, but that shouldn't happen much.
-const ShaderModel *LoadShaderModel(const llvm::Module &M) {
-  if (M.HasDxilModule())
-    return M.GetDxilModule().GetShaderModel();
-
-  return DxilMDHelper::LoadDxilShaderModel(&M);
+// Retrieve dxil version in the given module.
+// Where the module doesn't already have a Dxil module,
+// it identifies and returns the version info from the metadata.
+// Returns false where none of that works, but that shouldn't happen much.
+bool LoadDxilVersion(const Module *M, unsigned &Major, unsigned &Minor) {
+  if (M->HasDxilModule()) {
+    M->GetDxilModule().GetShaderModel()->GetDxilVersion(Major, Minor);
+    return true;
+  }
+  // No module, try metadata.
+  return DxilMDHelper::LoadDxilVersion(M, Major, Minor);
 }
 
 } // namespace dxilutil
diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp
index c3f1f3b8e9..d2438c7e22 100644
--- a/lib/Transforms/Scalar/LowerTypePasses.cpp
+++ b/lib/Transforms/Scalar/LowerTypePasses.cpp
@@ -212,15 +212,15 @@ class DynamicIndexingVectorToArray : public LowerTypePass {
 };
 
 void DynamicIndexingVectorToArray::initialize(Module &M) {
-  // Set vector support according to available shader model.
-  // Use HLModule shader model if present.
+  // Set vector support according to available Dxil version.
+  // Use the HLModule's shader model for version info if present.
   // Otherwise retrieve from dxil module or metadata.
-  const ShaderModel *SM = nullptr;
+  unsigned Major = 0, Minor = 0;
   if (M.HasHLModule())
-    SM = M.GetHLModule().GetShaderModel();
+    M.GetHLModule().GetShaderModel()->GetDxilVersion(Major, Minor);
   else
-    SM = dxilutil::LoadShaderModel(M);
-  SupportsVectors = SM && SM->IsSM69Plus();
+    dxilutil::LoadDxilVersion(&M, Major, Minor);
+  SupportsVectors = (Major == 1 && Minor >= 9);
 }
 
 void DynamicIndexingVectorToArray::applyOptions(PassOptions O) {
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 36b31f0de4..730354af99 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -294,9 +294,12 @@ bool Scalarizer::doInitialization(Module &M) {
 }
 
 bool Scalarizer::runOnFunction(Function &F) {
+  // HLSL Change start - set SupportsVectors
   const Module *M = F.getParent();
-  const hlsl::ShaderModel *SM = hlsl::dxilutil::LoadShaderModel(*M);
-  SupportsVectors = SM && SM->IsSM69Plus();
+  unsigned Major = 0, Minor = 0;
+  if (hlsl::dxilutil::LoadDxilVersion(M, Major, Minor))
+    SupportsVectors = (Major == 1 && Minor >= 9);
+  // HLSL Change end - set SupportsVectors
 
   for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
     BasicBlock *BB = BBI;

From 96ac2e27781fbe04036b9ac5b50a210f1dd955ac Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Tue, 25 Mar 2025 18:58:19 -0600
Subject: [PATCH 17/17] trim down IR passes

With only minimal version metadata now required by the passes, these
IR pass tests can be more succinct.
---
 .../passes/longvec-alloca-gv-dynvec2array.ll  | 31 +-------------
 .../passes/longvec-operators-scalarizer.ll    | 41 +-----------------
 .../longvec-operators-vec1-scalarizer.ll      | 42 +------------------
 .../passes/dxil/lower_type/vec_array_param.ll | 23 ----------
 4 files changed, 4 insertions(+), 133 deletions(-)

diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll
index 5245f1e223..987f997a2a 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll
@@ -1,4 +1,4 @@
-; RUN: %dxopt %s -hlsl-passes-resume -dynamic-vector-to-array,ReplaceAllVectors=0 -S | FileCheck %s
+; RUN: %dxopt %s -dynamic-vector-to-array,ReplaceAllVectors=0 -S | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-ms-dx"
@@ -66,7 +66,7 @@ bb:
   %dylorc2.0 = alloca <2 x float>
   %stlorc2.0 = alloca <2 x float>
   %stlar1.0 = alloca [3 x float]
-  %tmp = alloca i32, align 4, !dx.temp !14
+  %tmp = alloca i32, align 4
   %dyloc1 = alloca <1 x float>, align 4
   %dyloc2 = alloca <2 x float>, align 4
   %dylar1 = alloca [3 x <1 x float>], align 4
@@ -265,32 +265,5 @@ bb76:                                             ; preds = %bb17, %bb
 
 attributes #0 = { nounwind }
 
-!pauseresume = !{!1}
 !dx.version = !{!3}
-!dx.valver = !{!3}
-!dx.shaderModel = !{!4}
-!dx.typeAnnotations = !{!5, !10}
-!dx.entryPoints = !{!19}
-!dx.fnprops = !{}
-!dx.options = !{!20, !21}
-
-!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
 !3 = !{i32 1, i32 9}
-!4 = !{!"lib", i32 6, i32 9}
-!5 = !{i32 0, %struct.VectRec1 undef, !6, %struct.VectRec2 undef, !8}
-!6 = !{i32 4, !7}
-!7 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC1", i32 7, i32 9, i32 13, i32 1}
-!8 = !{i32 8, !9}
-!9 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC2", i32 7, i32 9, i32 13, i32 2}
-!10 = !{i32 1, <4 x float> (i32, [12 x float]*)* @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z", !11}
-!11 = !{!12, !15, !17}
-!12 = !{i32 1, !13, !14}
-!13 = !{i32 7, i32 9, i32 13, i32 4}
-!14 = !{}
-!15 = !{i32 0, !16, !14}
-!16 = !{i32 4, !"IX", i32 7, i32 4}
-!17 = !{i32 0, !18, !14}
-!18 = !{i32 4, !"VAL", i32 7, i32 9}
-!19 = !{null, !"", null, null, null}
-!20 = !{i32 64}
-!21 = !{i32 -1}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
index 4da59671bc..1fe7c17621 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
@@ -1,4 +1,4 @@
-; RUN: %dxopt %s -hlsl-passes-resume -scalarizer -S | FileCheck %s
+; RUN: %dxopt %s -scalarizer -S | FileCheck %s
 
 ; Vectors of length greather than 1 should get no changes from scalarizer,
 ; so this unusual test, verifies that the pass makes no changes at all.
@@ -655,45 +655,6 @@ attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
 
-!pauseresume = !{!1}
 !dx.version = !{!3}
-!dx.valver = !{!3}
-!dx.shaderModel = !{!4}
-!dx.resources = !{!5}
-!dx.typeAnnotations = !{!9, !15}
-!dx.entryPoints = !{!35}
 
-!1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
 !3 = !{i32 1, i32 9}
-!4 = !{!"lib", i32 6, i32 9}
-!5 = !{null, !6, null, null}
-!6 = !{!7}
-!7 = !{i32 0, [7 x %"class.RWStructuredBuffer<float>"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A", !"buf", i32 -1, i32 -1, i32 7, i32 12, i1 false, i1 false, i1 false, !8}
-!8 = !{i32 1, i32 4}
-!9 = !{i32 0, %"class.RWStructuredBuffer<float>" undef, !10}
-!10 = !{i32 4, !11, !12}
-!11 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9}
-!12 = !{i32 0, !13}
-!13 = !{!14}
-!14 = !{i32 0, float undef}
-!15 = !{i32 1, void ([10 x <7 x float>]*)* @"\01?assignments@@YAXY09$$CAV?$vector@M$06@@@Z", !16, void ([11 x <7 x float>]*, [11 x <7 x float>]*)* @"\01?arithmetic@@YA$$BY0L@V?$vector@M$06@@Y0L@$$CAV1@@Z", !21, void ([10 x <7 x i32>]*, [10 x <7 x i32>]*, [10 x <7 x float>]*)* @"\01?logic@@YA$$BY09V?$vector@_N$06@@Y09V1@Y09V?$vector@M$06@@@Z", !24, void ([10 x <7 x float>]*, [10 x <7 x float>]*, i32)* @"\01?index@@YA$$BY09V?$vector@M$06@@Y09V1@H@Z", !29, void ([11 x <7 x i32>]*)* @"\01?bittwiddlers@@YAXY0L@$$CAV?$vector@I$06@@@Z", !32}
-!16 = !{!17, !19}
-!17 = !{i32 1, !18, !18}
-!18 = !{}
-!19 = !{i32 2, !20, !18}
-!20 = !{i32 7, i32 9, i32 13, i32 7}
-!21 = !{!22, !23, !19}
-!22 = !{i32 0, !18, !18}
-!23 = !{i32 1, !20, !18}
-!24 = !{!22, !25, !27, !28}
-!25 = !{i32 1, !26, !18}
-!26 = !{i32 7, i32 1, i32 13, i32 7}
-!27 = !{i32 0, !26, !18}
-!28 = !{i32 0, !20, !18}
-!29 = !{!22, !23, !28, !30}
-!30 = !{i32 0, !31, !18}
-!31 = !{i32 7, i32 4}
-!32 = !{!17, !33}
-!33 = !{i32 2, !34, !18}
-!34 = !{i32 7, i32 5, i32 13, i32 7}
-!35 = !{null, !"", null, !5, null}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
index 8ef8f34bcb..9734b85b12 100644
--- a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
@@ -1,4 +1,4 @@
-; RUN: %dxopt %s -hlsl-passes-resume -scalarizer -S | FileCheck %s
+; RUN: %dxopt %s -scalarizer -S | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-ms-dx"
@@ -741,45 +741,5 @@ attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
 
-!pauseresume = !{!1}
 !dx.version = !{!3}
-!dx.valver = !{!3}
-!dx.shaderModel = !{!4}
-!dx.resources = !{!5}
-!dx.typeAnnotations = !{!9, !15}
-!dx.entryPoints = !{!35}
-
-!1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"}
 !3 = !{i32 1, i32 9}
-!4 = !{!"lib", i32 6, i32 9}
-!5 = !{null, !6, null, null}
-!6 = !{!7}
-!7 = !{i32 0, %"class.RWStructuredBuffer<vector<float, 1> >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A", !"buf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !8}
-!8 = !{i32 1, i32 4}
-!9 = !{i32 0, %"class.RWStructuredBuffer<vector<float, 1> >" undef, !10}
-!10 = !{i32 4, !11, !12}
-!11 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9, i32 13, i32 1}
-!12 = !{i32 0, !13}
-!13 = !{!14}
-!14 = !{i32 0, <1 x float> undef}
-!15 = !{i32 1, void ([10 x <1 x float>]*)* @"\01?assignments@@YAXY09$$CAV?$vector@M$00@@@Z", !16, void ([11 x <1 x float>]*, [11 x <1 x float>]*)* @"\01?arithmetic@@YA$$BY0L@V?$vector@M$00@@Y0L@$$CAV1@@Z", !21, void ([10 x i32]*, [10 x i32]*, [10 x <1 x float>]*)* @"\01?logic@@YA$$BY09_NY09_NY09V?$vector@M$00@@@Z", !24, void ([10 x <1 x float>]*, [10 x <1 x float>]*, i32)* @"\01?index@@YA$$BY09V?$vector@M$00@@Y09V1@H@Z", !29, void ([11 x i32]*)* @"\01?bittwiddlers@@YAXY0L@$$CAI@Z", !32}
-!16 = !{!17, !19}
-!17 = !{i32 1, !18, !18}
-!18 = !{}
-!19 = !{i32 2, !20, !18}
-!20 = !{i32 7, i32 9, i32 13, i32 1}
-!21 = !{!22, !23, !19}
-!22 = !{i32 0, !18, !18}
-!23 = !{i32 1, !20, !18}
-!24 = !{!22, !25, !27, !28}
-!25 = !{i32 1, !26, !18}
-!26 = !{i32 7, i32 1}
-!27 = !{i32 0, !26, !18}
-!28 = !{i32 0, !20, !18}
-!29 = !{!22, !23, !28, !30}
-!30 = !{i32 0, !31, !18}
-!31 = !{i32 7, i32 4}
-!32 = !{!17, !33}
-!33 = !{i32 2, !34, !18}
-!34 = !{i32 7, i32 5}
-!35 = !{null, !"", null, !5, null}
diff --git a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
index cd182b1dfd..d5b0bbb2a7 100644
--- a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
+++ b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
@@ -30,26 +30,3 @@ entry:
 declare float @"\01?foo@@YAMY02V?$vector@M$02@@@Z"([3 x <3 x float>]*)
 
 attributes #0 = { nounwind }
-
-!pauseresume = !{!1}
-!dx.version = !{!3}
-!dx.valver = !{!4}
-!dx.shaderModel = !{!5}
-!dx.typeAnnotations = !{!6}
-!dx.entryPoints = !{!12}
-!dx.fnprops = !{}
-!dx.options = !{!13, !14}
-
-!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
-!3 = !{i32 1, i32 6}
-!4 = !{i32 1, i32 6}
-!5 = !{!"lib", i32 6, i32 6}
-!6 = !{i32 1, float ([3 x <3 x float>]*)* @"\01?bar@@YAMY02V?$vector@M$02@@@Z", !7, float ([3 x <3 x float>]*)* @"\01?foo@@YAMY02V?$vector@M$02@@@Z", !7}
-!7 = !{!8, !11}
-!8 = !{i32 1, !9, !10}
-!9 = !{i32 7, i32 9}
-!10 = !{}
-!11 = !{i32 0, !9, !10}
-!12 = !{null, !"", null, null, null}
-!13 = !{i32 64}
-!14 = !{i32 -1}