Respond to feedback

Greg Roth · Greg Roth · commit 6f7f9ec82475 · 2025-03-24T04:35:06.000-06:00
Initialize mattype with check for matrixness

remove leftover include

reword comment

refactor SM69 conditional to avoid double parent retrieval

Add test that confirms no short-circuiting with native vector logic ops and HLSL
2018.

Revise vec1 scalarizer test that was mistakenly generated with HLSL 2021
which included short-circuiting.

Add validation check for vector operations in pre-6.9
diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp
@@ -2672,6 +2672,21 @@ static bool IsLLVMInstructionAllowedForLib(Instruction &I,
   }
 }
 
+// Shader model specific checks for valid LLVM instructions.
+// Currently only rejects the vector element operations (insertelement,
+// extractelement, shufflevector) when the shader model is pre 6.9.
+// Returns true for every other instruction and for any 6.9+ shader model.
+static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I,
+                                                   ValidationContext &ValCtx) {
+  if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus())
+    return true;
+  // getOpcode() yields the numeric opcode, not an Instruction object.
+  const unsigned OpCode = I.getOpcode();
+  return OpCode != Instruction::InsertElement &&
+         OpCode != Instruction::ExtractElement &&
+         OpCode != Instruction::ShuffleVector;
+}
+
 static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
   bool SupportsMinPrecision =
       ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision;
diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp
@@ -189,15 +189,15 @@ void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
     User *U = *(it++);
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
       Type *EltTy = GEP->getType()->getPointerElementType();
-      if (HLMatrixType::isa(EltTy)) {
+      if (HLMatrixType MatTy =  HLMatrixType::dyn_cast(EltTy)) {
         // Change gep matrixArray, 0, index
         // into
         //   gep oneDimArray, 0, index * matSize
         IRBuilder<> Builder(GEP);
         SmallVector<Value *, 2> idxList(GEP->idx_begin(), GEP->idx_end());
         DXASSERT(idxList.size() == 2,
                  "else not one dim matrix array index to matrix");
-        unsigned NumElts = HLMatrixType::cast(EltTy).getNumElements();
+        unsigned NumElts = MatTy.getNumElements();
         if (!SupportsVectors || NumElts == 1) {
           Value *MatSize = Builder.getInt32(NumElts);
           idxList.back() = Builder.CreateMul(idxList.back(), MatSize);
diff --git a/lib/Transforms/Scalar/DxilEliminateVector.cpp b/lib/Transforms/Scalar/DxilEliminateVector.cpp
@@ -10,8 +10,6 @@
 //                                                                           //
 ///////////////////////////////////////////////////////////////////////////////
 
-#include "dxc/DXIL/DxilModule.h"
-
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Pass.h"
diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
@@ -1949,7 +1949,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
         continue;
       }
 
-      // Flatten Global vector if no dynamic vector indexing.
+      // Flatten global vector if it has no dynamic vector indexing.
       bool bFlatVector = !hasDynamicVectorIndexing(GV);
 
       if (bFlatVector) {
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -293,8 +293,8 @@ bool Scalarizer::doInitialization(Module &M) {
 }
 
 bool Scalarizer::runOnFunction(Function &F) {
-  if (F.getParent()->HasDxilModule())
-    if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus())
+  Module *M = F.getParent();
+  if (M->HasDxilModule() && M->GetDxilModule().GetShaderModel()->IsSM69Plus())
       SupportsVectors = true;
 
   for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl
@@ -0,0 +1,57 @@
+// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s
+// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s --check-prefix=NOBR
+
+// Test that no short-circuiting takes place for logic ops with native vectors.
+// First run verifies that side effects result in stores.
+// The second RUN line just makes sure there are no branches or phis at all.
+
+// NOBR-NOT: br i1
+// NOBR-NOT: = phi
+
+export int4 logic(inout bool4 truth[5], inout int4 consequences[4]) {
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]]
+  // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]]
+  // CHECK: [[add:%.*]] = add <4 x i32> [[vec1]], <i32 1, i32 1, i32 1, i32 1>
+  // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr1]]
+  // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer
+  // CHECK: [[bres3:%.*]] = or <4 x i1> [[bvec1]], [[bvec0]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[res3:%.*]] = zext <4 x i1> [[bres3]] to <4 x i32>
+  // CHECK: store <4 x i32> [[res3]], <4 x i32>* [[adr3]]
+  truth[3] = truth[0] || consequences[1]++;
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]]
+  // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 0
+  // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]]
+  // CHECK: [[sub:%.*]] = add <4 x i32> [[vec0]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr0]]
+  // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer
+  // CHECK: [[bres4:%.*]] = and <4 x i1> [[bvec0]], [[bvec1]]
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 4
+  // CHECK: [[res4:%.*]] = zext <4 x i1> [[bres4]] to <4 x i32>
+  // CHECK: store <4 x i32> [[res4]], <4 x i32>* [[adr4]]
+  truth[4] = truth[1] && consequences[0]--;
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]]
+  // CHECK: [[bcond:%.*]] = icmp ne <4 x i32> [[vec2]], zeroinitializer
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 2
+  // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]]
+  // CHECK: [[add:%.*]] = add <4 x i32> [[vec2]], <i32 1, i32 1, i32 1, i32 1>
+  // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr2]]
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 3
+  // CHECK: [[vec3:%.*]] = load <4 x i32>, <4 x i32>* [[adr3]]
+  // CHECK: [[sub:%.*]] = add <4 x i32> [[vec3]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr3]]
+  // CHECK: [[res:%.*]] = select <4 x i1> [[bcond]], <4 x i32> [[vec2]], <4 x i32> [[vec3]]
+  int4 res = truth[2] ? consequences[2]++ : consequences[3]--;
+
+  // CHECK: ret <4 x i32> [[res]]
+  return res;
+}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
@@ -306,76 +306,58 @@ bb:
   ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
   ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4
   ; CHECK: [[cmp1:%.*]] = icmp ne i32 [[ld1]], 0
-  %tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
-  %tmp6 = load i32, i32* %tmp5, align 4
-  %tmp7 = icmp ne i32 %tmp6, 0
-  br i1 %tmp7, label %bb12, label %bb8
-
-bb8:                                              ; preds = %bb
   ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
   ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4
   ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0
+  ; CHECK: [[bres1:%.*]] = or i1 [[cmp1]], [[cmp2]]
+  ; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32
+  %tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  %tmp6 = load i32, i32* %tmp5, align 4
+  %tmp7 = icmp ne i32 %tmp6, 0
   %tmp9 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
   %tmp10 = load i32, i32* %tmp9, align 4
   %tmp11 = icmp ne i32 %tmp10, 0
-  br label %bb12
+  %tmp13 = or i1 %tmp7, %tmp11
+  %tmp14 = zext i1 %tmp13 to i32
 
-bb12:                                             ; preds = %bb8, %bb
-  ; CHECK: [[bres1:%.*]] = phi i1 [ true, %bb ], [ [[cmp2]], %bb8 ]
-  ; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32
   ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
   ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4
   ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0
-  %tmp13 = phi i1 [ true, %bb ], [ %tmp11, %bb8 ]
-  %tmp14 = zext i1 %tmp13 to i32
-  %tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
-  %tmp16 = load i32, i32* %tmp15, align 4
-  %tmp17 = icmp ne i32 %tmp16, 0
-  br i1 %tmp17, label %bb18, label %bb22
-
-bb18:                                             ; preds = %bb12
   ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
   ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
   ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0
+  ; CHECK: [[bres2:%.*]] = and i1 [[cmp2]], [[cmp3]]
+  ; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  %tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  %tmp16 = load i32, i32* %tmp15, align 4
+  %tmp17 = icmp ne i32 %tmp16, 0
   %tmp19 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
   %tmp20 = load i32, i32* %tmp19, align 4
   %tmp21 = icmp ne i32 %tmp20, 0
-  br label %bb22
-
-bb22:                                             ; preds = %bb18, %bb12
+  %tmp23 = and i1 %tmp17, %tmp21
+  %tmp24 = zext i1 %tmp23 to i32
 
-  ; CHECK: [[bres2:%.*]] = phi i1 [ false, %bb12 ], [ [[cmp3]], %bb18 ]
-  ; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
   ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
   ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
   ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0
-  %tmp23 = phi i1 [ false, %bb12 ], [ %tmp21, %bb18 ]
-  %tmp24 = zext i1 %tmp23 to i32
+  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4
+  ; CHECK: [[cmp4:%.*]] = icmp ne i32 [[ld4]], 0
+  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4
+  ; CHECK: [[cmp5:%.*]] = icmp ne i32 [[ld5]], 0
+  ; CHECK: [[bres3:%.*]] = select i1 [[cmp3]], i1 [[cmp4]], i1 [[cmp5]]
+  ; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
   %tmp25 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
   %tmp26 = load i32, i32* %tmp25, align 4
   %tmp27 = icmp ne i32 %tmp26, 0
-  br i1 %tmp27, label %bb28, label %bb31
-
-bb28:                                             ; preds = %bb22
-  ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
-  ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4
   %tmp29 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
   %tmp30 = load i32, i32* %tmp29, align 4
-  br label %bb34
-
-bb31:                                             ; preds = %bb22
-  ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
-  ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4
+  %tmp31 = icmp ne i32 %tmp30, 0
   %tmp32 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
   %tmp33 = load i32, i32* %tmp32, align 4
-  br label %bb34
-
-bb34:                                             ; preds = %bb31, %bb28
-  ; CHECK: [[res3:%.*]] = phi i32 [ [[ld4]], %bb28 ], [ [[ld5]], %bb31 ]
-  ; CHECK: [[bres3:%.*]] = icmp ne i32 [[res3]], 0
-  ; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
-  %.sink = phi i32 [ %tmp30, %bb28 ], [ %tmp33, %bb31 ]
-  %tmp35 = icmp ne i32 %.sink, 0
+  %tmp34 = icmp ne i32 %tmp33, 0
+  %tmp35 = select i1 %tmp27, i1 %tmp31, i1 %tmp34
   %tmp36 = zext i1 %tmp35 to i32
 
   ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0

Original file line number	Diff line number	Diff line change
`@@ -1949,7 +1949,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {`
`1949`	`1949`	`continue;`
`1950`	`1950`	`}`
`1951`	`1951`
`1952`		`- // Flatten Global vector if no dynamic vector indexing.`
	`1952`	`+ // Flatten global vector if it has no dynamic vector indexing.`
`1953`	`1953`	`bool bFlatVector = !hasDynamicVectorIndexing(GV);`
`1954`	`1954`
`1955`	`1955`	`if (bFlatVector) {`
Original file line number	Diff line number	Diff line change
`@@ -293,8 +293,8 @@ bool Scalarizer::doInitialization(Module &M) {`
`293`	`293`	`}`
`294`	`294`
`295`	`295`	`bool Scalarizer::runOnFunction(Function &F) {`
`296`		`- if (F.getParent()->HasDxilModule())`
`297`		`- if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus())`
	`296`	`+ Module *M = F.getParent();`
	`297`	`+ if (M->HasDxilModule() && M->GetDxilModule().GetShaderModel()->IsSM69Plus())`
`298`	`298`	`SupportsVectors = true;`
`299`	`299`
`300`	`300`	`for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {`