Skip to content

Commit 6f7f9ec

Browse files
author
Greg Roth
committed
Respond to feedback
Initialize mattype with check for matrixness. Remove leftover include. Reword comment. Refactor SM69 conditional to avoid double parent retrieval. Add test that confirms no short-circuiting with native vector logic ops and HLSL 2018. Revise vec1 scalarizer test that was mistakenly generated with HLSL 2021, which included short-circuiting. Add validation check for vector operations in pre-6.9.
1 parent a3a39b8 commit 6f7f9ec

7 files changed

Lines changed: 102 additions & 50 deletions

File tree

lib/DxilValidation/DxilValidation.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2672,6 +2672,21 @@ static bool IsLLVMInstructionAllowedForLib(Instruction &I,
26722672
}
26732673
}
26742674

2675+
// Shader model specific checks for valid LLVM instructions.
2676+
// Currently only checks for pre 6.9 usage of vector operations.
2677+
// Returns false if shader model is pre 6.9 and I represents a vector
2678+
// operation. Returns true otherwise.
2679+
static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I,
2680+
ValidationContext &ValCtx) {
2681+
if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus())
2682+
return true;
2683+
Instruction OpCode = I.getOpcode();
2684+
if (OpCode == Instruction::InsertElement ||
2685+
OpCode == Instruction::ExtractElement ||
2686+
OpCode == Instruction::ShuffleVector)
2687+
return false;
2688+
}
2689+
26752690
static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
26762691
bool SupportsMinPrecision =
26772692
ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision;

lib/HLSL/HLMatrixBitcastLowerPass.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,15 +189,15 @@ void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M,
189189
User *U = *(it++);
190190
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
191191
Type *EltTy = GEP->getType()->getPointerElementType();
192-
if (HLMatrixType::isa(EltTy)) {
192+
if (HLMatrixType MatTy = HLMatrixType::dyn_cast(EltTy)) {
193193
// Change gep matrixArray, 0, index
194194
// into
195195
// gep oneDimArray, 0, index * matSize
196196
IRBuilder<> Builder(GEP);
197197
SmallVector<Value *, 2> idxList(GEP->idx_begin(), GEP->idx_end());
198198
DXASSERT(idxList.size() == 2,
199199
"else not one dim matrix array index to matrix");
200-
unsigned NumElts = HLMatrixType::cast(EltTy).getNumElements();
200+
unsigned NumElts = MatTy.getNumElements();
201201
if (!SupportsVectors || NumElts == 1) {
202202
Value *MatSize = Builder.getInt32(NumElts);
203203
idxList.back() = Builder.CreateMul(idxList.back(), MatSize);

lib/Transforms/Scalar/DxilEliminateVector.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
// //
1111
///////////////////////////////////////////////////////////////////////////////
1212

13-
#include "dxc/DXIL/DxilModule.h"
14-
1513
#include "llvm/IR/Dominators.h"
1614
#include "llvm/IR/Instructions.h"
1715
#include "llvm/Pass.h"

lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1949,7 +1949,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) {
19491949
continue;
19501950
}
19511951

1952-
// Flatten Global vector if no dynamic vector indexing.
1952+
// Flatten global vector if it has no dynamic vector indexing.
19531953
bool bFlatVector = !hasDynamicVectorIndexing(GV);
19541954

19551955
if (bFlatVector) {

lib/Transforms/Scalar/Scalarizer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,8 @@ bool Scalarizer::doInitialization(Module &M) {
293293
}
294294

295295
bool Scalarizer::runOnFunction(Function &F) {
296-
if (F.getParent()->HasDxilModule())
297-
if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus())
296+
Module *M = F.getParent();
297+
if (M->HasDxilModule() && M->GetDxilModule().GetShaderModel()->IsSM69Plus())
298298
SupportsVectors = true;
299299

300300
for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s
2+
// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s --check-prefix=NOBR
3+
4+
// Test that no short-circuiting takes place for logic ops with native vectors.
5+
// First run verifies that side effects result in stores.
6+
// Second RUN line just makes sure there are no branches or phis at all.
7+
8+
// NOBR-NOT: br i1
9+
// NOBR-NOT: = phi
10+
11+
// Exercises ||, && and ?: on native vector operands whose right-hand sides
// have side effects (post-increment / post-decrement). The checks expect each
// side effect to appear as an unconditional store and the result to be formed
// with a vector or/and/select.
export int4 logic(inout bool4 truth[5], inout int4 consequences[4]) {
  // Logical OR: the post-increment is stored regardless of truth[0].
  // CHECK: [[adr0:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 0
  // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]]
  // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer

  // CHECK: [[adr1:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 1
  // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]]
  // CHECK: [[add:%.*]] = add <4 x i32> [[vec1]], <i32 1, i32 1, i32 1, i32 1>
  // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr1]]
  // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer
  // CHECK: [[bres3:%.*]] = or <4 x i1> [[bvec1]], [[bvec0]]
  // CHECK: [[adr3:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 3
  // CHECK: [[res3:%.*]] = zext <4 x i1> [[bres3]] to <4 x i32>
  // CHECK: store <4 x i32> [[res3]], <4 x i32>* [[adr3]]
  truth[3] = truth[0] || consequences[1]++;

  // Logical AND: the post-decrement is stored regardless of truth[1].
  // CHECK: [[adr1:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 1
  // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]]
  // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer
  // CHECK: [[adr0:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 0
  // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]]
  // CHECK: [[sub:%.*]] = add <4 x i32> [[vec0]], <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr0]]
  // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer
  // CHECK: [[bres4:%.*]] = and <4 x i1> [[bvec0]], [[bvec1]]
  // CHECK: [[adr4:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 4
  // CHECK: [[res4:%.*]] = zext <4 x i1> [[bres4]] to <4 x i32>
  // CHECK: store <4 x i32> [[res4]], <4 x i32>* [[adr4]]
  truth[4] = truth[1] && consequences[0]--;

  // Ternary: both arms' side effects are stored, then a select picks the
  // result. Captured names are used instead of literal SSA numbers so the
  // checks do not depend on value numbering.
  // CHECK: [[adr2:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 2
  // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]]
  // CHECK: [[bcond:%.*]] = icmp ne <4 x i32> [[vec2]], zeroinitializer
  // CHECK: [[adr2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 2
  // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]]
  // CHECK: [[add:%.*]] = add <4 x i32> [[vec2]], <i32 1, i32 1, i32 1, i32 1>
  // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr2]]
  // CHECK: [[adr3:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 3
  // CHECK: [[vec3:%.*]] = load <4 x i32>, <4 x i32>* [[adr3]]
  // CHECK: [[sub:%.*]] = add <4 x i32> [[vec3]], <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr3]]
  // CHECK: [[res:%.*]] = select <4 x i1> [[bcond]], <4 x i32> [[vec2]], <4 x i32> [[vec3]]
  int4 res = truth[2] ? consequences[2]++ : consequences[3]--;

  // CHECK: ret <4 x i32> [[res]]
  return res;
}

tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll

Lines changed: 25 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -306,76 +306,58 @@ bb:
306306
; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
307307
; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4
308308
; CHECK: [[cmp1:%.*]] = icmp ne i32 [[ld1]], 0
309-
%tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
310-
%tmp6 = load i32, i32* %tmp5, align 4
311-
%tmp7 = icmp ne i32 %tmp6, 0
312-
br i1 %tmp7, label %bb12, label %bb8
313-
314-
bb8: ; preds = %bb
315309
; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
316310
; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4
317311
; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0
312+
; CHECK: [[bres1:%.*]] = or i1 [[cmp1]], [[cmp2]]
313+
; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32
314+
%tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
315+
%tmp6 = load i32, i32* %tmp5, align 4
316+
%tmp7 = icmp ne i32 %tmp6, 0
318317
%tmp9 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
319318
%tmp10 = load i32, i32* %tmp9, align 4
320319
%tmp11 = icmp ne i32 %tmp10, 0
321-
br label %bb12
320+
%tmp13 = or i1 %tmp7, %tmp11
321+
%tmp14 = zext i1 %tmp13 to i32
322322

323-
bb12: ; preds = %bb8, %bb
324-
; CHECK: [[bres1:%.*]] = phi i1 [ true, %bb ], [ [[cmp2]], %bb8 ]
325-
; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32
326323
; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
327324
; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4
328325
; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0
329-
%tmp13 = phi i1 [ true, %bb ], [ %tmp11, %bb8 ]
330-
%tmp14 = zext i1 %tmp13 to i32
331-
%tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
332-
%tmp16 = load i32, i32* %tmp15, align 4
333-
%tmp17 = icmp ne i32 %tmp16, 0
334-
br i1 %tmp17, label %bb18, label %bb22
335-
336-
bb18: ; preds = %bb12
337326
; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
338327
; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
339328
; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0
329+
; CHECK: [[bres2:%.*]] = and i1 [[cmp2]], [[cmp3]]
330+
; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
331+
%tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
332+
%tmp16 = load i32, i32* %tmp15, align 4
333+
%tmp17 = icmp ne i32 %tmp16, 0
340334
%tmp19 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
341335
%tmp20 = load i32, i32* %tmp19, align 4
342336
%tmp21 = icmp ne i32 %tmp20, 0
343-
br label %bb22
344-
345-
bb22: ; preds = %bb18, %bb12
337+
%tmp23 = and i1 %tmp17, %tmp21
338+
%tmp24 = zext i1 %tmp23 to i32
346339

347-
; CHECK: [[bres2:%.*]] = phi i1 [ false, %bb12 ], [ [[cmp3]], %bb18 ]
348-
; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
349340
; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
350341
; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4
351342
; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0
352-
%tmp23 = phi i1 [ false, %bb12 ], [ %tmp21, %bb18 ]
353-
%tmp24 = zext i1 %tmp23 to i32
343+
; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
344+
; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4
345+
; CHECK: [[cmp4:%.*]] = icmp ne i32 [[ld4]], 0
346+
; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
347+
; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4
348+
; CHECK: [[cmp5:%.*]] = icmp ne i32 [[ld5]], 0
349+
; CHECK: [[bres3:%.*]] = select i1 [[cmp3]], i1 [[cmp4]], i1 [[cmp5]]
350+
; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
354351
%tmp25 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
355352
%tmp26 = load i32, i32* %tmp25, align 4
356353
%tmp27 = icmp ne i32 %tmp26, 0
357-
br i1 %tmp27, label %bb28, label %bb31
358-
359-
bb28: ; preds = %bb22
360-
; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
361-
; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4
362354
%tmp29 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
363355
%tmp30 = load i32, i32* %tmp29, align 4
364-
br label %bb34
365-
366-
bb31: ; preds = %bb22
367-
; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
368-
; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4
356+
%tmp31 = icmp ne i32 %tmp30, 0
369357
%tmp32 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
370358
%tmp33 = load i32, i32* %tmp32, align 4
371-
br label %bb34
372-
373-
bb34: ; preds = %bb31, %bb28
374-
; CHECK: [[res3:%.*]] = phi i32 [ [[ld4]], %bb28 ], [ [[ld5]], %bb31 ]
375-
; CHECK: [[bres3:%.*]] = icmp ne i32 [[res3]], 0
376-
; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
377-
%.sink = phi i32 [ %tmp30, %bb28 ], [ %tmp33, %bb31 ]
378-
%tmp35 = icmp ne i32 %.sink, 0
359+
%tmp34 = icmp ne i32 %tmp33, 0
360+
%tmp35 = select i1 %tmp27, i1 %tmp31, i1 %tmp34
379361
%tmp36 = zext i1 %tmp35 to i32
380362

381363
; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0

0 commit comments

Comments
 (0)