Skip to content

Commit 40e3d02

Browse files
authored
Revert "[DxbcConverter] Fix corruption of ICB integer values (#4790)" (#5253) (#5279)
This reverts commit 0a1f7a1. The reverted commit used an array of 32-bit integer values instead of 32-bit float values for the "dx.icb" immediate constant buffer data, to prevent the FPU from flipping the 22nd bit of integer constants when their bit pattern represents a float32 signalling NaN on x86 architecture. That change is causing rendering issues on PCs with AMD GPUs and is blocking an upcoming Windows release. The target branch of this revert is a release branch based on release-1.7.2212 at commit 83f8c6c, and it will be used for the upcoming Windows release. The dxilconv change stays in main, at least for now. (cherry picked from commit 03df61d)
1 parent 8f3f77e commit 40e3d02

5 files changed

Lines changed: 97 additions & 202 deletions

File tree

projects/dxilconv/lib/DxbcConverter/DxbcConverter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2127,9 +2127,9 @@ void DxbcConverter::ConvertInstructions(D3D10ShaderBinary::CShaderCodeParser &Pa
21272127
unsigned Size = Inst.m_CustomData.DataSizeInBytes >> 2;
21282128
DXASSERT_DXBC(m_pIcbGV == nullptr && Inst.m_CustomData.DataSizeInBytes == Size*4);
21292129

2130-
llvm::Constant *pIcbData = ConstantDataArray::get(m_Ctx, ArrayRef<uint32_t>((uint32_t*)Inst.m_CustomData.pData, Size));
2130+
llvm::Constant *pIcbData = ConstantDataArray::get(m_Ctx, ArrayRef<float>((float*)Inst.m_CustomData.pData, Size));
21312131
m_pIcbGV = new GlobalVariable(*m_pModule, pIcbData->getType(), true, GlobalValue::InternalLinkage,
2132-
pIcbData, "dx.icb", nullptr,
2132+
pIcbData, "dx.icb", nullptr,
21332133
GlobalVariable::NotThreadLocal, DXIL::kImmediateCBufferAddrSpace);
21342134
}
21352135
break;
@@ -6074,7 +6074,7 @@ void DxbcConverter::LoadOperand(OperandValue &SrcVal,
60746074
Value *pPtr = m_pBuilder->CreateGEP(m_pIcbGV, pGEPIndices);
60756075
LoadInst *pLoad = m_pBuilder->CreateLoad(pPtr);
60766076
pLoad->setAlignment(kRegCompAlignment);
6077-
Value *pValue = CastDxbcValue(pLoad, CompType::getU32(), ValueType);
6077+
Value *pValue = CastDxbcValue(pLoad, CompType::getF32(), ValueType);
60786078
pValue = ApplyOperandModifiers(pValue, O);
60796079

60806080
OVH.SetValue(pValue);

projects/dxilconv/test/dxbc2dxil/icb1.ref

Lines changed: 94 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
@dx.icb = internal addrspace(5) constant [16 x i32] [i32 1065353216, i32 0, i32 0, i32 0, i32 0, i32 1065353216, i32 0, i32 0, i32 0, i32 0, i32 1065353216, i32 0, i32 0, i32 0, i32 0, i32 1065353216]
2+
@dx.icb = internal addrspace(5) constant [16 x float] [float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00]
33

44
define void @main() {
55
entry:
@@ -11,122 +11,102 @@ entry:
1111
%5 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
1212
%6 = mul i32 %1, 4
1313
%7 = add i32 %6, 0
14-
%8 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %7
15-
%9 = load i32, i32 addrspace(5)* %8, align 4
16-
%10 = call float @dx.op.bitcastI32toF32(i32 126, i32 %9)
17-
%11 = mul i32 %1, 4
18-
%12 = add i32 %11, 1
19-
%13 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %12
20-
%14 = load i32, i32 addrspace(5)* %13, align 4
21-
%15 = call float @dx.op.bitcastI32toF32(i32 126, i32 %14)
22-
%16 = mul i32 %1, 4
23-
%17 = add i32 %16, 2
24-
%18 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %17
25-
%19 = load i32, i32 addrspace(5)* %18, align 4
26-
%20 = call float @dx.op.bitcastI32toF32(i32 126, i32 %19)
27-
%21 = mul i32 %1, 4
28-
%22 = add i32 %21, 3
29-
%23 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %22
30-
%24 = load i32, i32 addrspace(5)* %23, align 4
31-
%25 = call float @dx.op.bitcastI32toF32(i32 126, i32 %24)
32-
%26 = call float @dx.op.dot4.f32(i32 56, float %2, float %3, float %4, float %5, float %10, float %15, float %20, float %25)
33-
%27 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
34-
%28 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
35-
%29 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 2, i32 undef)
36-
%30 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 3, i32 undef)
14+
%8 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %7
15+
%9 = load float, float addrspace(5)* %8, align 4
16+
%10 = mul i32 %1, 4
17+
%11 = add i32 %10, 1
18+
%12 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %11
19+
%13 = load float, float addrspace(5)* %12, align 4
20+
%14 = mul i32 %1, 4
21+
%15 = add i32 %14, 2
22+
%16 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %15
23+
%17 = load float, float addrspace(5)* %16, align 4
24+
%18 = mul i32 %1, 4
25+
%19 = add i32 %18, 3
26+
%20 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %19
27+
%21 = load float, float addrspace(5)* %20, align 4
28+
%22 = call float @dx.op.dot4.f32(i32 56, float %2, float %3, float %4, float %5, float %9, float %13, float %17, float %21)
29+
%23 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
30+
%24 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
31+
%25 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 2, i32 undef)
32+
%26 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 3, i32 undef)
33+
%27 = mul i32 %1, 4
34+
%28 = add i32 %27, 0
35+
%29 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %28
36+
%30 = load float, float addrspace(5)* %29, align 4
3737
%31 = mul i32 %1, 4
38-
%32 = add i32 %31, 0
39-
%33 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %32
40-
%34 = load i32, i32 addrspace(5)* %33, align 4
41-
%35 = call float @dx.op.bitcastI32toF32(i32 126, i32 %34)
42-
%36 = mul i32 %1, 4
43-
%37 = add i32 %36, 1
44-
%38 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %37
45-
%39 = load i32, i32 addrspace(5)* %38, align 4
46-
%40 = call float @dx.op.bitcastI32toF32(i32 126, i32 %39)
47-
%41 = mul i32 %1, 4
48-
%42 = add i32 %41, 2
49-
%43 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %42
50-
%44 = load i32, i32 addrspace(5)* %43, align 4
51-
%45 = call float @dx.op.bitcastI32toF32(i32 126, i32 %44)
52-
%46 = mul i32 %1, 4
53-
%47 = add i32 %46, 3
54-
%48 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %47
55-
%49 = load i32, i32 addrspace(5)* %48, align 4
56-
%50 = call float @dx.op.bitcastI32toF32(i32 126, i32 %49)
57-
%51 = call float @dx.op.dot4.f32(i32 56, float %27, float %28, float %29, float %30, float %35, float %40, float %45, float %50)
58-
%52 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 undef)
59-
%53 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 undef)
60-
%54 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 undef)
61-
%55 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 3, i32 undef)
38+
%32 = add i32 %31, 1
39+
%33 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %32
40+
%34 = load float, float addrspace(5)* %33, align 4
41+
%35 = mul i32 %1, 4
42+
%36 = add i32 %35, 2
43+
%37 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %36
44+
%38 = load float, float addrspace(5)* %37, align 4
45+
%39 = mul i32 %1, 4
46+
%40 = add i32 %39, 3
47+
%41 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %40
48+
%42 = load float, float addrspace(5)* %41, align 4
49+
%43 = call float @dx.op.dot4.f32(i32 56, float %23, float %24, float %25, float %26, float %30, float %34, float %38, float %42)
50+
%44 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 undef)
51+
%45 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 undef)
52+
%46 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 undef)
53+
%47 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 3, i32 undef)
54+
%48 = mul i32 %1, 4
55+
%49 = add i32 %48, 0
56+
%50 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %49
57+
%51 = load float, float addrspace(5)* %50, align 4
58+
%52 = mul i32 %1, 4
59+
%53 = add i32 %52, 1
60+
%54 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %53
61+
%55 = load float, float addrspace(5)* %54, align 4
6262
%56 = mul i32 %1, 4
63-
%57 = add i32 %56, 0
64-
%58 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %57
65-
%59 = load i32, i32 addrspace(5)* %58, align 4
66-
%60 = call float @dx.op.bitcastI32toF32(i32 126, i32 %59)
67-
%61 = mul i32 %1, 4
68-
%62 = add i32 %61, 1
69-
%63 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %62
70-
%64 = load i32, i32 addrspace(5)* %63, align 4
71-
%65 = call float @dx.op.bitcastI32toF32(i32 126, i32 %64)
72-
%66 = mul i32 %1, 4
73-
%67 = add i32 %66, 2
74-
%68 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %67
75-
%69 = load i32, i32 addrspace(5)* %68, align 4
76-
%70 = call float @dx.op.bitcastI32toF32(i32 126, i32 %69)
77-
%71 = mul i32 %1, 4
78-
%72 = add i32 %71, 3
79-
%73 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %72
80-
%74 = load i32, i32 addrspace(5)* %73, align 4
81-
%75 = call float @dx.op.bitcastI32toF32(i32 126, i32 %74)
82-
%76 = call float @dx.op.dot4.f32(i32 56, float %52, float %53, float %54, float %55, float %60, float %65, float %70, float %75)
83-
%77 = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 0, i32 undef)
84-
%78 = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 1, i32 undef)
85-
%79 = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 2, i32 undef)
86-
%80 = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 3, i32 undef)
63+
%57 = add i32 %56, 2
64+
%58 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %57
65+
%59 = load float, float addrspace(5)* %58, align 4
66+
%60 = mul i32 %1, 4
67+
%61 = add i32 %60, 3
68+
%62 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %61
69+
%63 = load float, float addrspace(5)* %62, align 4
70+
%64 = call float @dx.op.dot4.f32(i32 56, float %44, float %45, float %46, float %47, float %51, float %55, float %59, float %63)
71+
%65 = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 0, i32 undef)
72+
%66 = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 1, i32 undef)
73+
%67 = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 2, i32 undef)
74+
%68 = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 3, i32 undef)
75+
%69 = mul i32 %1, 4
76+
%70 = add i32 %69, 0
77+
%71 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %70
78+
%72 = load float, float addrspace(5)* %71, align 4
79+
%73 = mul i32 %1, 4
80+
%74 = add i32 %73, 1
81+
%75 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %74
82+
%76 = load float, float addrspace(5)* %75, align 4
83+
%77 = mul i32 %1, 4
84+
%78 = add i32 %77, 2
85+
%79 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %78
86+
%80 = load float, float addrspace(5)* %79, align 4
8787
%81 = mul i32 %1, 4
88-
%82 = add i32 %81, 0
89-
%83 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %82
90-
%84 = load i32, i32 addrspace(5)* %83, align 4
91-
%85 = call float @dx.op.bitcastI32toF32(i32 126, i32 %84)
92-
%86 = mul i32 %1, 4
93-
%87 = add i32 %86, 1
94-
%88 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %87
95-
%89 = load i32, i32 addrspace(5)* %88, align 4
96-
%90 = call float @dx.op.bitcastI32toF32(i32 126, i32 %89)
97-
%91 = mul i32 %1, 4
98-
%92 = add i32 %91, 2
99-
%93 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %92
100-
%94 = load i32, i32 addrspace(5)* %93, align 4
101-
%95 = call float @dx.op.bitcastI32toF32(i32 126, i32 %94)
102-
%96 = mul i32 %1, 4
103-
%97 = add i32 %96, 3
104-
%98 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %97
105-
%99 = load i32, i32 addrspace(5)* %98, align 4
106-
%100 = call float @dx.op.bitcastI32toF32(i32 126, i32 %99)
107-
%101 = call float @dx.op.dot4.f32(i32 56, float %77, float %78, float %79, float %80, float %85, float %90, float %95, float %100)
108-
%102 = mul i32 %0, 4
109-
%103 = add i32 %102, 0
110-
%104 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %103
111-
%105 = load i32, i32 addrspace(5)* %104, align 4
112-
%106 = call float @dx.op.bitcastI32toF32(i32 126, i32 %105)
113-
%107 = mul i32 %0, 4
114-
%108 = add i32 %107, 1
115-
%109 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %108
116-
%110 = load i32, i32 addrspace(5)* %109, align 4
117-
%111 = call float @dx.op.bitcastI32toF32(i32 126, i32 %110)
118-
%112 = mul i32 %0, 4
119-
%113 = add i32 %112, 2
120-
%114 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %113
121-
%115 = load i32, i32 addrspace(5)* %114, align 4
122-
%116 = call float @dx.op.bitcastI32toF32(i32 126, i32 %115)
123-
%117 = mul i32 %0, 4
124-
%118 = add i32 %117, 3
125-
%119 = getelementptr [16 x i32], [16 x i32] addrspace(5)* @dx.icb, i32 0, i32 %118
126-
%120 = load i32, i32 addrspace(5)* %119, align 4
127-
%121 = call float @dx.op.bitcastI32toF32(i32 126, i32 %120)
128-
%122 = call float @dx.op.dot4.f32(i32 56, float %26, float %51, float %76, float %101, float %106, float %111, float %116, float %121)
129-
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %122)
88+
%82 = add i32 %81, 3
89+
%83 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %82
90+
%84 = load float, float addrspace(5)* %83, align 4
91+
%85 = call float @dx.op.dot4.f32(i32 56, float %65, float %66, float %67, float %68, float %72, float %76, float %80, float %84)
92+
%86 = mul i32 %0, 4
93+
%87 = add i32 %86, 0
94+
%88 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %87
95+
%89 = load float, float addrspace(5)* %88, align 4
96+
%90 = mul i32 %0, 4
97+
%91 = add i32 %90, 1
98+
%92 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %91
99+
%93 = load float, float addrspace(5)* %92, align 4
100+
%94 = mul i32 %0, 4
101+
%95 = add i32 %94, 2
102+
%96 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %95
103+
%97 = load float, float addrspace(5)* %96, align 4
104+
%98 = mul i32 %0, 4
105+
%99 = add i32 %98, 3
106+
%100 = getelementptr [16 x float], [16 x float] addrspace(5)* @dx.icb, i32 0, i32 %99
107+
%101 = load float, float addrspace(5)* %100, align 4
108+
%102 = call float @dx.op.dot4.f32(i32 56, float %22, float %43, float %64, float %85, float %89, float %93, float %97, float %101)
109+
call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %102)
130110
ret void
131111
}
132112

@@ -145,9 +125,6 @@ declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0
145125
; Function Attrs: nounwind readonly
146126
declare i32 @dx.op.tempRegLoad.i32(i32, i32) #2
147127

148-
; Function Attrs: nounwind readnone
149-
declare float @dx.op.bitcastI32toF32(i32, i32) #0
150-
151128
; Function Attrs: nounwind
152129
declare void @dx.op.tempRegStore.f32(i32, i32, float) #1
153130

-708 Bytes
Binary file not shown.

projects/dxilconv/test/dxbc2dxil/icb2.hlsl

Lines changed: 0 additions & 16 deletions
This file was deleted.

projects/dxilconv/test/dxbc2dxil/icb2.ref

Lines changed: 0 additions & 66 deletions
This file was deleted.

0 commit comments

Comments
 (0)