|
| 1 | +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=13 %s | FileCheck %s |
| 2 | + |
| 3 | +// Source for dxilgen test CodeGenDXIL/passes/longvec-intrinsics.ll. |
| 4 | +// Also performs some targeted FileCheck testing as an incidental benefit. |
| 5 | + /* Read-write structured buffers of NUM-element vectors, one per element type used by main. Floating-point element types first. */ |
| 6 | +RWStructuredBuffer<vector<float16_t, NUM> > hBuf; |
| 7 | +RWStructuredBuffer<vector<float, NUM> > fBuf; |
| 8 | +RWStructuredBuffer<vector<double, NUM> > dBuf; |
| 9 | + /* Boolean and integer element types. */ |
| 10 | +RWStructuredBuffer<vector<bool, NUM> > bBuf; |
| 11 | +RWStructuredBuffer<vector<uint, NUM> > uBuf; |
| 12 | +RWStructuredBuffer<vector<int64_t, NUM> > lBuf; |
| 13 | + |
| 14 | +[numthreads(8,1,1)] |
| 15 | +void main() { |
| 16 | + /* Runs HLSL intrinsics on NUM-element vectors (13 elements with -DNUM=13) loaded from the structured buffers, then stores the accumulated results to element 0 of the buffers written at the end. */ |
| 17 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) |
| 18 | + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 |
| 19 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) |
| 20 | + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 |
| 21 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) |
| 22 | + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 |
| 23 | + vector<float, NUM> fVec1 = fBuf[11]; |
| 24 | + vector<float, NUM> fVec2 = fBuf[12]; |
| 25 | + vector<float, NUM> fVec3 = fBuf[13]; |
| 26 | + |
| 27 | + // CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.binary.v13f32(i32 35, <13 x float> [[fvec1]], <13 x float> [[fvec2]]) ; FMax(a,b) |
| 28 | + // CHECK: call <13 x float> @dx.op.binary.v13f32(i32 36, <13 x float> [[tmp]], <13 x float> [[fvec3]]) ; FMin(a,b) |
| 29 | + vector<float, NUM> fRes = clamp(fVec1, fVec2, fVec3); |
| 30 | + |
| 31 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) |
| 32 | + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 |
| 33 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) |
| 34 | + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 |
| 35 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) |
| 36 | + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 |
| 37 | + vector<float16_t, NUM> hVec1 = hBuf[14]; |
| 38 | + vector<float16_t, NUM> hVec2 = hBuf[15]; |
| 39 | + vector<float16_t, NUM> hVec3 = hBuf[16]; |
| 40 | + |
| 41 | + // CHECK: [[tmp:%.*]] = fcmp fast olt <13 x half> [[hvec2]], [[hvec1]] |
| 42 | + // CHECK: select <13 x i1> [[tmp]], <13 x half> zeroinitializer, <13 x half> <half 0xH3C00 |
| 43 | + vector<float16_t, NUM> hRes = step(hVec1, hVec2); |
| 44 | + |
| 45 | + // CHECK: [[tmp:%.*]] = fmul fast <13 x float> [[fvec1]], <float 0x |
| 46 | + // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 21, <13 x float> [[tmp]]) ; Exp(value) |
| 47 | + fRes += exp(fVec1); |
| 48 | + |
| 49 | + // CHECK: [[tmp:%.*]] = call <13 x half> @dx.op.unary.v13f16(i32 23, <13 x half> [[hvec1]]) ; Log(value) |
| 50 | + // CHECK: fmul fast <13 x half> [[tmp]], <half 0xH398C |
| 51 | + hRes += log(hVec1); |
| 52 | + |
| 53 | + // CHECK: [[sub:%.*]] = fsub fast <13 x float> [[fvec2]], [[fvec1]] |
| 54 | + // CHECK: [[xsub:%.*]] = fsub fast <13 x float> [[fvec3]], [[fvec1]] |
| 55 | + // CHECK: [[div:%.*]] = fdiv fast <13 x float> [[xsub]], [[sub]] |
| 56 | + // CHECK: [[sat:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 7, <13 x float> [[div]]) ; Saturate(value) |
| 57 | + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], <float 2.000000e+00, |
| 58 | + // CHECK: [[sub:%.*]] = fsub fast <13 x float> <float 3.000000e+00, {{.*}}>, [[mul]] |
| 59 | + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], [[sat]] |
| 60 | + // CHECK: fmul fast <13 x float> [[mul]], [[sub]] |
| 61 | + fRes += smoothstep(fVec1, fVec2, fVec3); |
| 62 | + |
| 63 | + // Intrinsics checked as expanding into plain LLVM instructions: radians, frexp, lerp, sign, or, and, select. |
| 64 | + |
| 65 | + // CHECK: fmul fast <13 x float> [[fvec3]], <float 0x3F91DF46A0000000 |
| 66 | + fRes += radians(fVec3); |
| 67 | + |
| 68 | + // CHECK: [[cmp:%.*]] = fcmp fast une <13 x float> [[fvec1]], zeroinitializer |
| 69 | + // CHECK: [[f2i:%.*]] = bitcast <13 x float> [[fvec1]] to <13 x i32> |
| 70 | + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], <i32 2139095040 |
| 71 | + // CHECK: [[add:%.*]] = add nsw <13 x i32> [[and]], <i32 -1056964608 |
| 72 | + // CHECK: [[shr:%.*]] = ashr <13 x i32> [[add]], <i32 23 |
| 73 | + // CHECK: [[i2f:%.*]] = sitofp <13 x i32> [[shr]] to <13 x float> |
| 74 | + // CHECK: [[sel:%.*]] = select <13 x i1> [[cmp]], <13 x float> [[i2f]], <13 x float> zeroinitializer |
| 75 | + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], <i32 8388607 |
| 76 | + // CHECK: or <13 x i32> [[and]], <i32 1056964608 |
| 77 | + vector<float, NUM> exp = fVec3; |
| 78 | + fRes += frexp(fVec1, exp); |
| 79 | + fRes += exp; |
| 80 | + |
| 81 | + // CHECK: [[tmp:%.*]] = fsub fast <13 x half> [[hvec3]], [[hvec2]] |
| 82 | + // CHECK: fmul fast <13 x half> [[tmp]], [[hvec1]] |
| 83 | + hRes += lerp(hVec2, hVec3, hVec1); |
| 84 | + |
| 85 | + |
| 86 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) |
| 87 | + // CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 |
| 88 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) |
| 89 | + // CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 |
| 90 | + vector<uint, NUM> uVec1 = uBuf[17]; |
| 91 | + vector<uint, NUM> uVec2 = uBuf[18]; |
| 92 | + |
| 93 | + vector<uint, NUM> signs = 1; |
| 94 | + // CHECK: [[cmp:%.*]] = icmp ne <13 x i32> [[uvec2]], zeroinitializer |
| 95 | + // CHECK: zext <13 x i1> [[cmp]] to <13 x i32> |
| 96 | + signs *= sign(uVec2); |
| 97 | + |
| 98 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) |
| 99 | + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 |
| 100 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) |
| 101 | + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 |
| 102 | + vector<int64_t, NUM> lVec1 = lBuf[19]; |
| 103 | + vector<int64_t, NUM> lVec2 = lBuf[20]; |
| 104 | + |
| 105 | + // CHECK: [[gt:%.*]] = icmp sgt <13 x i64> [[lvec2]], zeroinitializer |
| 106 | + // CHECK: [[lt:%.*]] = icmp slt <13 x i64> [[lvec2]], zeroinitializer |
| 107 | + // CHECK: [[igt:%.*]] = zext <13 x i1> [[gt]] to <13 x i32> |
| 108 | + // CHECK: [[ilt:%.*]] = zext <13 x i1> [[lt]] to <13 x i32> |
| 109 | + // CHECK: sub nsw <13 x i32> [[igt]], [[ilt]] |
| 110 | + signs *= sign(lVec2); |
| 111 | + |
| 112 | + vector<uint, NUM> uRes = signs; |
| 113 | + |
| 114 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) |
| 115 | + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 |
| 116 | + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer |
| 117 | + // CHECK: [[vec1:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> |
| 118 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) |
| 119 | + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 |
| 120 | + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer |
| 121 | + // CHECK: [[vec2:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> |
| 122 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) |
| 123 | + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 |
| 124 | + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer |
| 125 | + // CHECK: [[vec3:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> |
| 126 | + vector<bool, NUM> bVec1 = bBuf[21]; |
| 127 | + vector<bool, NUM> bVec2 = bBuf[22]; |
| 128 | + vector<bool, NUM> bVec3 = bBuf[23]; |
| 129 | + |
| 130 | + // CHECK: [[bvec2:%.*]] = icmp ne <13 x i32> [[vec2]], zeroinitializer |
| 131 | + // CHECK: [[bvec1:%.*]] = icmp ne <13 x i32> [[vec1]], zeroinitializer |
| 132 | + // CHECK: or <13 x i1> [[bvec2]], [[bvec1]] |
| 133 | + uRes += or(bVec1, bVec2); |
| 134 | + |
| 135 | + // CHECK: [[bvec3:%.*]] = icmp ne <13 x i32> [[vec3]], zeroinitializer |
| 136 | + // CHECK: and <13 x i1> [[bvec3]], [[bvec2]] |
| 137 | + uRes += and(bVec2, bVec3); |
| 138 | + |
| 139 | + // CHECK: select <13 x i1> [[bvec3]], <13 x i64> [[lvec1]], <13 x i64> [[lvec2]] |
| 140 | + vector<int64_t, NUM> lRes = select(bVec3, lVec1, lVec2); |
| 141 | + /* dot is checked as scalarized code: extractelement, one scalar fmul, then a chain of twelve scalar FMad calls. */ |
| 142 | + // CHECK: [[el1:%.*]] = extractelement <13 x float> [[fvec1]] |
| 143 | + // CHECK: [[el2:%.*]] = extractelement <13 x float> [[fvec2]] |
| 144 | + // CHECK: [[mul:%.*]] = fmul fast float [[el2]], [[el1]] |
| 145 | + // CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) |
| 146 | + // CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad1]]) ; FMad(a,b,c) |
| 147 | + // CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad2]]) ; FMad(a,b,c) |
| 148 | + // CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad3]]) ; FMad(a,b,c) |
| 149 | + // CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad4]]) ; FMad(a,b,c) |
| 150 | + // CHECK: [[mad6:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad5]]) ; FMad(a,b,c) |
| 151 | + // CHECK: [[mad7:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad6]]) ; FMad(a,b,c) |
| 152 | + // CHECK: [[mad8:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad7]]) ; FMad(a,b,c) |
| 153 | + // CHECK: [[mad9:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad8]]) ; FMad(a,b,c) |
| 154 | + // CHECK: [[mad10:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad9]]) ; FMad(a,b,c) |
| 155 | + // CHECK: [[mad11:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad10]]) ; FMad(a,b,c) |
| 156 | + // CHECK: [[mad12:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad11]]) ; FMad(a,b,c) |
| 157 | + fRes += dot(fVec1, fVec2); |
| 158 | + /* The remaining intrinsics (atan, min, mad, fma) are each checked as a single vector dx.op call. */ |
| 159 | + // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 17, <13 x float> [[fvec1]]) ; Atan(value) |
| 160 | + fRes += atan(fVec1); |
| 161 | + |
| 162 | + // CHECK: call <13 x i32> @dx.op.binary.v13i32(i32 40, <13 x i32> [[uvec1]], <13 x i32> [[uvec2]]) ; UMin(a,b) |
| 163 | + uRes += min(uVec1, uVec2); |
| 164 | + |
| 165 | + // CHECK: call <13 x float> @dx.op.tertiary.v13f32(i32 46, <13 x float> [[fvec1]], <13 x float> [[fvec2]], <13 x float> [[fvec3]]) ; FMad(a,b,c) |
| 166 | + fRes += mad(fVec1, fVec2, fVec3); |
| 167 | + |
| 168 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) |
| 169 | + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 |
| 170 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) |
| 171 | + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 |
| 172 | + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) |
| 173 | + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 |
| 174 | + vector<double, NUM> dVec1 = dBuf[24]; |
| 175 | + vector<double, NUM> dVec2 = dBuf[25]; |
| 176 | + vector<double, NUM> dVec3 = dBuf[26]; |
| 177 | + |
| 178 | + // CHECK: call <13 x double> @dx.op.tertiary.v13f64(i32 47, <13 x double> [[dvec1]], <13 x double> [[dvec2]], <13 x double> [[dvec3]]) |
| 179 | + vector<double, NUM> dRes = fma(dVec1, dVec2, dVec3); |
| 180 | + /* Store each accumulated result vector to element 0 of its buffer (bBuf is only read). */ |
| 181 | + hBuf[0] = hRes; |
| 182 | + fBuf[0] = fRes; |
| 183 | + dBuf[0] = dRes; |
| 184 | + uBuf[0] = uRes; |
| 185 | + lBuf[0] = lRes; |
| 186 | +} |
0 commit comments