@@ -406,91 +406,9 @@ inline bool isnanFloat16(uint16_t val) {
406406 (val & FLOAT16_BIT_MANTISSA) != 0 ;
407407}
408408
// Converts an IEEE-754 binary32 value to the bit pattern of a binary16 (half).
//
// Mapping:
//   - NaN            -> half NaN with every mantissa bit set (payload dropped).
//   - +/-infinity    -> +/-infinity.
//   - |val| >= 65536 -> +/-infinity (magnitude is not representable in half).
//   - |val| <  2^-14 -> half denormal, or signed zero when it underflows.
//   - otherwise      -> normal half.
// The sign of the input is always preserved. Surplus mantissa bits are
// truncated (round toward zero); no round-to-nearest is performed.
inline uint16_t ConvertFloat32ToFloat16(float val) {
  // The union is used to view the float's bit pattern, as elsewhere in these
  // helpers. All threshold tests below are done on the integer view so no
  // pointer casts between uint32_t and float are needed.
  union Bits {
    uint32_t u_bits;
    float f_bits;
  };

  // binary32 layout.
  static const uint32_t Sign32Mask = 0x80000000;
  static const uint32_t Inf32 = 0x7F800000;

  // Smallest f32 magnitude that no longer fits in f16: 65536.0f (2^16).
  static const uint32_t Overflow16in32 = 0x47800000;

  // Minimum f32 value representable in f16 without denormalizing: 2^-14.
  static const uint32_t Min16in32 = 0x38800000;

  // Exponent re-bias between the formats: (127 - 15) << 23.
  static const uint32_t NormalDelta = 0x38000000;

  // pow(2,24): scales a sub-2^-14 magnitude so that its integer part is the
  // half denormal mantissa.
  static const float DenormalRatio = 16777216.0f;

  // binary16 layout.
  static const uint16_t Half16ExpMask = 0x7C00;
  static const uint16_t Half16MantissaMask = 0x03FF;

  Bits bits;
  bits.f_bits = val;
  const uint16_t Sign16 =
      static_cast<uint16_t>((bits.u_bits & Sign32Mask) >> 16);

  Bits Abs;
  Abs.u_bits = bits.u_bits & ~Sign32Mask;

  uint16_t Magnitude;
  if (Abs.u_bits > Inf32) {
    // NaN: exponent all ones with a non-zero mantissa.
    Magnitude = static_cast<uint16_t>(Half16ExpMask | Half16MantissaMask);
  } else if (Abs.u_bits >= Overflow16in32) {
    // Infinity, or a finite value too large for half. Without this clamp the
    // re-biased exponent would spill past its 5 bits and corrupt the sign.
    Magnitude = Half16ExpMask;
  } else if (Abs.u_bits < Min16in32) {
    // Denormal (or zero) result; the product is below 1024 so it fits.
    Magnitude = static_cast<uint16_t>(Abs.f_bits * DenormalRatio);
  } else {
    // Normal result: re-bias the exponent and drop the low 13 mantissa bits.
    Magnitude = static_cast<uint16_t>((Abs.u_bits - NormalDelta) >> 13);
  }
  return static_cast<uint16_t>(Magnitude | Sign16);
}
455-
// Converts the bit pattern of an IEEE-754 binary16 (half) value to binary32.
//
// Classification of the sign-less input bits:
//   NaN     -> exponent all set and mantissa non-zero
//   +/-inf  -> exponent all set and mantissa zero
//   denorm  -> exponent zero and mantissa non-zero
// Every half value is exactly representable as a float, so the conversion is
// lossless; a NaN keeps its mantissa bits (shifted into the float mantissa).
inline float ConvertFloat16ToFloat32(uint16_t x) {
  // The union is used to move between the float and its bit pattern, as
  // elsewhere in these helpers.
  union Bits {
    float f_bits;
    uint32_t u_bits;
  };

  // binary16 layout.
  static const uint32_t Half16SignMask = 0x8000;
  static const uint32_t Half16BiggestDenorm = 0x03FF;
  static const uint32_t Half16BiggestNormal = 0x7BFF;

  // pow(2,-24): value of one unit in the last place of a half denormal.
  static const float DenormRatio = 5.9604644775390625e-08f;

  // Exponent re-bias for normals: (127 - 15) << 23.
  static const uint32_t NormalDelta = 0x38000000;
  // Offset that turns the all-ones 5-bit exponent into the all-ones 8-bit one.
  static const uint32_t InfNaNDelta = 0x70000000;

  const uint32_t Sign = (x & Half16SignMask) << 16;
  const uint32_t Abs = (x & 0x7fff);
  // Aligns the half mantissa (10 bits) with the float mantissa (23 bits).
  const uint32_t AbsShifted = Abs << 13;

  Bits bits;
  if (Abs > Half16BiggestNormal) {
    // Sign-less result for inf & NaNs.
    bits.u_bits = AbsShifted + InfNaNDelta;
  } else if (Abs > Half16BiggestDenorm) {
    // Sign-less result for normals.
    bits.u_bits = AbsShifted + NormalDelta;
  } else {
    // Sign-less result for denormals and zero: mantissa * 2^-24.
    bits.f_bits = Abs * DenormRatio;
  }
  bits.u_bits |= Sign;
  return bits.f_bits;
}
492- uint16_t ConvertFloat32ToFloat16 (float val);
493- float ConvertFloat16ToFloat32 (uint16_t val);
409+ // These are defined in ShaderOpTest.cpp using DirectXPackedVector functions.
410+ uint16_t ConvertFloat32ToFloat16 (float val) throw();
411+ float ConvertFloat16ToFloat32 (uint16_t val) throw();
494412
495413inline bool CompareFloatULP (const float &fsrc, const float &fref, int ULPTolerance,
496414 hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) {
0 commit comments