|
26 | 26 | #include "WEXAdapter.h" |
27 | 27 | #endif |
28 | 28 | #include "dxc/Support/Unicode.h" |
29 | | -#include "dxc/Test/TestConfig.h" |
30 | 29 | #include "dxc/DXIL/DxilConstants.h" // DenormMode |
31 | 30 |
|
| 31 | +#ifdef _HLK_CONF |
| 32 | +#define DEFAULT_TEST_DIR "" |
| 33 | +#else |
| 34 | +#include "dxc/Test/TestConfig.h" |
| 35 | +#endif |
| 36 | + |
32 | 37 | using namespace std; |
33 | 38 |
|
34 | 39 | #ifndef HLSLDATAFILEPARAM |
@@ -406,91 +411,9 @@ inline bool isnanFloat16(uint16_t val) { |
406 | 411 | (val & FLOAT16_BIT_MANTISSA) != 0; |
407 | 412 | } |
408 | 413 |
|
409 | | -inline uint16_t ConvertFloat32ToFloat16(float val) { |
410 | | - union Bits { |
411 | | - uint32_t u_bits; |
412 | | - float f_bits; |
413 | | - }; |
414 | | - |
415 | | - static const uint32_t SignMask = 0x8000; |
416 | | - |
417 | | - // Bit pattern of the f32 value 2^-14, the smallest magnitude that is still a normal (non-denormal) f16 |
418 | | - static const uint32_t Min16in32 = 0x38800000; |
419 | | - |
420 | | - // Largest finite f32 bit pattern; any signless bit pattern above it is Inf or NaN |
421 | | - static const uint32_t Max32 = 0x7f7FFFFF; |
422 | | - |
423 | | - // Mask selecting the 23 mantissa bits of an f32 |
424 | | - static const uint32_t Fraction32Mask = 0x007FFFFF; |
425 | | - |
426 | | - // Bit pattern of the f32 value 2^24; multiplying by it scales a sub-normal-range input to its integer f16 mantissa |
427 | | - static const uint32_t DenormalRatio = 0x4B800000; |
428 | | - |
429 | | - static const uint32_t NormalDelta = 0x38000000; |
430 | | - |
431 | | - Bits bits; |
432 | | - bits.f_bits = val; |
433 | | - uint32_t sign = bits.u_bits & (SignMask << 16); |
434 | | - Bits Abs; |
435 | | - Abs.u_bits = bits.u_bits ^ sign; |
436 | | - |
437 | | - bool isLessThanNormal = Abs.f_bits < *(const float*)&Min16in32; |
438 | | - bool isInfOrNaN = Abs.u_bits > Max32; |
439 | | - |
440 | | - if (isLessThanNormal) { |
441 | | - // Compute denormal result: scale by 2^24, truncate to an integer mantissa, then OR in the sign bit |
442 | | - return (uint16_t)(Abs.f_bits * *(const float*)(&DenormalRatio)) | (uint16_t)(sign >> 16); |
443 | | - } |
444 | | - else if (isInfOrNaN) { |
445 | | - // Compute Inf or NaN result: FLOAT16_BIT_EXP always set; FLOAT16_BIT_MANTISSA bits set only when the f32 fraction is nonzero (NaN) |
446 | | - uint32_t Fraction = Abs.u_bits & Fraction32Mask; |
447 | | - uint16_t IsNaN = Fraction == 0 ? 0 : 0xffff; |
448 | | - return (IsNaN & FLOAT16_BIT_MANTISSA) | FLOAT16_BIT_EXP | (uint16_t)(sign >> 16); |
449 | | - } |
450 | | - else { |
451 | | - // Compute normal result: subtract NormalDelta to rebias the exponent, then shift out the low 13 mantissa bits (truncation, no rounding) |
452 | | - return (uint16_t)((Abs.u_bits - NormalDelta) >> 13) | (uint16_t)(sign >> 16); |
453 | | - } |
454 | | -} |
455 | | - |
456 | | -inline float ConvertFloat16ToFloat32(uint16_t x) { |
457 | | - union Bits { |
458 | | - float f_bits; |
459 | | - uint32_t u_bits; |
460 | | - }; |
461 | | - |
462 | | - uint32_t Sign = (x & FLOAT16_BIT_SIGN) << 16; |
463 | | - |
464 | | - // nan -> exponent all set and mantissa is nonzero |
465 | | - // +/-inf -> exponent all set and mantissa is zero |
466 | | - // denorm -> exponent zero and significand nonzero |
467 | | - uint32_t Abs = (x & 0x7fff); |
468 | | - uint32_t IsNormal = Abs > FLOAT16_BIGGEST_DENORM; |
469 | | - uint32_t IsInfOrNaN = Abs > FLOAT16_BIGGEST_NORMAL; |
470 | | - |
471 | | - // Signless result for denorms: the integer mantissa times 2^-24 (0x33800000 is the f32 bit pattern of 2^-24) |
472 | | - uint32_t DenormRatio = 0x33800000; |
473 | | - float DenormResult = Abs * (*(float*)&DenormRatio); |
474 | | - |
475 | | - uint32_t AbsShifted = Abs << 13; |
476 | | - // Signless result for normals |
477 | | - uint32_t NormalResult = AbsShifted + 0x38000000; |
478 | | - // Signless result for inf & nans |
479 | | - uint32_t InfResult = AbsShifted + 0x70000000; |
480 | | - |
481 | | - Bits bits; |
482 | | - bits.u_bits = 0; |
483 | | - if (IsInfOrNaN) |
484 | | - bits.u_bits |= InfResult; |
485 | | - else if (IsNormal) |
486 | | - bits.u_bits |= NormalResult; |
487 | | - else |
488 | | - bits.f_bits = DenormResult; |
489 | | - bits.u_bits |= Sign; |
490 | | - return bits.f_bits; |
491 | | -} |
492 | | -uint16_t ConvertFloat32ToFloat16(float val); |
493 | | -float ConvertFloat16ToFloat32(uint16_t val); |
| 414 | +// These are defined in ShaderOpTest.cpp using DirectXPackedVector functions. |
| 415 | +uint16_t ConvertFloat32ToFloat16(float val) throw(); |
| 416 | +float ConvertFloat16ToFloat32(uint16_t val) throw(); |
494 | 417 |
|
495 | 418 | inline bool CompareFloatULP(const float &fsrc, const float &fref, int ULPTolerance, |
496 | 419 | hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) { |
|
0 commit comments