@@ -237,6 +237,227 @@ struct HLSLHalf_t {
237237 DirectX::PackedVector::HALF Val = 0 ;
238238};
239239
240+ // Min precision wrapper types. Without -enable-16bit-types, min precision types
241+ // are 32-bit in DXIL storage. These thin wrappers provide distinct C++ types
242+ // that map to different HLSL type strings via DATA_TYPE.
// Float-backed wrapper used as the C++ stand-in for a min precision float.
// The value is held at full float width; nothing in this type rounds to 16
// bits.
struct HLSLMin16Float_t {
  // Backing storage for the wrapped value.
  float Val;

  // Default construction yields positive zero.
  constexpr HLSLMin16Float_t() : Val(0.0f) {}

  // Floating-point sources convert implicitly; a double is narrowed to float.
  constexpr HLSLMin16Float_t(float Value) : Val(Value) {}
  constexpr HLSLMin16Float_t(double Value) : Val(static_cast<float>(Value)) {}

  // Integer sources have to be converted explicitly.
  explicit constexpr HLSLMin16Float_t(int Value)
      : Val(static_cast<float>(Value)) {}
  explicit constexpr HLSLMin16Float_t(uint32_t Value)
      : Val(static_cast<float>(Value)) {}

  // Implicit read-back of the stored float.
  constexpr operator float() const { return Val; }

  // Relational operators compare the stored floats directly, so any NaN
  // operand makes every comparison except != evaluate to false.
  bool operator==(const HLSLMin16Float_t &Rhs) const { return Val == Rhs.Val; }
  bool operator!=(const HLSLMin16Float_t &Rhs) const { return Val != Rhs.Val; }
  bool operator<(const HLSLMin16Float_t &Rhs) const { return Val < Rhs.Val; }
  bool operator>(const HLSLMin16Float_t &Rhs) const { return Val > Rhs.Val; }
  bool operator<=(const HLSLMin16Float_t &Rhs) const { return Val <= Rhs.Val; }
  bool operator>=(const HLSLMin16Float_t &Rhs) const { return Val >= Rhs.Val; }

  // Arithmetic is carried out on the stored floats and re-wrapped.
  HLSLMin16Float_t operator+(const HLSLMin16Float_t &Rhs) const {
    const float Sum = Val + Rhs.Val;
    return HLSLMin16Float_t(Sum);
  }
  HLSLMin16Float_t operator-(const HLSLMin16Float_t &Rhs) const {
    const float Difference = Val - Rhs.Val;
    return HLSLMin16Float_t(Difference);
  }
  HLSLMin16Float_t operator*(const HLSLMin16Float_t &Rhs) const {
    const float Product = Val * Rhs.Val;
    return HLSLMin16Float_t(Product);
  }
  HLSLMin16Float_t operator/(const HLSLMin16Float_t &Rhs) const {
    const float Quotient = Val / Rhs.Val;
    return HLSLMin16Float_t(Quotient);
  }
  // Remainder follows std::fmod (result takes the sign of the dividend).
  HLSLMin16Float_t operator%(const HLSLMin16Float_t &Rhs) const {
    const float Remainder = std::fmod(Val, Rhs.Val);
    return HLSLMin16Float_t(Remainder);
  }

  // Stream insertion prints the stored float with the stream's formatting.
  friend std::wostream &operator<<(std::wostream &Stream,
                                   const HLSLMin16Float_t &Value) {
    return Stream << Value.Val;
  }
  friend std::ostream &operator<<(std::ostream &Stream,
                                  const HLSLMin16Float_t &Value) {
    return Stream << Value.Val;
  }
};
// int32_t-backed wrapper used as the C++ stand-in for a min precision signed
// integer. All operators act on the full 32-bit value; nothing here truncates
// to 16 bits.
struct HLSLMin16Int_t {
  // Left-shifts the two's-complement bit pattern of Value by Count bits.
  // Shifting a negative signed integer is undefined behaviour before C++20,
  // so the shift is performed on the unsigned representation and converted
  // back. Count must still be in [0, 31].
  static constexpr int32_t shiftLeftWrapped(int32_t Value, int32_t Count) {
    return static_cast<int32_t>(static_cast<uint32_t>(Value) << Count);
  }

  // Default construction yields zero.
  constexpr HLSLMin16Int_t() : Val(0) {}
  // Implicit conversions from the common arithmetic types. Wider integers
  // and floating-point sources are narrowed with static_cast.
  constexpr HLSLMin16Int_t(int32_t I) : Val(I) {}
  constexpr HLSLMin16Int_t(int64_t I) : Val(static_cast<int32_t>(I)) {}
  constexpr HLSLMin16Int_t(uint32_t U) : Val(static_cast<int32_t>(U)) {}
  constexpr HLSLMin16Int_t(uint64_t U) : Val(static_cast<int32_t>(U)) {}
  constexpr HLSLMin16Int_t(float F) : Val(static_cast<int32_t>(F)) {}
  constexpr HLSLMin16Int_t(double D) : Val(static_cast<int32_t>(D)) {}

  // Implicit read-back of the stored integer.
  constexpr operator int32_t() const { return Val; }

  // Relational operators compare the stored integers.
  bool operator==(const HLSLMin16Int_t &O) const { return Val == O.Val; }
  bool operator!=(const HLSLMin16Int_t &O) const { return Val != O.Val; }
  bool operator<(const HLSLMin16Int_t &O) const { return Val < O.Val; }
  bool operator>(const HLSLMin16Int_t &O) const { return Val > O.Val; }
  bool operator<=(const HLSLMin16Int_t &O) const { return Val <= O.Val; }
  bool operator>=(const HLSLMin16Int_t &O) const { return Val >= O.Val; }

  // Arithmetic on the stored integers. Division and remainder by zero are
  // not guarded against; callers must supply non-zero divisors.
  HLSLMin16Int_t operator+(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val + O.Val);
  }
  HLSLMin16Int_t operator-(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val - O.Val);
  }
  HLSLMin16Int_t operator*(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val * O.Val);
  }
  HLSLMin16Int_t operator/(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val / O.Val);
  }
  HLSLMin16Int_t operator%(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val % O.Val);
  }

  // Bitwise operators.
  HLSLMin16Int_t operator&(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val & O.Val);
  }
  HLSLMin16Int_t operator|(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val | O.Val);
  }
  HLSLMin16Int_t operator^(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val ^ O.Val);
  }
  // Left shift goes through shiftLeftWrapped so that negative left operands
  // are well defined. constexpr so the result can be checked at compile time.
  constexpr HLSLMin16Int_t operator<<(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(shiftLeftWrapped(Val, O.Val));
  }
  // Right shift of the signed value as the compiler implements it.
  HLSLMin16Int_t operator>>(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val >> O.Val);
  }
  HLSLMin16Int_t operator~() const { return HLSLMin16Int_t(~Val); }

  // Compound assignment forms of the shift and bitwise operators.
  HLSLMin16Int_t &operator<<=(const HLSLMin16Int_t &O) {
    Val = shiftLeftWrapped(Val, O.Val);
    return *this;
  }
  HLSLMin16Int_t &operator>>=(const HLSLMin16Int_t &O) {
    Val >>= O.Val;
    return *this;
  }
  HLSLMin16Int_t &operator|=(const HLSLMin16Int_t &O) {
    Val |= O.Val;
    return *this;
  }
  HLSLMin16Int_t &operator&=(const HLSLMin16Int_t &O) {
    Val &= O.Val;
    return *this;
  }
  HLSLMin16Int_t &operator^=(const HLSLMin16Int_t &O) {
    Val ^= O.Val;
    return *this;
  }

  // Logical operators return a wrapped 0 or 1 rather than bool. Being
  // overloaded operators, they evaluate both operands (no short-circuit).
  HLSLMin16Int_t operator&&(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val && O.Val);
  }
  HLSLMin16Int_t operator||(const HLSLMin16Int_t &O) const {
    return HLSLMin16Int_t(Val || O.Val);
  }

  // Stream insertion prints the stored integer.
  friend std::wostream &operator<<(std::wostream &Os,
                                   const HLSLMin16Int_t &Obj) {
    Os << Obj.Val;
    return Os;
  }
  friend std::ostream &operator<<(std::ostream &Os, const HLSLMin16Int_t &Obj) {
    Os << Obj.Val;
    return Os;
  }

  // Backing storage for the wrapped value.
  int32_t Val;
};
// uint32_t-backed wrapper used as the C++ stand-in for a min precision
// unsigned integer. Every operator works on the full 32-bit value, so
// arithmetic wraps modulo 2^32 rather than at 16 bits.
struct HLSLMin16Uint_t {
  // Backing storage for the wrapped value.
  uint32_t Val;

  // Default construction yields zero.
  constexpr HLSLMin16Uint_t() : Val{0u} {}

  // Implicit conversions from the common arithmetic types; anything other
  // than uint32_t is converted with static_cast.
  constexpr HLSLMin16Uint_t(uint32_t Value) : Val{Value} {}
  constexpr HLSLMin16Uint_t(uint64_t Value)
      : Val{static_cast<uint32_t>(Value)} {}
  constexpr HLSLMin16Uint_t(int32_t Value)
      : Val{static_cast<uint32_t>(Value)} {}
  constexpr HLSLMin16Uint_t(float Value) : Val{static_cast<uint32_t>(Value)} {}
  constexpr HLSLMin16Uint_t(double Value)
      : Val{static_cast<uint32_t>(Value)} {}

  // Implicit read-back of the stored integer.
  constexpr operator uint32_t() const { return Val; }

  // Equality and ordering; the derived forms are built on == and <.
  bool operator==(const HLSLMin16Uint_t &Rhs) const { return Val == Rhs.Val; }
  bool operator!=(const HLSLMin16Uint_t &Rhs) const { return !(*this == Rhs); }
  bool operator<(const HLSLMin16Uint_t &Rhs) const { return Val < Rhs.Val; }
  bool operator>(const HLSLMin16Uint_t &Rhs) const { return Rhs < *this; }
  bool operator<=(const HLSLMin16Uint_t &Rhs) const { return !(Rhs < *this); }
  bool operator>=(const HLSLMin16Uint_t &Rhs) const { return !(*this < Rhs); }

  // Arithmetic on the stored integers. Division and remainder by zero are
  // not guarded against.
  HLSLMin16Uint_t operator+(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val + Rhs.Val};
  }
  HLSLMin16Uint_t operator-(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val - Rhs.Val};
  }
  HLSLMin16Uint_t operator*(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val * Rhs.Val};
  }
  HLSLMin16Uint_t operator/(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val / Rhs.Val};
  }
  HLSLMin16Uint_t operator%(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val % Rhs.Val};
  }

  // Bitwise and shift operators.
  HLSLMin16Uint_t operator&(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val & Rhs.Val};
  }
  HLSLMin16Uint_t operator|(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val | Rhs.Val};
  }
  HLSLMin16Uint_t operator^(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val ^ Rhs.Val};
  }
  HLSLMin16Uint_t operator<<(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val << Rhs.Val};
  }
  HLSLMin16Uint_t operator>>(const HLSLMin16Uint_t &Rhs) const {
    return HLSLMin16Uint_t{Val >> Rhs.Val};
  }
  HLSLMin16Uint_t operator~() const { return HLSLMin16Uint_t{~Val}; }

  // In-place shift and bitwise updates; each returns *this for chaining.
  HLSLMin16Uint_t &operator<<=(const HLSLMin16Uint_t &Rhs) {
    Val = Val << Rhs.Val;
    return *this;
  }
  HLSLMin16Uint_t &operator>>=(const HLSLMin16Uint_t &Rhs) {
    Val = Val >> Rhs.Val;
    return *this;
  }
  HLSLMin16Uint_t &operator|=(const HLSLMin16Uint_t &Rhs) {
    Val = Val | Rhs.Val;
    return *this;
  }
  HLSLMin16Uint_t &operator&=(const HLSLMin16Uint_t &Rhs) {
    Val = Val & Rhs.Val;
    return *this;
  }
  HLSLMin16Uint_t &operator^=(const HLSLMin16Uint_t &Rhs) {
    Val = Val ^ Rhs.Val;
    return *this;
  }

  // Logical operators treat any non-zero value as true. Being overloaded
  // operators, they evaluate both operands (no short-circuit).
  bool operator&&(const HLSLMin16Uint_t &Rhs) const {
    return (Val != 0u) && (Rhs.Val != 0u);
  }
  bool operator||(const HLSLMin16Uint_t &Rhs) const {
    return (Val != 0u) || (Rhs.Val != 0u);
  }

  // Stream insertion prints the stored integer.
  friend std::wostream &operator<<(std::wostream &Stream,
                                   const HLSLMin16Uint_t &Value) {
    return Stream << Value.Val;
  }
  friend std::ostream &operator<<(std::ostream &Stream,
                                  const HLSLMin16Uint_t &Value) {
    return Stream << Value.Val;
  }
};
460+
240461enum class InputSet {
241462#define INPUT_SET (SYMBOL ) SYMBOL,
242463#include " LongVectorOps.def"
@@ -450,6 +671,57 @@ INPUT_SET(InputSet::SelectCond, 0.0, 1.0);
450671INPUT_SET (InputSet::AllOnes, 1.0 );
451672END_INPUT_SETS ()
452673
674+ // Min precision input sets. All values are exactly representable in float16
675+ // to avoid precision mismatch between CPU-side expected values and GPU-side
676+ // min precision computation. No FP specials (INF/NaN/denorm) as min precision
677+ // types do not support them.
678+ BEGIN_INPUT_SETS(HLSLMin16Float_t)
679+ INPUT_SET(InputSet::Default1, -1 .0f , -1 .0f , 1 .0f , -0 .03125f , 1 .0f , -0 .03125f ,
680+ 1 .0f , -0 .03125f , 1 .0f , -0 .03125f );
681+ INPUT_SET (InputSet::Default2, 1 .0f , -1 .0f , 1 .0f , -1 .0f , 1 .0f , -1 .0f , 1 .0f ,
682+ -1 .0f , 1 .0f , -1 .0f );
683+ INPUT_SET (InputSet::Default3, -1 .0f , 1 .0f , -1 .0f , 1 .0f , -1 .0f , 1 .0f , -1 .0f ,
684+ 1 .0f , -1 .0f , 1 .0f );
685+ INPUT_SET (InputSet::Zero, 0 .0f );
686+ INPUT_SET (InputSet::RangeHalfPi, -1 .0625f , 0 .046875f , -1 .046875f , 0 .3125f ,
687+ 1 .4375f , -0 .875f , 1 .375f , -0 .71875f , -0 .8125f , 0 .5625f );
688+ INPUT_SET (InputSet::RangeOne, 0 .328125f , 0 .71875f , -0 .953125f , 0 .671875f ,
689+ -0 .03125f , 0 .5f , 0 .84375f , -0 .671875f , -0 .6875f , -0 .90625f );
690+ INPUT_SET (InputSet::Positive, 1 .0f , 1 .0f , 342 .0f , 0 .03125f , 5504 .0f , 0 .03125f ,
691+ 1 .0f , 0 .03125f , 331 .25f , 3250 .0f );
692+ INPUT_SET (InputSet::SelectCond, 0 .0f , 1 .0f );
693+ INPUT_SET (InputSet::AllOnes, 1 .0f );
694+ END_INPUT_SETS ()
695+
696+ // Values constrained to int16 range. Kept small to avoid overflow ambiguity.
697+ // Shift amounts limited so results fit in int16 (-32768..32767).
698+ BEGIN_INPUT_SETS(HLSLMin16Int_t)
699+ INPUT_SET(InputSet::Default1, -6 , 1 , 7 , 3 , 8 , 4 , -3 , 8 , 8 , -2 );
700+ INPUT_SET (InputSet::Default2, 5 , -6 , -3 , -2 , 9 , 3 , 1 , -3 , -7 , 2 );
701+ INPUT_SET (InputSet::Default3, -5 , 6 , 3 , 2 , -9 , -3 , -1 , 3 , 7 , -2 );
702+ INPUT_SET (InputSet::BitShiftRhs, 1 , 6 , 3 , 0 , 9 , 3 , 12 , 11 , 11 , 14 );
703+ INPUT_SET (InputSet::Zero, 0 );
704+ INPUT_SET (InputSet::NoZero, 1 );
705+ INPUT_SET (InputSet::SelectCond, 0 , 1 );
706+ INPUT_SET (InputSet::AllOnes, 1 );
707+ INPUT_SET (InputSet::WaveMultiPrefixBitwise, 0x0 , 0x1 , 0x3 , 0x4 , 0x10 , 0x12 , 0xF ,
708+ -1 );
709+ END_INPUT_SETS ()
710+
711+ // Values constrained so results stay below 0x8000 (bit 15 clear). WARP may
712+ // compute min precision at 16-bit and sign-extend bit 15 on 32-bit store.
713+ BEGIN_INPUT_SETS(HLSLMin16Uint_t)
714+ INPUT_SET(InputSet::Default1, 3 , 7 , 3 , 5 , 5 , 10 , 4 , 8 , 9 , 10 );
715+ INPUT_SET (InputSet::Default2, 2 , 6 , 3 , 4 , 5 , 9 , 3 , 8 , 9 , 10 );
716+ INPUT_SET (InputSet::Default3, 4 , 5 , 4 , 5 , 3 , 7 , 3 , 1 , 11 , 9 );
717+ INPUT_SET (InputSet::Zero, 0 );
718+ INPUT_SET (InputSet::BitShiftRhs, 1 , 6 , 3 , 0 , 9 , 3 , 8 , 8 , 8 , 8 );
719+ INPUT_SET (InputSet::SelectCond, 0 , 1 );
720+ INPUT_SET (InputSet::AllOnes, 1 );
721+ INPUT_SET (InputSet::WaveMultiPrefixBitwise, 0x0 , 0x1 , 0x3 , 0x4 , 0x10 , 0x12 , 0xF ,
722+ 0x7FFF );
723+ END_INPUT_SETS ()
724+
453725#undef BEGIN_INPUT_SETS
454726#undef INPUT_SET
455727#undef END_INPUT_SETS
0 commit comments