33#include < cstdio>
44#include < cstdlib>
55#include < cstring>
6+ #include < limits>
67#include < memory>
78
89#define HIP_CHECK (call ) \
1516 } \
1617 } while (0 )
1718
18- static double ulp_distance (double a, double b) {
19- if (std::isnan (a) && std::isnan (b))
19+ template <typename T> struct IntRep ;
20+ template <> struct IntRep <double > { using type = int64_t ; };
21+ template <> struct IntRep <float > { using type = int32_t ; };
22+
23+ // Compute ULP distance by reinterpreting same-sign IEEE 754 values as
24+ // integers and taking the absolute difference. This gives the ULP
25+ // distance directly: equal values differ by 0, adjacent values by 1.
26+ // See: https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
27+ template <typename T> static double ulp_distance (T a, T b) {
28+ double da = static_cast <double >(a), db = static_cast <double >(b);
29+ if (std::isnan (da) && std::isnan (db))
2030 return 0.0 ;
21- if (std::isnan (a ) || std::isnan (b ))
31+ if (std::isnan (da ) || std::isnan (db ))
2232 return INFINITY;
2333 if (a == b)
2434 return 0.0 ;
25- int64_t ai, bi;
26- memcpy (&ai, &a, sizeof (double ));
27- memcpy (&bi, &b, sizeof (double ));
35+ using I = typename IntRep<T>::type;
36+ I ai, bi;
37+ memcpy (&ai, &a, sizeof (T));
38+ memcpy (&bi, &b, sizeof (T));
2839 if ((ai < 0 ) != (bi < 0 ))
2940 return INFINITY;
30- return (double )llabs (ai - bi);
41+ int64_t diff = static_cast <int64_t >(ai) - static_cast <int64_t >(bi);
42+ return static_cast <double >(diff < 0 ? -diff : diff);
3143}
3244
3345enum MathFunc { FN_EXP, FN_EXP2, FN_EXP10 };
3446
35- __global__ void compute_exp ( const double *in, double *out, int n,
36- MathFunc fn) {
47+ template < typename T>
48+ __global__ void compute_exp ( const T *in, T *out, int n, MathFunc fn) {
3749 int i = blockIdx.x * blockDim.x + threadIdx.x ;
3850 if (i >= n)
3951 return ;
@@ -50,13 +62,16 @@ __global__ void compute_exp(const double *in, double *out, int n,
5062 }
5163}
5264
53- static double host_exp10 (double x) { return pow (10.0 , x); }
65+ static double host_exp (double x) { return ::exp (x); }
66+ static double host_exp2 (double x) { return ::exp2 (x); }
67+ static double host_exp10 (double x) { return ::pow (10.0 , x); }
5468
69+ template <typename T>
5570static int test_func (const char *name, MathFunc fn,
56- double (*host_fn)(double ), const double *inputs, int n,
71+ double (*host_fn)(double ), const T *inputs, int n,
5772 double max_ulp) {
58- size_t sz = n * sizeof (double );
59- double *d_in, *d_out;
73+ size_t sz = n * sizeof (T );
74+ T *d_in, *d_out;
6075 HIP_CHECK (hipMalloc (&d_in, sz));
6176 HIP_CHECK (hipMalloc (&d_out, sz));
6277 HIP_CHECK (hipMemcpy (d_in, inputs, sz, hipMemcpyHostToDevice));
@@ -66,20 +81,20 @@ static int test_func(const char *name, MathFunc fn,
6681 compute_exp<<<blocks, threads>>>(d_in, d_out, n, fn);
6782 HIP_CHECK (hipDeviceSynchronize ());
6883
69- auto h_out = std::make_unique<double []>(n);
84+ auto h_out = std::make_unique<T []>(n);
7085 HIP_CHECK (hipMemcpy (h_out.get (), d_out, sz, hipMemcpyDeviceToHost));
7186
7287 int errs = 0 ;
7388 double worst_ulp = 0.0 ;
7489 for (int i = 0 ; i < n; i++) {
75- double expected = host_fn (inputs[i]);
90+ T expected = static_cast <T>( host_fn (static_cast < double >( inputs[i])) );
7691 double ulp = ulp_distance (h_out[i], expected);
7792 if (ulp > worst_ulp)
7893 worst_ulp = ulp;
7994 if (ulp > max_ulp) {
8095 if (errs < 10 )
8196 printf (" FAIL %s(%a) = %a, expected %a, ulp = %.0f\n " , name,
82- inputs[i], h_out[i], expected, ulp);
97+ ( double ) inputs[i], ( double ) h_out[i], ( double ) expected, ulp);
8398 errs++;
8499 }
85100 }
@@ -91,33 +106,53 @@ static int test_func(const char *name, MathFunc fn,
91106 return errs;
92107}
93108
94- int main () {
95- const int N_SPECIAL = 12 ;
96- double special[] = {0.0 , -0.0 , 1.0 , -1.0 , 0.5 , -0.5 ,
97- 1e-15 , -1e-15 , 700.0 , -700.0 , 1e-300 , -1e-300 };
109+ template <typename T>
110+ static int test_type (const char *type_name, double max_ulp, double range_lo,
111+ double range_hi) {
112+ T special[] = {
113+ T (0.0 ), T (-0.0 ), T (1.0 ), T (-1.0 ), T (0.5 ), T (-0.5 ), T (1e-5 ), T (-1e-5 ),
114+ std::numeric_limits<T>::infinity (),
115+ -std::numeric_limits<T>::infinity (),
116+ std::numeric_limits<T>::quiet_NaN (),
117+ std::numeric_limits<T>::denorm_min (),
118+ -std::numeric_limits<T>::denorm_min (),
119+ std::numeric_limits<T>::min (),
120+ std::numeric_limits<T>::max (),
121+ };
122+ const int N_SPECIAL = sizeof (special) / sizeof (special[0 ]);
98123
99124 const int N_RANGE = 2048 ;
100- double range[N_RANGE] ;
125+ auto range = std::make_unique<T[]>(N_RANGE) ;
101126 for (int i = 0 ; i < N_RANGE; i++)
102- range[i] = -700.0 + 1400.0 * i / (N_RANGE - 1 );
127+ range[i] =
128+ static_cast <T>(range_lo + (range_hi - range_lo) * i / (N_RANGE - 1 ));
103129
104130 const int N_SMALL = 256 ;
105- double small[N_SMALL] ;
131+ auto small_vals = std::make_unique<T[]>(N_SMALL) ;
106132 for (int i = 0 ; i < N_SMALL; i++)
107- small [i] = -1.0 + 2.0 * i / (N_SMALL - 1 );
133+ small_vals [i] = static_cast <T>( -1.0 + 2.0 * i / (N_SMALL - 1 ) );
108134
109135 int total = N_SPECIAL + N_RANGE + N_SMALL;
110- auto inputs = std::make_unique<double []>(total);
111- memcpy (inputs.get (), special, N_SPECIAL * sizeof (double ));
112- memcpy (inputs.get () + N_SPECIAL, range, N_RANGE * sizeof (double ));
113- memcpy (inputs.get () + N_SPECIAL + N_RANGE, small, N_SMALL * sizeof (double ));
136+ auto inputs = std::make_unique<T[]>(total);
137+ std::copy (special, special + N_SPECIAL, inputs.get ());
138+ std::copy (range.get (), range.get () + N_RANGE, inputs.get () + N_SPECIAL);
139+ std::copy (small_vals.get (), small_vals.get () + N_SMALL,
140+ inputs.get () + N_SPECIAL + N_RANGE);
141+
142+ printf (" Testing %s math accuracy (max %.0f ULP, %d values):\n " , type_name,
143+ max_ulp, total);
144+ int errs = 0 ;
145+ errs += test_func (" exp" , FN_EXP, host_exp, inputs.get (), total, max_ulp);
146+ errs += test_func (" exp2" , FN_EXP2, host_exp2, inputs.get (), total, max_ulp);
147+ errs +=
148+ test_func (" exp10" , FN_EXP10, host_exp10, inputs.get (), total, max_ulp);
149+ return errs;
150+ }
114151
152+ int main () {
115153 int errs = 0 ;
116- double max_ulp = 1.0 ;
117- printf (" Testing f64 math accuracy (max %.0f ULP):\n " , max_ulp);
118- errs += test_func (" exp" , FN_EXP, exp, inputs.get (), total, max_ulp);
119- errs += test_func (" exp2" , FN_EXP2, exp2, inputs.get (), total, max_ulp);
120- errs += test_func (" exp10" , FN_EXP10, host_exp10, inputs.get (), total, max_ulp);
154+ errs += test_type<float >(" f32" , 1.0 , -87.0 , 87.0 );
155+ errs += test_type<double >(" f64" , 1.0 , -700.0 , 700.0 );
121156
122157 if (errs)
123158 printf (" %d total errors\n " , errs);
0 commit comments