33#include < cstdio>
44#include < cstdlib>
55#include < cstring>
6+ #include < limits>
67#include < memory>
78
89#define HIP_CHECK (call ) \
1516 } \
1617 } while (0 )
1718
18- static double ulp_distance (double a, double b) {
19- if (std::isnan (a) && std::isnan (b))
19+ template <typename T> struct IntRep ;
20+ template <> struct IntRep <double > { using type = int64_t ; };
21+ template <> struct IntRep <float > { using type = int32_t ; };
22+
23+ // Compute ULP distance by reinterpreting float bits as integers and
24+ // subtracting. IEEE 754 same-sign values have monotonically increasing
25+ // integer representations, so the integer difference equals the exact
26+ // count of representable floats between two values.
27+ // See: https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
28+ template <typename T> static double ulp_distance (T a, T b) {
29+ double da = static_cast <double >(a), db = static_cast <double >(b);
30+ if (std::isnan (da) && std::isnan (db))
2031 return 0.0 ;
21- if (std::isnan (a ) || std::isnan (b ))
32+ if (std::isnan (da ) || std::isnan (db ))
2233 return INFINITY;
2334 if (a == b)
2435 return 0.0 ;
25- int64_t ai, bi;
26- memcpy (&ai, &a, sizeof (double ));
27- memcpy (&bi, &b, sizeof (double ));
36+ using I = typename IntRep<T>::type;
37+ I ai, bi;
38+ memcpy (&ai, &a, sizeof (T));
39+ memcpy (&bi, &b, sizeof (T));
2840 if ((ai < 0 ) != (bi < 0 ))
2941 return INFINITY;
30- return (double )llabs (ai - bi);
42+ int64_t diff = static_cast <int64_t >(ai) - static_cast <int64_t >(bi);
43+ return static_cast <double >(diff < 0 ? -diff : diff);
3144}
3245
3346enum MathFunc { FN_EXP, FN_EXP2, FN_EXP10 };
3447
35- __global__ void compute_exp ( const double *in, double *out, int n,
36- MathFunc fn) {
48+ template < typename T>
49+ __global__ void compute_exp ( const T *in, T *out, int n, MathFunc fn) {
3750 int i = blockIdx.x * blockDim.x + threadIdx.x ;
3851 if (i >= n)
3952 return ;
@@ -50,13 +63,16 @@ __global__ void compute_exp(const double *in, double *out, int n,
5063 }
5164}
5265
53- static double host_exp10 (double x) { return pow (10.0 , x); }
66+ static double host_exp (double x) { return ::exp (x); }
67+ static double host_exp2 (double x) { return ::exp2 (x); }
68+ static double host_exp10 (double x) { return ::pow (10.0 , x); }
5469
70+ template <typename T>
5571static int test_func (const char *name, MathFunc fn,
56- double (*host_fn)(double ), const double *inputs, int n,
72+ double (*host_fn)(double ), const T *inputs, int n,
5773 double max_ulp) {
58- size_t sz = n * sizeof (double );
59- double *d_in, *d_out;
74+ size_t sz = n * sizeof (T );
75+ T *d_in, *d_out;
6076 HIP_CHECK (hipMalloc (&d_in, sz));
6177 HIP_CHECK (hipMalloc (&d_out, sz));
6278 HIP_CHECK (hipMemcpy (d_in, inputs, sz, hipMemcpyHostToDevice));
@@ -66,20 +82,20 @@ static int test_func(const char *name, MathFunc fn,
6682 compute_exp<<<blocks, threads>>>(d_in, d_out, n, fn);
6783 HIP_CHECK (hipDeviceSynchronize ());
6884
69- auto h_out = std::make_unique<double []>(n);
85+ auto h_out = std::make_unique<T []>(n);
7086 HIP_CHECK (hipMemcpy (h_out.get (), d_out, sz, hipMemcpyDeviceToHost));
7187
7288 int errs = 0 ;
7389 double worst_ulp = 0.0 ;
7490 for (int i = 0 ; i < n; i++) {
75- double expected = host_fn (inputs[i]);
91+ T expected = static_cast <T>( host_fn (static_cast < double >( inputs[i])) );
7692 double ulp = ulp_distance (h_out[i], expected);
7793 if (ulp > worst_ulp)
7894 worst_ulp = ulp;
7995 if (ulp > max_ulp) {
8096 if (errs < 10 )
8197 printf (" FAIL %s(%a) = %a, expected %a, ulp = %.0f\n " , name,
82- inputs[i], h_out[i], expected, ulp);
98+ ( double ) inputs[i], ( double ) h_out[i], ( double ) expected, ulp);
8399 errs++;
84100 }
85101 }
@@ -91,33 +107,53 @@ static int test_func(const char *name, MathFunc fn,
91107 return errs;
92108}
93109
94- int main () {
95- const int N_SPECIAL = 12 ;
96- double special[] = {0.0 , -0.0 , 1.0 , -1.0 , 0.5 , -0.5 ,
97- 1e-15 , -1e-15 , 700.0 , -700.0 , 1e-300 , -1e-300 };
110+ template <typename T>
111+ static int test_type (const char *type_name, double max_ulp, double range_lo,
112+ double range_hi) {
113+ T special[] = {
114+ T (0.0 ), T (-0.0 ), T (1.0 ), T (-1.0 ), T (0.5 ), T (-0.5 ), T (1e-5 ), T (-1e-5 ),
115+ std::numeric_limits<T>::infinity (),
116+ -std::numeric_limits<T>::infinity (),
117+ std::numeric_limits<T>::quiet_NaN (),
118+ std::numeric_limits<T>::denorm_min (),
119+ -std::numeric_limits<T>::denorm_min (),
120+ std::numeric_limits<T>::min (),
121+ std::numeric_limits<T>::max (),
122+ };
123+ const int N_SPECIAL = sizeof (special) / sizeof (special[0 ]);
98124
99125 const int N_RANGE = 2048 ;
100- double range[N_RANGE] ;
126+ auto range = std::make_unique<T[]>(N_RANGE) ;
101127 for (int i = 0 ; i < N_RANGE; i++)
102- range[i] = -700.0 + 1400.0 * i / (N_RANGE - 1 );
128+ range[i] =
129+ static_cast <T>(range_lo + (range_hi - range_lo) * i / (N_RANGE - 1 ));
103130
104131 const int N_SMALL = 256 ;
105- double small[N_SMALL] ;
132+ auto small_vals = std::make_unique<T[]>(N_SMALL) ;
106133 for (int i = 0 ; i < N_SMALL; i++)
107- small [i] = -1.0 + 2.0 * i / (N_SMALL - 1 );
134+ small_vals [i] = static_cast <T>( -1.0 + 2.0 * i / (N_SMALL - 1 ) );
108135
109136 int total = N_SPECIAL + N_RANGE + N_SMALL;
110- auto inputs = std::make_unique<double []>(total);
111- memcpy (inputs.get (), special, N_SPECIAL * sizeof (double ));
112- memcpy (inputs.get () + N_SPECIAL, range, N_RANGE * sizeof (double ));
113- memcpy (inputs.get () + N_SPECIAL + N_RANGE, small, N_SMALL * sizeof (double ));
137+ auto inputs = std::make_unique<T[]>(total);
138+ std::copy (special, special + N_SPECIAL, inputs.get ());
139+ std::copy (range.get (), range.get () + N_RANGE, inputs.get () + N_SPECIAL);
140+ std::copy (small_vals.get (), small_vals.get () + N_SMALL,
141+ inputs.get () + N_SPECIAL + N_RANGE);
142+
143+ printf (" Testing %s math accuracy (max %.0f ULP, %d values):\n " , type_name,
144+ max_ulp, total);
145+ int errs = 0 ;
146+ errs += test_func (" exp" , FN_EXP, host_exp, inputs.get (), total, max_ulp);
147+ errs += test_func (" exp2" , FN_EXP2, host_exp2, inputs.get (), total, max_ulp);
148+ errs +=
149+ test_func (" exp10" , FN_EXP10, host_exp10, inputs.get (), total, max_ulp);
150+ return errs;
151+ }
114152
153+ int main () {
115154 int errs = 0 ;
116- double max_ulp = 1.0 ;
117- printf (" Testing f64 math accuracy (max %.0f ULP):\n " , max_ulp);
118- errs += test_func (" exp" , FN_EXP, exp, inputs.get (), total, max_ulp);
119- errs += test_func (" exp2" , FN_EXP2, exp2, inputs.get (), total, max_ulp);
120- errs += test_func (" exp10" , FN_EXP10, host_exp10, inputs.get (), total, max_ulp);
155+ errs += test_type<float >(" f32" , 1.0 , -87.0 , 87.0 );
156+ errs += test_type<double >(" f64" , 1.0 , -700.0 , 700.0 );
121157
122158 if (errs)
123159 printf (" %d total errors\n " , errs);
0 commit comments