Skip to content

Commit b7fcee3

Browse files
committed
Address review: template over float/double, add inf/nan/denorm inputs
- Template ulp_distance, compute_exp kernel, and test_func over T - Add IntRep trait to map float types to same-size integers for ULP - Add special inputs: inf, -inf, NaN, denorm_min, min, max - Use type-appropriate ranges: f32 [-87,87], f64 [-700,700] - Use std::copy instead of memcpy for array assembly - Add comment explaining the ULP integer-subtraction technique
1 parent a51ce39 commit b7fcee3

1 file changed

Lines changed: 69 additions & 33 deletions

File tree

External/HIP/math-ulp-exp.hip

Lines changed: 69 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <cstdio>
44
#include <cstdlib>
55
#include <cstring>
6+
#include <limits>
67
#include <memory>
78

89
#define HIP_CHECK(call) \
@@ -15,25 +16,37 @@
1516
} \
1617
} while (0)
1718

18-
static double ulp_distance(double a, double b) {
19-
if (std::isnan(a) && std::isnan(b))
19+
template <typename T> struct IntRep;
20+
template <> struct IntRep<double> { using type = int64_t; };
21+
template <> struct IntRep<float> { using type = int32_t; };
22+
23+
// Compute ULP distance by reinterpreting float bits as integers and
24+
// subtracting. IEEE 754 same-sign values have monotonically increasing
25+
// integer representations, so the integer difference equals the exact
26+
// count of representable floats between two values.
27+
// See: https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
28+
template <typename T> static double ulp_distance(T a, T b) {
29+
double da = static_cast<double>(a), db = static_cast<double>(b);
30+
if (std::isnan(da) && std::isnan(db))
2031
return 0.0;
21-
if (std::isnan(a) || std::isnan(b))
32+
if (std::isnan(da) || std::isnan(db))
2233
return INFINITY;
2334
if (a == b)
2435
return 0.0;
25-
int64_t ai, bi;
26-
memcpy(&ai, &a, sizeof(double));
27-
memcpy(&bi, &b, sizeof(double));
36+
using I = typename IntRep<T>::type;
37+
I ai, bi;
38+
memcpy(&ai, &a, sizeof(T));
39+
memcpy(&bi, &b, sizeof(T));
2840
if ((ai < 0) != (bi < 0))
2941
return INFINITY;
30-
return (double)llabs(ai - bi);
42+
int64_t diff = static_cast<int64_t>(ai) - static_cast<int64_t>(bi);
43+
return static_cast<double>(diff < 0 ? -diff : diff);
3144
}
3245

3346
enum MathFunc { FN_EXP, FN_EXP2, FN_EXP10 };
3447

35-
__global__ void compute_exp(const double *in, double *out, int n,
36-
MathFunc fn) {
48+
template <typename T>
49+
__global__ void compute_exp(const T *in, T *out, int n, MathFunc fn) {
3750
int i = blockIdx.x * blockDim.x + threadIdx.x;
3851
if (i >= n)
3952
return;
@@ -50,13 +63,16 @@ __global__ void compute_exp(const double *in, double *out, int n,
5063
}
5164
}
5265

53-
static double host_exp10(double x) { return pow(10.0, x); }
66+
static double host_exp(double x) { return ::exp(x); }
67+
static double host_exp2(double x) { return ::exp2(x); }
68+
static double host_exp10(double x) { return ::pow(10.0, x); }
5469

70+
template <typename T>
5571
static int test_func(const char *name, MathFunc fn,
56-
double (*host_fn)(double), const double *inputs, int n,
72+
double (*host_fn)(double), const T *inputs, int n,
5773
double max_ulp) {
58-
size_t sz = n * sizeof(double);
59-
double *d_in, *d_out;
74+
size_t sz = n * sizeof(T);
75+
T *d_in, *d_out;
6076
HIP_CHECK(hipMalloc(&d_in, sz));
6177
HIP_CHECK(hipMalloc(&d_out, sz));
6278
HIP_CHECK(hipMemcpy(d_in, inputs, sz, hipMemcpyHostToDevice));
@@ -66,20 +82,20 @@ static int test_func(const char *name, MathFunc fn,
6682
compute_exp<<<blocks, threads>>>(d_in, d_out, n, fn);
6783
HIP_CHECK(hipDeviceSynchronize());
6884

69-
auto h_out = std::make_unique<double[]>(n);
85+
auto h_out = std::make_unique<T[]>(n);
7086
HIP_CHECK(hipMemcpy(h_out.get(), d_out, sz, hipMemcpyDeviceToHost));
7187

7288
int errs = 0;
7389
double worst_ulp = 0.0;
7490
for (int i = 0; i < n; i++) {
75-
double expected = host_fn(inputs[i]);
91+
T expected = static_cast<T>(host_fn(static_cast<double>(inputs[i])));
7692
double ulp = ulp_distance(h_out[i], expected);
7793
if (ulp > worst_ulp)
7894
worst_ulp = ulp;
7995
if (ulp > max_ulp) {
8096
if (errs < 10)
8197
printf(" FAIL %s(%a) = %a, expected %a, ulp = %.0f\n", name,
82-
inputs[i], h_out[i], expected, ulp);
98+
(double)inputs[i], (double)h_out[i], (double)expected, ulp);
8399
errs++;
84100
}
85101
}
@@ -91,33 +107,53 @@ static int test_func(const char *name, MathFunc fn,
91107
return errs;
92108
}
93109

94-
int main() {
95-
const int N_SPECIAL = 12;
96-
double special[] = {0.0, -0.0, 1.0, -1.0, 0.5, -0.5,
97-
1e-15, -1e-15, 700.0, -700.0, 1e-300, -1e-300};
110+
template <typename T>
111+
static int test_type(const char *type_name, double max_ulp, double range_lo,
112+
double range_hi) {
113+
T special[] = {
114+
T(0.0), T(-0.0), T(1.0), T(-1.0), T(0.5), T(-0.5), T(1e-5), T(-1e-5),
115+
std::numeric_limits<T>::infinity(),
116+
-std::numeric_limits<T>::infinity(),
117+
std::numeric_limits<T>::quiet_NaN(),
118+
std::numeric_limits<T>::denorm_min(),
119+
-std::numeric_limits<T>::denorm_min(),
120+
std::numeric_limits<T>::min(),
121+
std::numeric_limits<T>::max(),
122+
};
123+
const int N_SPECIAL = sizeof(special) / sizeof(special[0]);
98124

99125
const int N_RANGE = 2048;
100-
double range[N_RANGE];
126+
auto range = std::make_unique<T[]>(N_RANGE);
101127
for (int i = 0; i < N_RANGE; i++)
102-
range[i] = -700.0 + 1400.0 * i / (N_RANGE - 1);
128+
range[i] =
129+
static_cast<T>(range_lo + (range_hi - range_lo) * i / (N_RANGE - 1));
103130

104131
const int N_SMALL = 256;
105-
double small[N_SMALL];
132+
auto small_vals = std::make_unique<T[]>(N_SMALL);
106133
for (int i = 0; i < N_SMALL; i++)
107-
small[i] = -1.0 + 2.0 * i / (N_SMALL - 1);
134+
small_vals[i] = static_cast<T>(-1.0 + 2.0 * i / (N_SMALL - 1));
108135

109136
int total = N_SPECIAL + N_RANGE + N_SMALL;
110-
auto inputs = std::make_unique<double[]>(total);
111-
memcpy(inputs.get(), special, N_SPECIAL * sizeof(double));
112-
memcpy(inputs.get() + N_SPECIAL, range, N_RANGE * sizeof(double));
113-
memcpy(inputs.get() + N_SPECIAL + N_RANGE, small, N_SMALL * sizeof(double));
137+
auto inputs = std::make_unique<T[]>(total);
138+
std::copy(special, special + N_SPECIAL, inputs.get());
139+
std::copy(range.get(), range.get() + N_RANGE, inputs.get() + N_SPECIAL);
140+
std::copy(small_vals.get(), small_vals.get() + N_SMALL,
141+
inputs.get() + N_SPECIAL + N_RANGE);
142+
143+
printf("Testing %s math accuracy (max %.0f ULP, %d values):\n", type_name,
144+
max_ulp, total);
145+
int errs = 0;
146+
errs += test_func("exp", FN_EXP, host_exp, inputs.get(), total, max_ulp);
147+
errs += test_func("exp2", FN_EXP2, host_exp2, inputs.get(), total, max_ulp);
148+
errs +=
149+
test_func("exp10", FN_EXP10, host_exp10, inputs.get(), total, max_ulp);
150+
return errs;
151+
}
114152

153+
int main() {
115154
int errs = 0;
116-
double max_ulp = 1.0;
117-
printf("Testing f64 math accuracy (max %.0f ULP):\n", max_ulp);
118-
errs += test_func("exp", FN_EXP, exp, inputs.get(), total, max_ulp);
119-
errs += test_func("exp2", FN_EXP2, exp2, inputs.get(), total, max_ulp);
120-
errs += test_func("exp10", FN_EXP10, host_exp10, inputs.get(), total, max_ulp);
155+
errs += test_type<float>("f32", 1.0, -87.0, 87.0);
156+
errs += test_type<double>("f64", 1.0, -700.0, 700.0);
121157

122158
if (errs)
123159
printf("%d total errors\n", errs);

0 commit comments

Comments
 (0)