Skip to content

Commit 1915521

Browse files
committed
Address review: template over float/double, add inf/nan/denorm inputs
- Template ulp_distance, compute_exp kernel, and test_func over T - Add IntRep trait to map float types to same-size integers for ULP - Add special inputs: inf, -inf, NaN, denorm_min, min, max - Use type-appropriate ranges: f32 [-87,87], f64 [-700,700] - Use std::copy instead of memcpy for array assembly - Add comment explaining the ULP integer-subtraction technique
1 parent a51ce39 commit 1915521

1 file changed

Lines changed: 68 additions & 33 deletions

File tree

External/HIP/math-ulp-exp.hip

Lines changed: 68 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <cstdio>
44
#include <cstdlib>
55
#include <cstring>
6+
#include <limits>
67
#include <memory>
78

89
#define HIP_CHECK(call) \
@@ -15,25 +16,36 @@
1516
} \
1617
} while (0)
1718

18-
static double ulp_distance(double a, double b) {
19-
if (std::isnan(a) && std::isnan(b))
19+
template <typename T> struct IntRep;
20+
template <> struct IntRep<double> { using type = int64_t; };
21+
template <> struct IntRep<float> { using type = int32_t; };
22+
23+
// Compute ULP distance by reinterpreting same-sign IEEE 754 values as
24+
// integers and taking the absolute difference. This gives the ULP
25+
// distance directly: equal values differ by 0, adjacent values by 1.
26+
// See: https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
27+
template <typename T> static double ulp_distance(T a, T b) {
28+
double da = static_cast<double>(a), db = static_cast<double>(b);
29+
if (std::isnan(da) && std::isnan(db))
2030
return 0.0;
21-
if (std::isnan(a) || std::isnan(b))
31+
if (std::isnan(da) || std::isnan(db))
2232
return INFINITY;
2333
if (a == b)
2434
return 0.0;
25-
int64_t ai, bi;
26-
memcpy(&ai, &a, sizeof(double));
27-
memcpy(&bi, &b, sizeof(double));
35+
using I = typename IntRep<T>::type;
36+
I ai, bi;
37+
memcpy(&ai, &a, sizeof(T));
38+
memcpy(&bi, &b, sizeof(T));
2839
if ((ai < 0) != (bi < 0))
2940
return INFINITY;
30-
return (double)llabs(ai - bi);
41+
int64_t diff = static_cast<int64_t>(ai) - static_cast<int64_t>(bi);
42+
return static_cast<double>(diff < 0 ? -diff : diff);
3143
}
3244

3345
enum MathFunc { FN_EXP, FN_EXP2, FN_EXP10 };
3446

35-
__global__ void compute_exp(const double *in, double *out, int n,
36-
MathFunc fn) {
47+
template <typename T>
48+
__global__ void compute_exp(const T *in, T *out, int n, MathFunc fn) {
3749
int i = blockIdx.x * blockDim.x + threadIdx.x;
3850
if (i >= n)
3951
return;
@@ -50,13 +62,16 @@ __global__ void compute_exp(const double *in, double *out, int n,
5062
}
5163
}
5264

53-
static double host_exp10(double x) { return pow(10.0, x); }
65+
static double host_exp(double x) { return ::exp(x); }
66+
static double host_exp2(double x) { return ::exp2(x); }
67+
static double host_exp10(double x) { return ::pow(10.0, x); }
5468

69+
template <typename T>
5570
static int test_func(const char *name, MathFunc fn,
56-
double (*host_fn)(double), const double *inputs, int n,
71+
double (*host_fn)(double), const T *inputs, int n,
5772
double max_ulp) {
58-
size_t sz = n * sizeof(double);
59-
double *d_in, *d_out;
73+
size_t sz = n * sizeof(T);
74+
T *d_in, *d_out;
6075
HIP_CHECK(hipMalloc(&d_in, sz));
6176
HIP_CHECK(hipMalloc(&d_out, sz));
6277
HIP_CHECK(hipMemcpy(d_in, inputs, sz, hipMemcpyHostToDevice));
@@ -66,20 +81,20 @@ static int test_func(const char *name, MathFunc fn,
6681
compute_exp<<<blocks, threads>>>(d_in, d_out, n, fn);
6782
HIP_CHECK(hipDeviceSynchronize());
6883

69-
auto h_out = std::make_unique<double[]>(n);
84+
auto h_out = std::make_unique<T[]>(n);
7085
HIP_CHECK(hipMemcpy(h_out.get(), d_out, sz, hipMemcpyDeviceToHost));
7186

7287
int errs = 0;
7388
double worst_ulp = 0.0;
7489
for (int i = 0; i < n; i++) {
75-
double expected = host_fn(inputs[i]);
90+
T expected = static_cast<T>(host_fn(static_cast<double>(inputs[i])));
7691
double ulp = ulp_distance(h_out[i], expected);
7792
if (ulp > worst_ulp)
7893
worst_ulp = ulp;
7994
if (ulp > max_ulp) {
8095
if (errs < 10)
8196
printf(" FAIL %s(%a) = %a, expected %a, ulp = %.0f\n", name,
82-
inputs[i], h_out[i], expected, ulp);
97+
(double)inputs[i], (double)h_out[i], (double)expected, ulp);
8398
errs++;
8499
}
85100
}
@@ -91,33 +106,53 @@ static int test_func(const char *name, MathFunc fn,
91106
return errs;
92107
}
93108

94-
int main() {
95-
const int N_SPECIAL = 12;
96-
double special[] = {0.0, -0.0, 1.0, -1.0, 0.5, -0.5,
97-
1e-15, -1e-15, 700.0, -700.0, 1e-300, -1e-300};
109+
template <typename T>
110+
static int test_type(const char *type_name, double max_ulp, double range_lo,
111+
double range_hi) {
112+
T special[] = {
113+
T(0.0), T(-0.0), T(1.0), T(-1.0), T(0.5), T(-0.5), T(1e-5), T(-1e-5),
114+
std::numeric_limits<T>::infinity(),
115+
-std::numeric_limits<T>::infinity(),
116+
std::numeric_limits<T>::quiet_NaN(),
117+
std::numeric_limits<T>::denorm_min(),
118+
-std::numeric_limits<T>::denorm_min(),
119+
std::numeric_limits<T>::min(),
120+
std::numeric_limits<T>::max(),
121+
};
122+
const int N_SPECIAL = sizeof(special) / sizeof(special[0]);
98123

99124
const int N_RANGE = 2048;
100-
double range[N_RANGE];
125+
auto range = std::make_unique<T[]>(N_RANGE);
101126
for (int i = 0; i < N_RANGE; i++)
102-
range[i] = -700.0 + 1400.0 * i / (N_RANGE - 1);
127+
range[i] =
128+
static_cast<T>(range_lo + (range_hi - range_lo) * i / (N_RANGE - 1));
103129

104130
const int N_SMALL = 256;
105-
double small[N_SMALL];
131+
auto small_vals = std::make_unique<T[]>(N_SMALL);
106132
for (int i = 0; i < N_SMALL; i++)
107-
small[i] = -1.0 + 2.0 * i / (N_SMALL - 1);
133+
small_vals[i] = static_cast<T>(-1.0 + 2.0 * i / (N_SMALL - 1));
108134

109135
int total = N_SPECIAL + N_RANGE + N_SMALL;
110-
auto inputs = std::make_unique<double[]>(total);
111-
memcpy(inputs.get(), special, N_SPECIAL * sizeof(double));
112-
memcpy(inputs.get() + N_SPECIAL, range, N_RANGE * sizeof(double));
113-
memcpy(inputs.get() + N_SPECIAL + N_RANGE, small, N_SMALL * sizeof(double));
136+
auto inputs = std::make_unique<T[]>(total);
137+
std::copy(special, special + N_SPECIAL, inputs.get());
138+
std::copy(range.get(), range.get() + N_RANGE, inputs.get() + N_SPECIAL);
139+
std::copy(small_vals.get(), small_vals.get() + N_SMALL,
140+
inputs.get() + N_SPECIAL + N_RANGE);
141+
142+
printf("Testing %s math accuracy (max %.0f ULP, %d values):\n", type_name,
143+
max_ulp, total);
144+
int errs = 0;
145+
errs += test_func("exp", FN_EXP, host_exp, inputs.get(), total, max_ulp);
146+
errs += test_func("exp2", FN_EXP2, host_exp2, inputs.get(), total, max_ulp);
147+
errs +=
148+
test_func("exp10", FN_EXP10, host_exp10, inputs.get(), total, max_ulp);
149+
return errs;
150+
}
114151

152+
int main() {
115153
int errs = 0;
116-
double max_ulp = 1.0;
117-
printf("Testing f64 math accuracy (max %.0f ULP):\n", max_ulp);
118-
errs += test_func("exp", FN_EXP, exp, inputs.get(), total, max_ulp);
119-
errs += test_func("exp2", FN_EXP2, exp2, inputs.get(), total, max_ulp);
120-
errs += test_func("exp10", FN_EXP10, host_exp10, inputs.get(), total, max_ulp);
154+
errs += test_type<float>("f32", 1.0, -87.0, 87.0);
155+
errs += test_type<double>("f64", 1.0, -700.0, 700.0);
121156

122157
if (errs)
123158
printf("%d total errors\n", errs);

0 commit comments

Comments
 (0)