Address review: template over float/double, add inf/nan/denorm inputs

yxsamliu · yxsamliu · commit b7fcee3e1d52 · 2026-03-21T14:06:02.000-04:00
- Template ulp_distance, compute_exp kernel, and test_func over T
- Add IntRep trait to map float types to same-size integers for ULP distance
- Add special inputs: inf, -inf, NaN, +/-denorm_min, min, max
- Use type-appropriate ranges: f32 [-87,87], f64 [-700,700]
- Use std::copy instead of memcpy for array assembly
- Add comment explaining the ULP integer-subtraction technique
diff --git a/External/HIP/math-ulp-exp.hip b/External/HIP/math-ulp-exp.hip
@@ -3,6 +3,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <limits>
 #include <memory>
 
 #define HIP_CHECK(call)                                                        \
@@ -15,25 +16,37 @@
     }                                                                          \
   } while (0)
 
-static double ulp_distance(double a, double b) {
-  if (std::isnan(a) && std::isnan(b))
+template <typename T> struct IntRep;
+template <> struct IntRep<double> { using type = int64_t; };
+template <> struct IntRep<float> { using type = int32_t; };
+
+// Compute ULP distance by reinterpreting float bits as integers and
+// subtracting. For same-sign, non-NaN IEEE 754 values, the integer
+// representation grows monotonically with magnitude, so the absolute integer
+// difference is the number of representable steps from one value to the other.
+// See: https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
+template <typename T> static double ulp_distance(T a, T b) {
+  double da = static_cast<double>(a), db = static_cast<double>(b);
+  if (std::isnan(da) && std::isnan(db))
     return 0.0;
-  if (std::isnan(a) || std::isnan(b))
+  if (std::isnan(da) || std::isnan(db))
     return INFINITY;
   if (a == b)
     return 0.0;
-  int64_t ai, bi;
-  memcpy(&ai, &a, sizeof(double));
-  memcpy(&bi, &b, sizeof(double));
+  using I = typename IntRep<T>::type;
+  I ai, bi;
+  memcpy(&ai, &a, sizeof(T));
+  memcpy(&bi, &b, sizeof(T));
   if ((ai < 0) != (bi < 0))
     return INFINITY;
-  return (double)llabs(ai - bi);
+  int64_t diff = static_cast<int64_t>(ai) - static_cast<int64_t>(bi);
+  return static_cast<double>(diff < 0 ? -diff : diff);
 }
 
 enum MathFunc { FN_EXP, FN_EXP2, FN_EXP10 };
 
-__global__ void compute_exp(const double *in, double *out, int n,
-                            MathFunc fn) {
+template <typename T>
+__global__ void compute_exp(const T *in, T *out, int n, MathFunc fn) {
   int i = blockIdx.x * blockDim.x + threadIdx.x;
   if (i >= n)
     return;
@@ -50,13 +63,16 @@ __global__ void compute_exp(const double *in, double *out, int n,
   }
 }
 
-static double host_exp10(double x) { return pow(10.0, x); }
+static double host_exp(double x) { return ::exp(x); }
+static double host_exp2(double x) { return ::exp2(x); }
+static double host_exp10(double x) { return ::pow(10.0, x); }
 
+template <typename T>
 static int test_func(const char *name, MathFunc fn,
-                     double (*host_fn)(double), const double *inputs, int n,
+                     double (*host_fn)(double), const T *inputs, int n,
                      double max_ulp) {
-  size_t sz = n * sizeof(double);
-  double *d_in, *d_out;
+  size_t sz = n * sizeof(T);
+  T *d_in, *d_out;
   HIP_CHECK(hipMalloc(&d_in, sz));
   HIP_CHECK(hipMalloc(&d_out, sz));
   HIP_CHECK(hipMemcpy(d_in, inputs, sz, hipMemcpyHostToDevice));
@@ -66,20 +82,20 @@ static int test_func(const char *name, MathFunc fn,
   compute_exp<<<blocks, threads>>>(d_in, d_out, n, fn);
   HIP_CHECK(hipDeviceSynchronize());
 
-  auto h_out = std::make_unique<double[]>(n);
+  auto h_out = std::make_unique<T[]>(n);
   HIP_CHECK(hipMemcpy(h_out.get(), d_out, sz, hipMemcpyDeviceToHost));
 
   int errs = 0;
   double worst_ulp = 0.0;
   for (int i = 0; i < n; i++) {
-    double expected = host_fn(inputs[i]);
+    T expected = static_cast<T>(host_fn(static_cast<double>(inputs[i])));
     double ulp = ulp_distance(h_out[i], expected);
     if (ulp > worst_ulp)
       worst_ulp = ulp;
     if (ulp > max_ulp) {
       if (errs < 10)
         printf("  FAIL %s(%a) = %a, expected %a, ulp = %.0f\n", name,
-               inputs[i], h_out[i], expected, ulp);
+               (double)inputs[i], (double)h_out[i], (double)expected, ulp);
       errs++;
     }
   }
@@ -91,33 +107,53 @@ static int test_func(const char *name, MathFunc fn,
   return errs;
 }
 
-int main() {
-  const int N_SPECIAL = 12;
-  double special[] = {0.0,  -0.0, 1.0,  -1.0,  0.5,   -0.5,
-                      1e-15, -1e-15, 700.0, -700.0, 1e-300, -1e-300};
+template <typename T>
+static int test_type(const char *type_name, double max_ulp, double range_lo,
+                     double range_hi) {
+  T special[] = {
+      T(0.0),  T(-0.0), T(1.0),  T(-1.0), T(0.5), T(-0.5), T(1e-5), T(-1e-5),
+      std::numeric_limits<T>::infinity(),
+      -std::numeric_limits<T>::infinity(),
+      std::numeric_limits<T>::quiet_NaN(),
+      std::numeric_limits<T>::denorm_min(),
+      -std::numeric_limits<T>::denorm_min(),
+      std::numeric_limits<T>::min(),
+      std::numeric_limits<T>::max(),
+  };
+  const int N_SPECIAL = sizeof(special) / sizeof(special[0]);
 
   const int N_RANGE = 2048;
-  double range[N_RANGE];
+  auto range = std::make_unique<T[]>(N_RANGE);
   for (int i = 0; i < N_RANGE; i++)
-    range[i] = -700.0 + 1400.0 * i / (N_RANGE - 1);
+    range[i] =
+        static_cast<T>(range_lo + (range_hi - range_lo) * i / (N_RANGE - 1));
 
   const int N_SMALL = 256;
-  double small[N_SMALL];
+  auto small_vals = std::make_unique<T[]>(N_SMALL);
   for (int i = 0; i < N_SMALL; i++)
-    small[i] = -1.0 + 2.0 * i / (N_SMALL - 1);
+    small_vals[i] = static_cast<T>(-1.0 + 2.0 * i / (N_SMALL - 1));
 
   int total = N_SPECIAL + N_RANGE + N_SMALL;
-  auto inputs = std::make_unique<double[]>(total);
-  memcpy(inputs.get(), special, N_SPECIAL * sizeof(double));
-  memcpy(inputs.get() + N_SPECIAL, range, N_RANGE * sizeof(double));
-  memcpy(inputs.get() + N_SPECIAL + N_RANGE, small, N_SMALL * sizeof(double));
+  auto inputs = std::make_unique<T[]>(total);
+  std::copy(special, special + N_SPECIAL, inputs.get());
+  std::copy(range.get(), range.get() + N_RANGE, inputs.get() + N_SPECIAL);
+  std::copy(small_vals.get(), small_vals.get() + N_SMALL,
+            inputs.get() + N_SPECIAL + N_RANGE);
+
+  printf("Testing %s math accuracy (max %.0f ULP, %d values):\n", type_name,
+         max_ulp, total);
+  int errs = 0;
+  errs += test_func("exp", FN_EXP, host_exp, inputs.get(), total, max_ulp);
+  errs += test_func("exp2", FN_EXP2, host_exp2, inputs.get(), total, max_ulp);
+  errs +=
+      test_func("exp10", FN_EXP10, host_exp10, inputs.get(), total, max_ulp);
+  return errs;
+}
 
+int main() {
   int errs = 0;
-  double max_ulp = 1.0;
-  printf("Testing f64 math accuracy (max %.0f ULP):\n", max_ulp);
-  errs += test_func("exp", FN_EXP, exp, inputs.get(), total, max_ulp);
-  errs += test_func("exp2", FN_EXP2, exp2, inputs.get(), total, max_ulp);
-  errs += test_func("exp10", FN_EXP10, host_exp10, inputs.get(), total, max_ulp);
+  errs += test_type<float>("f32", 1.0, -87.0, 87.0);
+  errs += test_type<double>("f64", 1.0, -700.0, 700.0);
 
   if (errs)
     printf("%d total errors\n", errs);