DedalusProject · kburns · May 27, 2025 · May 27, 2025 · May 27, 2025 · May 27, 2025
diff --git a/dedalus/core/arithmetic.py b/dedalus/core/arithmetic.py
@@ -13,6 +13,7 @@
 import numexpr as ne
 from collections import defaultdict
 from math import prod
+import array_api_compat
 
 from .domain import Domain
 from .field import Operand, Field
@@ -245,10 +246,11 @@ def choose_layout(self):
 
     def operate(self, out):
         """Perform operation."""
+        xp = self.array_namespace  # array module for field data (presumably the distributor's namespace; confirm)
         arg0, arg1 = self.args
         # Set output layout
         out.preset_layout(arg0.layout)
-        np.add(arg0.data, arg1.data, out=out.data)
+        xp.add(arg0.data, arg1.data, out=out.data)  # sum is written directly into out.data
 
 
 # used for einsum string manipulation
@@ -616,6 +618,7 @@ def __init__(self, arg0, arg1, indices=(-1,0), out=None, **kw):
         arg2_str = arg2_str.replace(arg2_str[indices[1]], 'z')
         out_str = (arg1_str + arg2_str).replace('z', '')
         self.einsum_str = arg1_str + '...,' + arg2_str + '...->' + out_str + '...'
+        self.einsum_path = None
 
     def _check_indices(self, arg0, arg1, indices):
         if (not isinstance(arg0, Operand)) or (not isinstance(arg1, Operand)):
@@ -664,14 +667,26 @@ def GammaCoord(self, A_tensorsig, B_tensorsig, C_tensorsig):
         return G
 
     def operate(self, out):
+        xp = self.array_namespace  # array module for field data (presumably the distributor's namespace; confirm)
         arg0, arg1 = self.args
         out.preset_layout(arg0.layout)
         # Broadcast
         arg0_data = self.arg0_ghost_broadcaster.cast(arg0)
         arg1_data = self.arg1_ghost_broadcaster.cast(arg1)
         # Call einsum
         if out.data.size:
-            np.einsum(self.einsum_str, arg0_data, arg1_data, out=out.data, optimize=True)
+            if array_api_compat.is_cupy_namespace(xp):
+                if self.einsum_path is None:  # plan the contraction path on first use, then reuse it
+                    self.einsum_path = self.get_einsum_path(xp.asnumpy(arg0_data), xp.asnumpy(arg1_data))  # planning runs in numpy, so operands are converted to host arrays
+                # cupy's einsum does not accept an output keyword, so assign the result into out.data
+                out.data[:] = xp.einsum(self.einsum_str, arg0_data, arg1_data, optimize=self.einsum_path)
+            else:
+                if self.einsum_path is None:  # plan the contraction path on first use, then reuse it
+                    self.einsum_path = self.get_einsum_path(arg0_data, arg1_data)
+                xp.einsum(self.einsum_str, arg0_data, arg1_data, out=out.data, optimize=self.einsum_path)
+
+    def get_einsum_path(self, arg0_data, arg1_data):  # plan the contraction order for self.einsum_str with numpy
+        return np.einsum_path(self.einsum_str, arg0_data, arg1_data, optimize="optimal")[0]  # [0] keeps the path list and drops the printable summary
 
 
 @alias("cross")
@@ -854,6 +869,7 @@ def __init__(self, arg0, arg1, out=None, **kw):
 
     def operate(self, out):
         """Perform operation."""
+        xp = self.array_namespace
         arg0, arg1 = self.args
         # Set output layout
         out.preset_layout(arg0.layout)
@@ -863,7 +879,7 @@ def operate(self, out):
         # Reshape arg data to broadcast properly for output tensorsig
         arg0_exp_data = arg0_data.reshape(self.arg0_exp_tshape + arg0_data.shape[len(arg0.tensorsig):])
         arg1_exp_data = arg1_data.reshape(self.arg1_exp_tshape + arg1_data.shape[len(arg1.tensorsig):])
-        np.multiply(arg0_exp_data, arg1_exp_data, out=out.data)
+        xp.multiply(arg0_exp_data, arg1_exp_data, out=out.data)
 
 
 class GhostBroadcaster:
@@ -919,7 +935,7 @@ def __init__(self, arg0, arg1, out=None,**kw):
         super().__init__(arg0, arg1, out=out)
         self.domain = arg1.domain
         self.tensorsig = arg1.tensorsig
-        self.dtype = np.result_type(type(arg0), arg1.dtype)
+        self.dtype = np.result_type(arg0, arg1.dtype)
 
     @classmethod
     def _check_args(cls, *args, **kw):
@@ -939,11 +955,12 @@ def enforce_conditions(self):
 
     def operate(self, out):
         """Perform operation."""
+        xp = self.array_namespace  # array module for field data (presumably the distributor's namespace; confirm)
         arg0, arg1 = self.args
         # Set output layout
         out.preset_layout(arg1.layout)
         # Multiply argument data
-        np.multiply(arg0, arg1.data, out=out.data)
+        xp.multiply(arg0, arg1.data, out=out.data)  # arg0 is used as-is, only arg1 contributes field data
 
     def matrix_dependence(self, *vars):
         return self.args[1].matrix_dependence(*vars)

diff --git a/dedalus/core/basis.py b/dedalus/core/basis.py
@@ -5,6 +5,7 @@
 from functools import reduce
 import inspect
 from math import prod
+import array_api_compat
 
 from . import operators
 from ..libraries import spin_recombination
@@ -14,7 +15,7 @@
 from ..tools import clenshaw
 from ..tools.array import reshape_vector, axindex, axslice, interleave_matrices
 from ..tools.dispatch import MultiClass, SkipDispatchException
-from ..tools.general import unify, DeferredTuple
+from ..tools.general import unify, DeferredTuple, is_real_dtype, is_complex_dtype
 from .coords import Coordinate, CartesianCoordinates, S2Coordinates, SphericalCoordinates, PolarCoordinates, AzimuthalCoordinate, DirectProduct
 from .domain import Domain
 from .field  import Operand, LockedField
@@ -595,8 +596,10 @@ class Jacobi(IntervalBasis, metaclass=CachedClass):
     group_shape = (1,)
     native_bounds = (-1, 1)
     transforms = {}
-    default_dct = "fftw_dct"
-    default_library = "matrix"
+    default_cpu_library = "matrix"
+    default_gpu_library = "matrix"
+    default_cpu_dct = "fftw"
+    default_gpu_dct = "matrix"
 
     @classmethod
     def _preprocess_cache_args(cls, coord, size, bounds, a, b, a0, b0, dealias, library):
@@ -631,12 +634,6 @@ def _preprocess_cache_args(cls, coord, size, bounds, a, b, a0, b0, dealias, libr
             dealias = tuple(dealias)
         if len(dealias) != 1:
             raise ValueError("Jacobi dealias must have length 1.")
-        # library: pick default based on (a0, b0)
-        if library is None:
-            if a0 == b0 == -1/2:
-                library = cls.default_dct
-            else:
-                library = cls.default_library
         return (coord, size, bounds, a, b, a0, b0, dealias, library)
 
     def __init__(self, coord, size, bounds, a, b, a0=None, b0=None, dealias=(1,), library=None):
@@ -660,10 +657,30 @@ def _native_grid(self, scale):
         N, = self.grid_shape((scale,))
         return jacobi.build_grid(N, a=self.a0, b=self.b0)
 
+    def get_library(self, dist):
+        """Return the transform library name to use with the given distributor."""
+        # An explicitly requested library always takes precedence
+        if self.library is not None:
+            return self.library
+        on_gpu = dist.is_cupy_namespace
+        # a0 == b0 == -1/2 selects the DCT defaults
+        if self.a0 == self.b0 == -1/2:
+            if on_gpu:
+                return self.default_gpu_dct
+            return self.default_cpu_dct
+        # Any other (a0, b0) selects the general-purpose defaults
+        if on_gpu:
+            return self.default_gpu_library
+        return self.default_cpu_library
+
     @CachedMethod
     def transform_plan(self, dist, grid_size):
         """Build transform plan."""
-        return self.transforms[self.library](grid_size, self.size, self.a, self.b, self.a0, self.b0)
+        # Size-1 transforms are trivial and always use the matrix implementation
+        trivial = (grid_size == 1 or self.size == 1)
+        library = "matrix" if trivial else self.get_library(dist)
+        plan_class = self.transforms[library]
+        return plan_class(grid_size, self.size, self.a, self.b, self.a0, self.b0, dist.array_namespace, dist.dtype)
 
     # def weights(self, scales):
     #     """Gauss-Jacobi weights."""
@@ -975,7 +992,8 @@ class FourierBase(IntervalBasis):
     """Base class for RealFourier and ComplexFourier."""
 
     native_bounds = (0, 2*np.pi)
-    default_library = "fftw"
+    default_gpu_library = "cupy"
+    default_cpu_library = "fftw"
 
     @classmethod
     def _preprocess_cache_args(cls, coord, size, bounds, dealias, library):
@@ -998,9 +1016,6 @@ def _preprocess_cache_args(cls, coord, size, bounds, dealias, library):
             dealias = tuple(dealias)
         if len(dealias) != 1:
             raise ValueError("Fourier dealias must have length 1.")
-        # library: pick default based on (a0, b0)
-        if library is None:
-            library = cls.default_library
         return (coord, size, bounds, dealias, library)
 
     def __init__(self, coord, size, bounds, dealias=(1,), library=None):
@@ -1069,14 +1084,24 @@ def _native_grid(self, scale):
         N, = self.grid_shape((scale,))
         return (2 * np.pi / N) * np.arange(N)
 
+    def get_library(self, dist):
+        """Return the transform library name to use with the given distributor."""
+        # An explicitly requested library always takes precedence
+        if self.library is not None:
+            return self.library
+        # Otherwise fall back to the default for the distributor's array backend
+        if dist.is_cupy_namespace:
+            return self.default_gpu_library
+        return self.default_cpu_library
+
     @CachedMethod
     def transform_plan(self, dist, grid_size):
         """Build transform plan."""
         # Shortcut trivial transforms
         if grid_size == 1 or self.size == 1:
-            return self.transforms['matrix'](grid_size, self.size)
+            return self.transforms["matrix"](grid_size, self.size, dist.array_namespace, dist.dtype)  # size-1 case ignores the library choice
         else:
-            return self.transforms[self.library](grid_size, self.size)
+            return self.transforms[self.get_library(dist)](grid_size, self.size, dist.array_namespace, dist.dtype)  # library is resolved per distributor
 
     def forward_transform(self, field, axis, gdata, cdata):
         # Transform
@@ -1097,9 +1122,9 @@ def Fourier(*args, dtype=None, **kw):
     """Factory function dispatching to RealFourier and ComplexFourier based on provided dtype."""
     if dtype is None:
         raise ValueError("dtype must be specified")
-    elif dtype == np.float64:
+    elif is_real_dtype(dtype):
         return RealFourier(*args, **kw)
-    elif dtype == np.complex128:
+    elif is_complex_dtype(dtype):
         return ComplexFourier(*args, **kw)
     else:
         raise ValueError(f"Unrecognized dtype: {dtype}")
@@ -2204,15 +2229,6 @@ def _preprocess_cache_args(cls, coordsys, shape, dtype, radii, k, alpha, dealias
             dealias = tuple(dealias)
         if len(dealias) != 2:
             raise ValueError("Annulus dealias must have length 2.")
-        # azimuth_library: pick default
-        if azimuth_library is None:
-            azimuth_library = RealFourier.default_library
-        # radius_library: pick default based on alpha
-        if radius_library is None:
-            if alpha[0] == alpha[1] == -1/2:
-                radius_library = Jacobi.default_dct
-            else:
-                radius_library = Jacobi.default_library
         return (coordsys, shape, dtype, radii, k, alpha, dealias, azimuth_library, radius_library)
 
     def __init__(self, coordsys, shape, dtype, radii=(1,2), k=0, alpha=(-0.5,-0.5), dealias=(1,1), azimuth_library=None, radius_library=None):
@@ -6238,6 +6254,7 @@ class CartesianAdvectiveCFL(operators.AdvectiveCFL):
 
     @CachedMethod
     def cfl_spacing(self):
+        xp = self.array_namespace
         velocity = self.operand
         coordsys = velocity.tensorsig[0]
         spacing = []
@@ -6260,7 +6277,7 @@ def cfl_spacing(self):
                 axis_spacing[:] = dealias * native_spacing * basis.COV.stretch
             elif basis is None:
                 axis_spacing = np.inf
-            spacing.append(axis_spacing)
+            spacing.append(xp.asarray(axis_spacing))
         return spacing
 
     def compute_cfl_frequency(self, velocity, out):

diff --git a/dedalus/core/distributor.py b/dedalus/core/distributor.py
@@ -10,6 +10,8 @@
 from math import prod
 import numbers
 from weakref import WeakSet
+import array_api_compat
+import warnings
 
 from .coords import CoordinateSystem, DirectProduct
 from ..tools.array import reshape_vector
@@ -39,12 +41,16 @@ class Distributor:
 
     Parameters
     ----------
-    dim : int
-        Dimension
+    coordsystems : CoordinateSystem or tuple of CoordinateSystems
+        Problem coordinate systems
     comm : MPI communicator, optional
         MPI communicator (default: comm world)
     mesh : tuple of ints, optional
         Process mesh for parallelization (default: 1-D mesh of available processes)
+    dtype : data type, optional
+        Default data type for fields (default: None)
+    array_namespace : array namespace or string, optional
+        Array namespace for field data (e.g. numpy or cupy, default: numpy)
 
     Attributes
     ----------
@@ -74,7 +80,7 @@ class Distributor:
     states) and the paths between them (D transforms and R transposes).
     """
 
-    def __init__(self, coordsystems, comm=None, mesh=None, dtype=None):
+    def __init__(self, coordsystems, comm=None, mesh=None, dtype=None, array_namespace=np):
         # Accept single coordsys in place of tuple/list
         if not isinstance(coordsystems, (tuple, list)):
             coordsystems = (coordsystems,)
@@ -115,6 +121,16 @@ def __init__(self, coordsystems, comm=None, mesh=None, dtype=None):
         self._build_layouts()
         # Keep set of weak field references
         self.fields = WeakSet()
+        # Array module
+        if isinstance(array_namespace, str):
+            self.array_namespace = getattr(array_api_compat, array_namespace)
+        else:
+            self.array_namespace = array_api_compat.array_namespace(array_namespace.zeros(0))
+        self.is_numpy_namespace = array_api_compat.is_numpy_namespace(self.array_namespace)
+        self.is_cupy_namespace = array_api_compat.is_cupy_namespace(self.array_namespace)
+        # Warnings for non-Cartesian problems
+        if self.is_cupy_namespace and any(cs.curvilinear for cs in self.coordsystems):
+            warnings.warn("Non-Cartesian coordinate systems not yet supported on GPU.")
 
     @CachedAttribute
     def cs_by_axis(self):
@@ -255,11 +271,12 @@ def IdentityTensor(self, coordsys_in, coordsys_out=None, bases=None, dtype=None)
         return I
 
     def local_grid(self, basis, scale=None):
+        xp = self.array_namespace  # array backend configured on this distributor
         # TODO: remove from bases and do it all here?
         if scale is None:
             scale = 1
         if basis.dim == 1:
-            return basis.local_grid(self, scale=scale)
+            return xp.asarray(basis.local_grid(self, scale=scale))  # convert the basis grid to the distributor's array type
         else:
             raise ValueError("Use `local_grids` for multidimensional bases.")
 
@@ -292,16 +309,18 @@ def local_grid(self, basis, scale=None):
     #     return tuple(grids)
 
     def local_grids(self, *bases, scales=None):
+        xp = self.array_namespace  # array backend configured on this distributor
         scales = self.remedy_scales(scales)
         grids = []
         for basis in bases:
             basis_scales = scales[self.first_axis(basis):self.last_axis(basis)+1]
-            grids.extend(basis.local_grids(self, scales=basis_scales))
+            grids.extend(xp.asarray(grid) for grid in basis.local_grids(self, scales=basis_scales))  # convert each grid separately; per-axis grids need not share a shape
         return grids
 
     def local_modes(self, basis):
         # TODO: remove from bases and do it all here?
-        return basis.local_modes(self)
+        xp = self.array_namespace  # array backend configured on this distributor
+        return xp.asarray(basis.local_modes(self))  # NOTE(review): assumes basis.local_modes returns a single array-like; confirm
 
     @CachedAttribute
     def default_nonconst_groups(self):