From cb46d47c1cccdaf503fc46b1d43fafb35f1ab72f Mon Sep 17 00:00:00 2001
From: Lehman Garrison <lgarrison@flatironinstitute.org>
Date: Mon, 11 May 2026 14:50:40 -0400
Subject: [PATCH] potential: allocate persistent scratch space for gradients

Avoid repeated malloc/free calls when calculating gradients with
transformed components. Uses thread-local scratch space that is allocated
on first use and cached for reuse later.

Note that this space is never freed! If working with large orbit arrays,
this may result in higher than expected memory usage.
---
 CHANGES.rst                                   |  2 +
 .../potential/potential/src/cpotential.cpp    | 40 +++++++++++--------
 2 files changed, 26 insertions(+), 16 deletions(-)
diff --git a/CHANGES.rst b/CHANGES.rst
index b63c28b8f..a9c20226e 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -6,6 +6,8 @@ New Features
 
 - Added support for using the ``TimeInterpolatedPotential`` with the
   ``MN3ExponentialDiskPotential`` class.
+- Improve performance of repeated gradient computations with transformed components by
+  allocating persistent scratch space.
 
 Bug fixes
 ---------
diff --git a/src/gala/potential/potential/src/cpotential.cpp b/src/gala/potential/potential/src/cpotential.cpp
index d942f5f9e..29dbe5717 100644
--- a/src/gala/potential/potential/src/cpotential.cpp
+++ b/src/gala/potential/potential/src/cpotential.cpp
@@ -1,5 +1,7 @@
 #include <math.h>
 #include <stdlib.h>
+#include <cstring>
+#include <vector>
 #include "cpotential.h"
 #include "src/vectorization.h"
 
@@ -215,8 +217,23 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) {
     // qp: shape [p->n_dim, N]
     // grad: shape [p->n_dim, N]
 
-    double *qp_trans = NULL;
-    double *tmp_grad = NULL;
+    // Per-thread scratch for the do_shift_rotate path.
+    // This is to avoid repeated allocations of temporary arrays when repeating gradient calls.
+    // Note that the scratch memory is never deallocated!
+    struct GradScratch {
+        std::vector<double> qp_trans;
+        std::vector<double> tmp_grad;
+        void ensure(size_t n) {
+            if (qp_trans.size() < n) {
+                qp_trans.resize(n);
+                tmp_grad.resize(n);
+            }
+        }
+    };
+    static thread_local GradScratch scratch;
+
+    double *qp_trans = nullptr;
+    double *tmp_grad = nullptr;
     bool need_transform = false;
 
     // Check if any components need transformation
@@ -227,10 +244,10 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) {
         }
     }
 
-    // Allocate temporary arrays if transformation is needed
     if (need_transform) {
-        qp_trans = (double*)malloc(p->n_dim * N * sizeof(double));
-        tmp_grad = (double*)malloc(p->n_dim * N * sizeof(double));
+        scratch.ensure(p->n_dim * N);
+        qp_trans = scratch.qp_trans.data();
+        tmp_grad = scratch.tmp_grad.data();
     }
 
     // Initialize gradient array
@@ -243,11 +260,8 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) {
         if (p->do_shift_rotate[i] == 0) {
             (p->gradient)[i](t, (p->parameters)[i], qp, p->n_dim, N, grad, (p->state)[i]);
         } else {
-            // Initialize temporary arrays
-            for (size_t j = 0; j < p->n_dim * N; j++) {
-                qp_trans[j] = 0.;
-                tmp_grad[j] = 0.;
-            }
+            std::memset(qp_trans, 0, p->n_dim * N * sizeof(double));
+            std::memset(tmp_grad, 0, p->n_dim * N * sizeof(double));
 
             // Apply shift and rotation to all particles
             apply_shift_rotate_N(
@@ -278,12 +292,6 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) {
             }
         }
     }
-
-    // Free temporary arrays
-    if (need_transform) {
-        free(qp_trans);
-        free(tmp_grad);
-    }
 }