From cb46d47c1cccdaf503fc46b1d43fafb35f1ab72f Mon Sep 17 00:00:00 2001 From: Lehman Garrison Date: Mon, 11 May 2026 14:50:40 -0400 Subject: [PATCH] potential: allocate persistent scratch space for gradients Avoid repeated malloc/free calls when calculating gradients with transformed components. Uses thread-local scratch space that is allocated on first use and cached for reuse later. Note that this space is never freed! If working with large orbit arrays, this may result in higher than expected memory usage. --- CHANGES.rst | 2 + .../potential/potential/src/cpotential.cpp | 40 +++++++++++-------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b63c28b8f..a9c20226e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,8 @@ New Features - Added support for using the ``TimeInterpolatedPotential`` with the ``MN3ExponentialDiskPotential`` class. +- Improve performance of repeated gradient computations with transformed components by + allocating persistent scratch space. Bug fixes --------- diff --git a/src/gala/potential/potential/src/cpotential.cpp b/src/gala/potential/potential/src/cpotential.cpp index d942f5f9e..29dbe5717 100644 --- a/src/gala/potential/potential/src/cpotential.cpp +++ b/src/gala/potential/potential/src/cpotential.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include "cpotential.h" #include "src/vectorization.h" @@ -215,8 +217,23 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) { // qp: shape [p->n_dim, N] // grad: shape [p->n_dim, N] - double *qp_trans = NULL; - double *tmp_grad = NULL; + // Per-thread scratch for the do_shift_rotate path. + // This is to avoid repeated allocations of temporary arrays when repeating gradient calls. + // Note that the scratch memory is never deallocated! + struct GradScratch { + std::vector qp_trans; + std::vector tmp_grad; + void ensure(size_t n) { + if (qp_trans.size() < n) { + qp_trans.resize(n); + tmp_grad.resize(n); + } + } + }; + static thread_local GradScratch scratch; + + double *qp_trans = nullptr; + double *tmp_grad = nullptr; bool need_transform = false; // Check if any components need transformation @@ -227,10 +244,10 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) { } } - // Allocate temporary arrays if transformation is needed if (need_transform) { - qp_trans = (double*)malloc(p->n_dim * N * sizeof(double)); - tmp_grad = (double*)malloc(p->n_dim * N * sizeof(double)); + scratch.ensure(p->n_dim * N); + qp_trans = scratch.qp_trans.data(); + tmp_grad = scratch.tmp_grad.data(); } // Initialize gradient array @@ -243,11 +260,8 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) { if (p->do_shift_rotate[i] == 0) { (p->gradient)[i](t, (p->parameters)[i], qp, p->n_dim, N, grad, (p->state)[i]); } else { - // Initialize temporary arrays - for (size_t j = 0; j < p->n_dim * N; j++) { - qp_trans[j] = 0.; - tmp_grad[j] = 0.; - } + std::memset(qp_trans, 0, p->n_dim * N * sizeof(double)); + std::memset(tmp_grad, 0, p->n_dim * N * sizeof(double)); // Apply shift and rotation to all particles apply_shift_rotate_N( @@ -278,12 +292,6 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) { } } } - - // Free temporary arrays - if (need_transform) { - free(qp_trans); - free(tmp_grad); - } }