diff --git a/CHANGES.rst b/CHANGES.rst index b63c28b8f..a9c20226e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,8 @@ New Features - Added support for using the ``TimeInterpolatedPotential`` with the ``MN3ExponentialDiskPotential`` class. +- Improve performance of repeated gradient computations with transformed components by + allocating persistent scratch space. Bug fixes --------- diff --git a/src/gala/potential/potential/src/cpotential.cpp b/src/gala/potential/potential/src/cpotential.cpp index d942f5f9e..29dbe5717 100644 --- a/src/gala/potential/potential/src/cpotential.cpp +++ b/src/gala/potential/potential/src/cpotential.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include "cpotential.h" #include "src/vectorization.h" @@ -215,8 +217,23 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) { // qp: shape [p->n_dim, N] // grad: shape [p->n_dim, N] - double *qp_trans = NULL; - double *tmp_grad = NULL; + // Per-thread scratch for the do_shift_rotate path. + // This is to avoid repeated allocations of temporary arrays when repeating gradient calls. + // Note that the scratch memory is never deallocated! + struct GradScratch { + std::vector qp_trans; + std::vector tmp_grad; + void ensure(size_t n) { + if (qp_trans.size() < n) { + qp_trans.resize(n); + tmp_grad.resize(n); + } + } + }; + static thread_local GradScratch scratch; + + double *qp_trans = nullptr; + double *tmp_grad = nullptr; bool need_transform = false; // Check if any components need transformation @@ -227,10 +244,10 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) { } } - // Allocate temporary arrays if transformation is needed if (need_transform) { - qp_trans = (double*)malloc(p->n_dim * N * sizeof(double)); - tmp_grad = (double*)malloc(p->n_dim * N * sizeof(double)); + scratch.ensure(p->n_dim * N); + qp_trans = scratch.qp_trans.data(); + tmp_grad = scratch.tmp_grad.data(); } // Initialize gradient array @@ -243,11 +260,8 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) { if (p->do_shift_rotate[i] == 0) { (p->gradient)[i](t, (p->parameters)[i], qp, p->n_dim, N, grad, (p->state)[i]); } else { - // Initialize temporary arrays - for (size_t j = 0; j < p->n_dim * N; j++) { - qp_trans[j] = 0.; - tmp_grad[j] = 0.; - } + std::memset(qp_trans, 0, p->n_dim * N * sizeof(double)); + std::memset(tmp_grad, 0, p->n_dim * N * sizeof(double)); // Apply shift and rotation to all particles apply_shift_rotate_N( @@ -278,12 +292,6 @@ void c_gradient(CPotential *p, size_t N, double t, double *qp, double *grad) { } } } - - // Free temporary arrays - if (need_transform) { - free(qp_trans); - free(tmp_grad); - } }