 #include "node_realm-inl.h"
 #include "node_shadow_realm.h"
 #include "node_snapshot_builder.h"
+#include "node_v8.h"
 #include "node_v8_platform-inl.h"
+#include "v8-profiler.h"
 #include "node_wasm_web_api.h"
 #include "uv.h"
 #ifdef NODE_ENABLE_VTUNE_PROFILING
@@ -117,6 +119,10 @@ void* NodeArrayBufferAllocator::Allocate(size_t size) {
   ret = allocator_->Allocate(size);
   if (ret != nullptr) [[likely]] {
     total_mem_usage_.fetch_add(size, std::memory_order_relaxed);
+    auto* pa = profiling_allocator_.load(std::memory_order_acquire);
+    if (pa != nullptr) [[unlikely]] {
+      pa->TrackAllocate(ret, size);
+    }
   }
   return ret;
 }
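
The tracker pointer read here with std::memory_order_acquire is published with a matching std::memory_order_release store in NodeArrayBufferAllocator::CreateProfilingAllocator() (next hunk). A minimal standalone sketch of that publication pattern, with illustrative names not taken from the diff:

#include <atomic>
#include <memory>

struct Tracker {
  int ready = 42;
};

std::unique_ptr<Tracker> owned;
std::atomic<Tracker*> published{nullptr};

void Publish() {
  owned = std::make_unique<Tracker>();  // construct fully first...
  published.store(owned.get(), std::memory_order_release);  // ...then publish
}

int Read() {
  // The acquire load pairs with the release store: any thread that observes
  // a non-null pointer also observes the fully constructed Tracker. When the
  // pointer is null (tracking disabled), the cost is one load plus a
  // predicted branch.
  if (Tracker* t = published.load(std::memory_order_acquire)) {
    return t->ready;
  }
  return 0;
}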
@@ -126,15 +132,38 @@ void* NodeArrayBufferAllocator::AllocateUninitialized(size_t size) {
   void* ret = allocator_->AllocateUninitialized(size);
   if (ret != nullptr) [[likely]] {
     total_mem_usage_.fetch_add(size, std::memory_order_relaxed);
+    auto* pa = profiling_allocator_.load(std::memory_order_acquire);
+    if (pa != nullptr) [[unlikely]] {
+      pa->TrackAllocate(ret, size);
+    }
   }
   return ret;
 }

 void NodeArrayBufferAllocator::Free(void* data, size_t size) {
+  auto* pa = profiling_allocator_.load(std::memory_order_acquire);
+  if (pa != nullptr) [[unlikely]] {
+    pa->TrackFree(data);
+  }
   total_mem_usage_.fetch_sub(size, std::memory_order_relaxed);
   allocator_->Free(data, size);
 }

+ProfilingArrayBufferAllocator*
+NodeArrayBufferAllocator::CreateProfilingAllocator() {
+  if (!owned_profiling_allocator_) {
+    owned_profiling_allocator_ =
+        std::make_unique<ProfilingArrayBufferAllocator>();
+  }
+  auto* pa = owned_profiling_allocator_.get();
+  profiling_allocator_.store(pa, std::memory_order_release);
+  return pa;
+}
+
+void NodeArrayBufferAllocator::ClearProfilingAllocator() {
+  profiling_allocator_.store(nullptr, std::memory_order_release);
+}
+
 DebuggingArrayBufferAllocator::~DebuggingArrayBufferAllocator() {
   CHECK(allocations_.empty());
 }
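
The intended wiring for the two methods added above, as a hypothetical call site (StartProfiling/StopProfiling are illustrative names, not from the diff; the real callers belong to the heap-profiler plumbing outside this hunk):

void StartProfiling(NodeArrayBufferAllocator* allocator,
                    v8::Isolate* isolate) {
  // Lazily constructs the tracker and publishes it with a release store;
  // Allocate()/AllocateUninitialized()/Free() pick it up via acquire loads.
  ProfilingArrayBufferAllocator* pa = allocator->CreateProfilingAllocator();
  pa->Enable(isolate);  // records the main thread id, then sets isolate_
}

void StopProfiling(NodeArrayBufferAllocator* allocator,
                   ProfilingArrayBufferAllocator* pa) {
  allocator->ClearProfilingAllocator();  // hot paths stop tracking
  pa->Disable();                         // drops all recorded allocations
}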
@@ -191,11 +220,154 @@ void DebuggingArrayBufferAllocator::RegisterPointerInternal(void* data,
   allocations_[data] = size;
 }

+void ProfilingArrayBufferAllocator::TrackAllocate(void* data, size_t size) {
+  // GC safety: this runs inside the ArrayBuffer allocator where V8 may
+  // prohibit heap allocation and JS execution (e.g. BackingStore::Allocate
+  // inside the ArrayBuffer constructor). Extract only the ALS value
+  // from the CPED at allocation time via HeapProfiler::LookupAlsValue(),
+  // which uses OrderedHashMap::FindEntry (zero-allocation, GC-safe).
+  // HandleScope, GetContinuationPreservedEmbedderData, Global::Reset,
+  // and OrderedHashMap::FindEntry use the handle-scope stack / malloc,
+  // not the V8 heap.
+  v8::Isolate* isolate = isolate_.load(std::memory_order_relaxed);
+  if (std::this_thread::get_id() != main_thread_id_ ||
+      isolate == nullptr) {
+    return;
+  }
+  v8::HandleScope handle_scope(isolate);
+  v8::Local<v8::Value> cped =
+      isolate->GetContinuationPreservedEmbedderData();
+  if (!cped.IsEmpty() && cped->IsMap()) {
+    v8::HeapProfiler* profiler = isolate->GetHeapProfiler();
+    v8::Local<v8::Value> als_value;
+    if (profiler->LookupAlsValue(cped).ToLocal(&als_value)) {
+      Mutex::ScopedLock lock(mutex_);
+      auto& entry = allocations_[data];
+      entry.label_value.Reset(isolate, als_value);
+      entry.size = size;
+    }
+  }
+}
+
+void ProfilingArrayBufferAllocator::TrackFree(void* data) {
+  Mutex::ScopedLock lock(mutex_);
+  allocations_.erase(data);
+}
+
+void ProfilingArrayBufferAllocator::Enable(v8::Isolate* isolate) {
+  // Synchronization strategy: isolate_ serves as the "enabled" sentinel.
+  // TrackAllocate() and GetPerLabelBytes() check isolate_ == nullptr without
+  // holding the mutex as an early-exit guard. We set isolate_ LAST here so
+  // that by the time any reader sees non-null isolate_, main_thread_id_ is
+  // already valid. The corresponding Disable() sets isolate_ FIRST (to
+  // nullptr) before clearing other fields. All accesses to isolate_ outside
+  // the mutex happen on the main thread, so relaxed memory ordering
+  // suffices; std::atomic prevents compiler reordering and torn reads.
+  Mutex::ScopedLock lock(mutex_);
+  main_thread_id_ = std::this_thread::get_id();
+  isolate_.store(isolate, std::memory_order_relaxed);
+}
+
+void ProfilingArrayBufferAllocator::Disable() {
+  // Clear isolate_ FIRST; it is the "enabled" sentinel checked by
+  // TrackAllocate() and GetPerLabelBytes() without holding the mutex.
+  // Setting it to nullptr before clearing other state ensures any
+  // re-entrant call (e.g., if allocations_.clear() triggers a Global
+  // destructor → GC → allocation → TrackAllocate on the same thread)
+  // will see null and exit early.
+  isolate_.store(nullptr, std::memory_order_relaxed);
+  Mutex::ScopedLock lock(mutex_);
+  allocations_.clear();
+}
+
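// A standalone sketch of the sentinel ordering that the Enable()/Disable()
// comments above describe, with illustrative names (not part of the diff):
// the flag is published last on enable and retracted first on disable, so a
// lock-free reader never observes half-initialized state.
#include <atomic>
#include <thread>

class TrackerState {
 public:
  void Enable() {
    owner_ = std::this_thread::get_id();              // init state first...
    enabled_.store(true, std::memory_order_relaxed);  // ...publish last
  }
  void Disable() {
    enabled_.store(false, std::memory_order_relaxed);  // retract first
    // Remaining state can be torn down here; a re-entrant call on the same
    // thread now sees `false` and exits early.
  }
  bool ShouldTrack() const {
    // Mutex-free early-exit guard. As in the diff, the unguarded accesses
    // are assumed to happen on the owning thread, so relaxed ordering
    // suffices; std::atomic still rules out torn reads and compiler
    // reordering.
    return enabled_.load(std::memory_order_relaxed) &&
           std::this_thread::get_id() == owner_;
  }

 private:
  std::thread::id owner_;
  std::atomic<bool> enabled_{false};
};
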
+std::vector<ProfilingArrayBufferAllocator::LabeledBytes>
+ProfilingArrayBufferAllocator::GetPerLabelBytes() const {
+  v8::Isolate* isolate = isolate_.load(std::memory_order_relaxed);
+  if (isolate == nullptr) {
+    return {};
+  }
+
+  v8::HandleScope handle_scope(isolate);
+
+  // Snapshot: convert stored Globals to Locals under the lock, then release
+  // the lock before doing V8 API calls that may trigger GC. If GC fires
+  // weak callbacks that call Free(), they acquire the lock independently
+  // without deadlocking.
+  struct SnapshotEntry {
+    v8::Local<v8::Value> label_value;
+    size_t size;
+  };
+  std::vector<SnapshotEntry> snapshot;
+  {
+    Mutex::ScopedLock lock(mutex_);
+    snapshot.reserve(allocations_.size());
+    for (const auto& [ptr, entry] : allocations_) {
+      if (!entry.label_value.IsEmpty()) {
+        snapshot.push_back({entry.label_value.Get(isolate), entry.size});
+      }
+    }
+  }
+
+  // Resolve labels outside the lock. Array::Get() and String::Utf8Value
+  // may allocate on the V8 heap; that is safe here because GetPerLabelBytes
+  // is called from GetAllocationProfile (a JS callback) where GC is allowed.
+  // The stored values are ALS values (flat [key, val, ...] arrays) extracted
+  // at allocation time, so no Map lookup is needed.
+  v8::Local<v8::Context> v8_context = isolate->GetCurrentContext();
+  std::unordered_map<std::string, LabeledBytes> aggregated;
+  for (const auto& snap : snapshot) {
+    if (!snap.label_value->IsArray()) continue;
+    v8::Local<v8::Array> flat = snap.label_value.As<v8::Array>();
+    uint32_t len = flat->Length();
+    LabelPairs labels;
+    for (uint32_t j = 0; j + 1 < len; j += 2) {
+      v8::Local<v8::Value> k, v;
+      if (!flat->Get(v8_context, j).ToLocal(&k)) continue;
+      if (!flat->Get(v8_context, j + 1).ToLocal(&v)) continue;
+      v8::String::Utf8Value key_str(isolate, k);
+      v8::String::Utf8Value val_str(isolate, v);
+      if (*key_str == nullptr || *val_str == nullptr) continue;
+      labels.emplace_back(*key_str, *val_str);
+    }
+    if (labels.empty()) continue;
+    std::string key = SerializeLabels(labels);
+    auto& agg = aggregated[key];
+    if (agg.labels.empty()) agg.labels = std::move(labels);
+    agg.bytes += static_cast<int64_t>(snap.size);
+  }
+
+  std::vector<LabeledBytes> result;
+  result.reserve(aggregated.size());
+  for (auto& [key, entry] : aggregated) {
+    if (entry.bytes > 0) {
+      result.push_back(std::move(entry));
+    }
+  }
+  return result;
+}
+
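// A standalone sketch (illustrative types, not part of the diff) of the
// "snapshot under the lock, resolve outside it" shape used by
// GetPerLabelBytes(): copying the entries first keeps V8 calls that can
// trigger GC, and thereby re-enter Free() -> TrackFree(), out of the
// critical section, so TrackFree() can take the mutex without deadlock.
#include <cstddef>
#include <mutex>
#include <unordered_map>
#include <vector>

std::mutex mu;
std::unordered_map<void*, std::size_t> live;  // guarded by mu

std::vector<std::size_t> SnapshotSizes() {
  std::vector<std::size_t> snapshot;
  {
    std::lock_guard<std::mutex> lock(mu);  // short critical section
    snapshot.reserve(live.size());
    for (const auto& [ptr, size] : live) snapshot.push_back(size);
  }
  // Heavy work (label decoding, aggregation) runs here without the lock;
  // concurrent erases proceed independently.
  return snapshot;
}
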
+std::string ProfilingArrayBufferAllocator::SerializeLabels(
+    const LabelPairs& labels) {
+  std::string key;
+  for (const auto& [k, v] : labels) {
+    if (!key.empty()) key += '\0';
+    key += k;
+    key += '\0';
+    key += v;
+  }
+  return key;
+}
+
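// A worked example of the NUL-delimited aggregation key (a standalone
// mirror of SerializeLabels() above, not part of the diff). Both keys and
// values are '\0'-separated; since the Utf8Value-to-string conversion in
// GetPerLabelBytes() cannot produce embedded NULs, distinct label sets map
// to distinct keys.
#include <cassert>
#include <string>
#include <utility>
#include <vector>

using LabelPairs = std::vector<std::pair<std::string, std::string>>;

std::string SerializeLabels(const LabelPairs& labels) {
  std::string key;
  for (const auto& [k, v] : labels) {
    if (!key.empty()) key += '\0';
    key += k;
    key += '\0';
    key += v;
  }
  return key;
}

int main() {
  LabelPairs labels{{"route", "/users"}, {"method", "GET"}};
  // "route\0/users\0method\0GET": 23 characters including three NULs.
  assert(SerializeLabels(labels) ==
         std::string("route\0/users\0method\0GET", 23));
}
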
 std::unique_ptr<ArrayBufferAllocator> ArrayBufferAllocator::Create(bool debug) {
   if (debug || per_process::cli_options->debug_arraybuffer_allocations)
     return std::make_unique<DebuggingArrayBufferAllocator>();
-  else
-    return std::make_unique<NodeArrayBufferAllocator>();
+  // Use the plain NodeArrayBufferAllocator by default. When heap profiling
+  // with labels is started (v8.startSamplingHeapProfiler), a
+  // ProfilingArrayBufferAllocator tracker is created on demand and installed
+  // as a delegate; see NodeArrayBufferAllocator::CreateProfilingAllocator().
+  // This keeps per-allocation overhead negligible while profiling is
+  // inactive: one atomic load and a predicted-not-taken null branch, with
+  // no change in virtual dispatch.
+  return std::make_unique<NodeArrayBufferAllocator>();
 }

 ArrayBufferAllocator* CreateArrayBufferAllocator() {