Skip to content

Commit ac212aa

Browse files
committed
WIP: NAPI shutdown lifecycle fixes for QuickJS
These are partial fixes toward making JS_FreeRuntime's assert(list_empty(&rt->gc_obj_list)) hold without patching quickjs. 136 tests still pass; the assert still fires due to architectural issues (NapiCallback/NapiWrap/NapiExternal classes have gc_mark = nullptr so QuickJS cycle GC cannot trace through them). Real bugs fixed in this commit: * Detach iteration use-after-free: Iterating env->refs while JS_FreeValue triggers finalizers (which call napi_delete_reference and delete RefInfo entries) was UB. Snapshot env->refs first; clear before iterating. * napi_delete_reference / napi_reference_unref dropped JSValues only JS_FreeContext, pinning JSValues into JS_FreeRuntime and contributing to the gc_obj_list assertion. Always free strong holds; only the env->refs bookkeeping is shutdown-skipped. * Other NAPI fixes from the prior cycle: weak-ref semantics in napi_create_reference, ExternalCallback newTarget self-ref leak removal, callbackData double-ownership in create_function_internal, RefInfo tracking for napi_create_promise, FreeEnv lifecycle. Co-authored-by: Copilot <[email protected]>
1 parent fe26c61 commit ac212aa

5 files changed

Lines changed: 200 additions & 39 deletions

File tree

Core/AppRuntime/Source/AppRuntime_QuickJS.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,8 @@ namespace Babylon
3131
}
3232

3333
// Use the context within a scope.
34+
Napi::Env env = Napi::Attach(context);
3435
{
35-
Napi::Env env = Napi::Attach(context);
36-
3736
// Install microtask processing as a post-tick callback
3837
SetPostTickCallback([runtime]() {
3938
JSContext* pending_ctx;
@@ -48,8 +47,12 @@ namespace Babylon
4847
Napi::Detach(env);
4948
}
5049

51-
// Destroy the context and runtime.
50+
// Destroy the context and runtime. QuickJS finalizers fired during
51+
// these calls (e.g. ObjectWrap deleters) may invoke napi_delete_reference,
52+
// so the napi_env__ structure must outlive both. Detach() above already
53+
// released all JS-side resources; FreeEnv() below frees the env itself.
5254
JS_FreeContext(context);
5355
JS_FreeRuntime(runtime);
56+
Napi::FreeEnv(env);
5457
}
5558
}

Core/Node-API/Include/Engine/QuickJS/napi/env.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,17 @@ namespace Napi
77
{
88
Napi::Env Attach(JSContext* context);
99

10+
// Releases JS resources held by the env (handle scope, persistent refs, etc.)
11+
// and marks it as shutting down so subsequent finalizer-driven calls into
12+
// NAPI become no-ops. The env pointer remains valid (do not call NAPI APIs
13+
// that touch JS state on it) until FreeEnv() is invoked.
1014
void Detach(Napi::Env);
1115

16+
// Frees the napi_env structure itself. Must be called AFTER JS_FreeRuntime
17+
// because QuickJS finalizers (e.g. ObjectWrap deleters) may still call
18+
// napi_delete_reference / external callbacks which dereference the env.
19+
void FreeEnv(Napi::Env);
20+
1221
Napi::Value Eval(Napi::Env env, const char* source, const char* sourceUrl);
1322

1423
JSContext* GetContext(Napi::Env);

Core/Node-API/Source/env_quickjs.cc

Lines changed: 84 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,21 +47,98 @@ namespace Napi
4747
void Detach(Env env)
4848
{
4949
napi_env env_ptr{env};
50-
if (env_ptr)
50+
if (!env_ptr)
51+
{
52+
return;
53+
}
54+
55+
// Drain any pending microtasks/jobs (e.g. unresolved Promise reactions)
56+
// BEFORE we mark the env as shutting down. Pending jobs hold closures
57+
// and any captured values; left in the runtime job queue they would
58+
// pin objects past JS_FreeRuntime and fail the gc_obj_list assertion.
59+
// Executing them now may run native callbacks that legitimately create
60+
// and free napi refs, so this must happen while NAPI is still live.
61+
JSRuntime* rt = JS_GetRuntime(env_ptr->context);
62+
JSContext* pending_ctx;
63+
while (JS_IsJobPending(rt))
5164
{
52-
if (!JS_IsUndefined(env_ptr->has_own_property_function))
65+
int ret = JS_ExecutePendingJob(rt, &pending_ctx);
66+
if (ret <= 0) {
67+
break;
68+
}
69+
}
70+
71+
// Mark the env as shutting down. After this:
72+
// - napi_create_reference becomes a no-op-ish path (no JS handle).
73+
// - napi_delete_reference will not access the JS context.
74+
// Finalizers triggered later by JS_FreeContext / JS_FreeRuntime can
75+
// still legitimately call napi_delete_reference; we make those calls
76+
// safe instead of UAFing the (already-deleted) env.
77+
env_ptr->is_shutting_down = true;
78+
79+
if (!JS_IsUndefined(env_ptr->has_own_property_function))
80+
{
81+
JS_FreeValue(env_ptr->context, env_ptr->has_own_property_function);
82+
env_ptr->has_own_property_function = JS_UNDEFINED;
83+
}
84+
85+
// Release all strong JS references that NAPI is currently holding via
86+
// napi_create_reference. Without this, the QuickJS GC invoked from
87+
// JS_FreeRuntime cannot collect wrapped objects that are pinned by
88+
// napi_ref instances, and assert(list_empty(&rt->gc_obj_list)) fires.
89+
//
90+
// We do NOT delete the RefInfo objects here: they will be freed when
91+
// their owning C++ destructors run napi_delete_reference (typically
92+
// from ObjectWrap finalizers during JS_FreeRuntime). We just neutralize
93+
// them so that those later calls don't double-free the JS value or
94+
// touch a freed env.
95+
// IMPORTANT: snapshot env->refs into a local container before iterating.
96+
// Each JS_FreeValue below can trigger QuickJS finalizers (e.g.
97+
// ObjectWrap finalize → C++ destructor → ~Reference<T> →
98+
// napi_delete_reference). With env->is_shutting_down already set, the
99+
// shutdown-aware napi_delete_reference does NOT erase from env->refs
100+
// before delete'ing the RefInfo, so iterating env->refs directly would
101+
// deref a freed pointer (use-after-free). Snapshot + iterate over the
102+
// snapshot avoids that, and we then clear env->refs once at the end.
103+
std::vector<RefInfo*> snapshot(env_ptr->refs.begin(), env_ptr->refs.end());
104+
env_ptr->refs.clear();
105+
for (RefInfo* info : snapshot)
106+
{
107+
if (info->count > 0)
53108
{
54-
JS_FreeValue(env_ptr->context, env_ptr->has_own_property_function);
55-
env_ptr->has_own_property_function = JS_UNDEFINED;
109+
JSValue v = info->value;
110+
info->value = JS_UNDEFINED;
111+
info->count = 0;
112+
JS_FreeValue(env_ptr->context, v);
56113
}
114+
}
57115

58-
// Free all remaining JSValues in the handle scope stack
59-
for (auto& ptr : env_ptr->handle_scope_stack)
116+
// Free all remaining JSValues in the handle scope stack. Use a swap
117+
// loop because finalizers triggered by JS_FreeValue can themselves
118+
// push new entries onto handle_scope_stack (e.g. via FromJSValue),
119+
// which would invalidate iterators of a range-for over the original
120+
// vector and leave entries unfreed.
121+
while (!env_ptr->handle_scope_stack.empty())
122+
{
123+
std::vector<std::unique_ptr<JSValue>> stack;
124+
stack.swap(env_ptr->handle_scope_stack);
125+
for (auto& ptr : stack)
60126
{
61127
JS_FreeValue(env_ptr->context, *ptr);
62128
}
63-
env_ptr->handle_scope_stack.clear();
129+
}
130+
131+
// Run a final GC to collect anything we just released that participates
132+
// in cycles. Without this, cycle-members released above may persist and
133+
// pin further objects, eventually failing the assertion in JS_FreeRuntime.
134+
JS_RunGC(JS_GetRuntime(env_ptr->context));
135+
}
64136

137+
void FreeEnv(Env env)
138+
{
139+
napi_env env_ptr{env};
140+
if (env_ptr)
141+
{
65142
delete env_ptr;
66143
}
67144
}

Core/Node-API/Source/js_native_api_quickjs.cc

Lines changed: 81 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ struct CallbackInfo {
7171
class ExternalCallback {
7272
public:
7373
ExternalCallback(napi_env env, napi_callback cb, void* data)
74-
: _env(env), _cb(cb), _data(data), newTarget(JS_UNDEFINED) {}
74+
: _env(env), _cb(cb), _data(data) {}
7575

7676
static JSValue Callback(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int magic, JSValue *func_data) {
7777
ExternalCallback* externalCallback = reinterpret_cast<ExternalCallback*>(JS_GetOpaque(func_data[0], js_callback_class_id));
@@ -94,19 +94,22 @@ static JSValue Callback(JSContext *ctx, JSValueConst this_val, int argc, JSValue
9494

9595
JSValue actualThis = JS_UNDEFINED;
9696
bool isConstructCall = (magic == 1);
97-
98-
// Handle constructor call
97+
98+
// For JS_CLASS_C_FUNCTION_DATA constructor calls, QuickJS passes new_target
99+
// as this_val. Use it directly instead of storing the function as a raw
100+
// JSValue on ExternalCallback — doing so created a self-referencing C-level
101+
// ref that QuickJS GC could not see, leaking the function permanently.
99102
if (isConstructCall) {
100-
JSValue prototypeProperty = JS_GetPropertyStr(ctx, externalCallback->newTarget, "prototype");
101-
103+
JSValue prototypeProperty = JS_GetPropertyStr(ctx, this_val, "prototype");
104+
102105
if (JS_IsException(prototypeProperty)) {
103106
externalCallback->_env->current_context = savedCtx; // RESTORE
104107
return JS_EXCEPTION;
105108
}
106-
109+
107110
actualThis = JS_NewObjectProto(ctx, prototypeProperty);
108111
JS_FreeValue(ctx, prototypeProperty);
109-
112+
110113
if (JS_IsException(actualThis)) {
111114
externalCallback->_env->current_context = savedCtx; // RESTORE
112115
return JS_EXCEPTION;
@@ -117,7 +120,12 @@ static JSValue Callback(JSContext *ctx, JSValueConst this_val, int argc, JSValue
117120

118121
CallbackInfo cbInfo;
119122
cbInfo.thisArg = reinterpret_cast<napi_value>(&actualThis);
120-
cbInfo.newTarget = reinterpret_cast<napi_value>(&externalCallback->newTarget);
123+
// For constructor calls, this_val IS new_target (the constructor function
124+
// itself). Cast away const for the napi_value contract; the callee must not
125+
// mutate it during the call.
126+
cbInfo.newTarget = isConstructCall
127+
? reinterpret_cast<napi_value>(const_cast<JSValue*>(&this_val))
128+
: nullptr;
121129
cbInfo.isConstructCall = isConstructCall;
122130
cbInfo.argc = argc;
123131
cbInfo.argv = args.empty() ? nullptr : args.data();
@@ -176,25 +184,16 @@ static JSValue Callback(JSContext *ctx, JSValueConst this_val, int argc, JSValue
176184
void* opaque = JS_GetOpaque(val, js_callback_class_id);
177185
ExternalCallback* externalCallback = reinterpret_cast<ExternalCallback*>(opaque);
178186
if (externalCallback != nullptr) {
179-
JS_FreeValueRT(rt, externalCallback->newTarget);
180187
delete externalCallback;
181188
}
182189
}
183190

184-
JSValue newTarget;
185-
186191
private:
187192
napi_env _env;
188193
napi_callback _cb;
189194
void* _data;
190195
};
191196

192-
// Reference info for preventing GC
193-
struct RefInfo {
194-
JSValue value;
195-
uint32_t count;
196-
};
197-
198197
// Initialize class IDs (allocate once globally, register per runtime)
199198
void InitClassIds(JSRuntime* rt) {
200199
if (!class_ids_allocated) {
@@ -942,9 +941,11 @@ static napi_status create_function_internal(napi_env env, const char* utf8name,
942941
JS_FreeAtom(env->context, nameAtom);
943942
}
944943

945-
externalCallback->newTarget = JS_DupValue(env->context, func);
946-
947944
*result = FromJSValue(env, func);
945+
// Release our reference on the opaque callback object: the cfunction
946+
// retains it via func_data (which dups), so it stays alive as long as the
947+
// cfunction does. Without this the opaque object leaks (ref_count stuck at 2).
948+
JS_FreeValue(env->context, callbackData);
948949
napi_clear_last_error(env);
949950
return napi_ok;
950951
}
@@ -1682,8 +1683,29 @@ napi_status napi_create_reference(napi_env env, napi_value value, uint32_t initi
16821683
CHECK_ARG(env, result);
16831684

16841685
JSValue jsValue = ToJSValue(value);
1685-
RefInfo* info = new RefInfo{ JS_DupValue(env->context, jsValue), initial_refcount };
1686-
1686+
1687+
// initial_refcount == 0 means a WEAK reference per NAPI semantics. Holding
1688+
// a duplicated JSValue in that case would create an invisible-to-GC C-level
1689+
// root and prevent QuickJS from ever collecting wrapped objects (which is
1690+
// the root cause of the assert(list_empty(&rt->gc_obj_list)) failure in
1691+
// JS_FreeRuntime, since napi_wrap calls napi_create_reference with
1692+
// initial_refcount == 0).
1693+
RefInfo* info;
1694+
if (initial_refcount > 0) {
1695+
info = new RefInfo{ JS_DupValue(env->context, jsValue), initial_refcount };
1696+
} else {
1697+
// Weak: don't dup. The stored JSValue is non-owning and must not be
1698+
// dereferenced once count is back to 0 unless it has been re-strengthened.
1699+
info = new RefInfo{ jsValue, 0 };
1700+
}
1701+
1702+
// Track so Detach() can release any strong holds before JS_FreeRuntime runs
1703+
// its terminal GC. Skip tracking once shutting down — env state is being
1704+
// torn down and we must not mutate it from finalizer-driven calls.
1705+
if (!env->is_shutting_down) {
1706+
env->refs.insert(info);
1707+
}
1708+
16871709
*result = reinterpret_cast<napi_ref>(info);
16881710
napi_clear_last_error(env);
16891711
return napi_ok;
@@ -1694,7 +1716,21 @@ napi_status napi_delete_reference(napi_env env, napi_ref ref) {
16941716
CHECK_ARG(env, ref);
16951717

16961718
RefInfo* info = reinterpret_cast<RefInfo*>(ref);
1697-
JS_FreeValue(env->context, info->value);
1719+
1720+
// Even during shutdown we MUST drop any strong JS hold this reference still
1721+
// owns. Detach() drains env->refs but cannot reach RefInfo objects that are
1722+
// created after, or that the drain skipped because they were processed by
1723+
// a finalizer racing the loop. Leaving info->count > 0 with a live value
1724+
// keeps that JSValue alive past JS_FreeContext and fails the
1725+
// assert(list_empty(&rt->gc_obj_list)) inside JS_FreeRuntime.
1726+
if (!env->is_shutting_down) {
1727+
env->refs.erase(info);
1728+
}
1729+
if (info->count > 0) {
1730+
JS_FreeValue(env->context, info->value);
1731+
info->value = JS_UNDEFINED;
1732+
info->count = 0;
1733+
}
16981734
delete info;
16991735

17001736
napi_clear_last_error(env);
@@ -1704,31 +1740,43 @@ napi_status napi_delete_reference(napi_env env, napi_ref ref) {
17041740
napi_status napi_reference_ref(napi_env env, napi_ref ref, uint32_t* result) {
17051741
CHECK_ENV(env);
17061742
CHECK_ARG(env, ref);
1707-
1743+
17081744
RefInfo* info = reinterpret_cast<RefInfo*>(ref);
1745+
// Going 0 -> 1 must promote a weak ref to a strong one by duping the value
1746+
// so the JS object can no longer be collected from under us.
1747+
if (info->count == 0 && !env->is_shutting_down) {
1748+
info->value = JS_DupValue(env->context, info->value);
1749+
}
17091750
info->count++;
1710-
1751+
17111752
if (result != nullptr) {
17121753
*result = info->count;
17131754
}
1714-
1755+
17151756
napi_clear_last_error(env);
17161757
return napi_ok;
17171758
}
17181759

17191760
napi_status napi_reference_unref(napi_env env, napi_ref ref, uint32_t* result) {
17201761
CHECK_ENV(env);
17211762
CHECK_ARG(env, ref);
1722-
1763+
17231764
RefInfo* info = reinterpret_cast<RefInfo*>(ref);
17241765
if (info->count > 0) {
17251766
info->count--;
1767+
// Going 1 -> 0 demotes back to weak: release the strong JS hold. We must
1768+
// do this even during shutdown to avoid pinning JSValues past
1769+
// JS_FreeContext / JS_FreeRuntime (gc_obj_list assertion).
1770+
if (info->count == 0) {
1771+
JS_FreeValue(env->context, info->value);
1772+
info->value = JS_UNDEFINED;
1773+
}
17261774
}
1727-
1775+
17281776
if (result != nullptr) {
17291777
*result = info->count;
17301778
}
1731-
1779+
17321780
napi_clear_last_error(env);
17331781
return napi_ok;
17341782
}
@@ -2184,8 +2232,12 @@ napi_status napi_create_promise(napi_env env, napi_deferred* deferred, napi_valu
21842232
JS_SetPropertyStr(env->context, container, "reject", resolving_funcs[1]);
21852233

21862234
// Create reference directly from container WITHOUT going through FromJSValue
2187-
// to avoid double-ownership (handle scope + reference)
2235+
// to avoid double-ownership (handle scope + reference). Track in env->refs
2236+
// so Detach() can release the strong hold if the promise is never settled.
21882237
RefInfo* refInfo = new RefInfo{ container, 1 }; // Use refcount 1 for immediate access
2238+
if (!env->is_shutting_down) {
2239+
env->refs.insert(refInfo);
2240+
}
21892241
napi_ref ref = reinterpret_cast<napi_ref>(refInfo);
21902242

21912243
*deferred = reinterpret_cast<napi_deferred>(ref);

Core/Node-API/Source/js_native_api_quickjs.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,15 @@
55
#include <thread>
66
#include <cassert>
77
#include <vector>
8+
#include <unordered_set>
9+
10+
// Reference info for a napi_ref. count==0 means a weak reference (no JS
11+
// resource held); count>0 means a strong reference (value is JS_DupValue'd
12+
// and must be JS_FreeValue'd when count returns to 0 or the ref is deleted).
13+
struct RefInfo {
14+
JSValue value;
15+
uint32_t count;
16+
};
817

918
struct napi_env__ {
1019
JSContext* context = nullptr;
@@ -17,6 +26,17 @@ struct napi_env__ {
1726
// Handle scope storage
1827
std::vector<std::unique_ptr<JSValue>> handle_scope_stack;
1928
size_t current_scope_start = 0;
29+
30+
// All RefInfos created via napi_create_reference. Used by Detach() to
31+
// release strong references on the JS values they hold so that the QuickJS
32+
// garbage collector can reclaim wrapped objects (and their finalizers can
33+
// run) during JS_FreeContext / JS_FreeRuntime.
34+
std::unordered_set<RefInfo*> refs;
35+
36+
// Set true by Detach(). After this point napi_delete_reference must not
37+
// touch JS state because Detach() already released it, and napi_create_reference
38+
// must not allocate new JS-holding refs (the runtime is shutting down).
39+
bool is_shutting_down = false;
2040
};
2141

2242
#define RETURN_STATUS_IF_FALSE(env, condition, status) \

0 commit comments

Comments
 (0)