Skip to content

Commit 8568a1f

Browse files
eeeebbbbrrrrclaude
andauthored
Capture Rust backtraces for Postgres ERRORs raised through pg_sys FFIcalls (pgcentralfoundation#2262)
When Rust code calls a pg_sys function (e.g. pg_sys::relation_open()) and that function internally raises an ERROR via elog/ereport, the longjmp is caught by pg_guard_ffi_boundary and converted to a Rust panic. The panic hook captures a Rust backtrace at that point, but it was never attached to the error — the CaughtError::PostgresError path went through pg_re_throw() which bypassed do_ereport() entirely, so the backtrace was discarded. Fix this by: - In `downcast_panic_payload()`, when unwrapping a `CaughtError::PostgresError`, attach the backtrace from PANIC_LOCATION to the error report. This is the common path used by both `PgTryBuilder` and `run_guarded`, so the backtrace is available regardless of how the error is caught. - In `run_guarded(),` route CaughtError::PostgresError through GuardAction::Report (and thus do_ereport) instead of GuardAction::ReThrow (and pg_re_throw). This ensures the backtrace appears in the ERROR's DETAIL line when the error propagates all the way out. - In `pg_guard_ffi_boundary`, call `FlushErrorState(`) after` CopyErrorData` to properly clean the original error off Postgres' fixed-size errordata[] stack before do_ereport pushes a new entry. - Remove the now-unused `GuardAction::ReThrow` variant and `pg_re_throw()` call. --------- Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
1 parent 86ad24a commit 8568a1f

3 files changed

Lines changed: 99 additions & 29 deletions

File tree

pgrx-pg-sys/src/submodules/ffi.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,9 +240,12 @@ unsafe fn pg_guard_ffi_boundary_impl<T, F: FnOnce() -> T>(f: F) -> T {
240240
};
241241
let line = errdata.lineno as _;
242242

243-
// clean up after ourselves by freeing the result of [CopyErrorData] and restoring
244-
// Postgres' understanding of where its next longjmp should go
243+
// clean up after ourselves by freeing the result of [CopyErrorData], flushing the
244+
// Postgres error state (so the original error no longer occupies a slot on Postgres'
245+
// fixed-size errordata[] stack), and restoring Postgres' understanding of where its
246+
// next longjmp should go
245247
pg_sys::FreeErrorData(errdata_ptr);
248+
pg_sys::FlushErrorState();
246249
pg_sys::PG_exception_stack = prev_exception_stack;
247250
pg_sys::error_context_stack = prev_error_context_stack;
248251

pgrx-pg-sys/src/submodules/panic.rs

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -343,17 +343,20 @@ pub fn register_pg_guard_panic_hook() {
343343

344344
let default_hook = std::panic::take_hook();
345345
std::panic::set_hook(Box::new(move |info: _| {
346-
if is_os_main_thread() == Some(true) {
347-
// if this is the main thread, swallow the panic message and use postgres' error-reporting mechanism.
348-
PANIC_LOCATION.with(|thread_local| {
349-
thread_local.replace({
350-
let mut info: ErrorReportLocation = info.into();
351-
info.backtrace = Some(std::backtrace::Backtrace::capture());
352-
Some(info)
353-
})
354-
});
355-
} else {
356-
// if this isn't the main thread, we don't know which connection to associate the panic with.
346+
// Always capture the backtrace into PANIC_LOCATION so that
347+
// downcast_panic_payload can attach it to CaughtError::PostgresError.
348+
// This is a thread-local, so it's harmless on non-main threads.
349+
PANIC_LOCATION.with(|thread_local| {
350+
thread_local.replace({
351+
let mut info: ErrorReportLocation = info.into();
352+
info.backtrace = Some(std::backtrace::Backtrace::capture());
353+
Some(info)
354+
})
355+
});
356+
357+
if is_os_main_thread() == Some(false) {
358+
// definitely not the main thread -- we don't know which connection
359+
// to associate the panic with, so also call the default hook
357360
default_hook(info)
358361
}
359362
}))
@@ -386,7 +389,6 @@ impl CaughtError {
386389
#[derive(Debug)]
387390
enum GuardAction<R> {
388391
Return(R),
389-
ReThrow,
390392
Report(ErrorReportWithLevel),
391393
}
392394

@@ -428,16 +430,6 @@ where
428430
{
429431
match unsafe { run_guarded(AssertUnwindSafe(f)) } {
430432
GuardAction::Return(r) => r,
431-
GuardAction::ReThrow => {
432-
#[cfg_attr(target_os = "windows", link(name = "postgres"))]
433-
unsafe extern "C-unwind" {
434-
fn pg_re_throw() -> !;
435-
}
436-
unsafe {
437-
crate::CurrentMemoryContext = crate::ErrorContext;
438-
pg_re_throw()
439-
}
440-
}
441433
GuardAction::Report(ereport) => {
442434
do_ereport(ereport);
443435
unreachable!("pgrx reported a CaughtError that wasn't raised at ERROR or above");
@@ -454,10 +446,11 @@ where
454446
match catch_unwind(f) {
455447
Ok(v) => GuardAction::Return(v),
456448
Err(e) => match downcast_panic_payload(e) {
457-
CaughtError::PostgresError(_) => {
458-
// Return to the caller to rethrow -- we can't do it here
459-
// since we this function's has non-POF frames.
460-
GuardAction::ReThrow
449+
CaughtError::PostgresError(ereport) => {
450+
// Postgres raised this error via longjmp, which pg_guard_ffi_boundary caught
451+
// and converted into a Rust panic. downcast_panic_payload already attached the
452+
// Rust backtrace from the panic hook, so just report it through do_ereport.
453+
GuardAction::Report(ereport)
461454
}
462455
CaughtError::ErrorReport(ereport) | CaughtError::RustPanic { ereport, .. } => {
463456
GuardAction::Report(ereport)
@@ -470,7 +463,20 @@ where
470463
pub(crate) fn downcast_panic_payload(e: Box<dyn Any + Send>) -> CaughtError {
471464
if e.downcast_ref::<CaughtError>().is_some() {
472465
// caught a previously caught CaughtError that is being rethrown
473-
*e.downcast::<CaughtError>().unwrap()
466+
let mut caught = *e.downcast::<CaughtError>().unwrap();
467+
468+
// For PostgresErrors (originating from a pg_sys FFI longjmp caught by
469+
// pg_guard_ffi_boundary), the panic hook captured a Rust backtrace into
470+
// PANIC_LOCATION. Attach it now so callers (PgTryBuilder, run_guarded,
471+
// etc.) can include it in the ERROR's DETAIL line.
472+
if let CaughtError::PostgresError(ref mut ereport) = caught {
473+
if ereport.inner.location.backtrace.is_none() {
474+
let panic_location = take_panic_location();
475+
ereport.inner.location.backtrace = panic_location.backtrace;
476+
}
477+
}
478+
479+
caught
474480
} else if e.downcast_ref::<ErrorReportWithLevel>().is_some() {
475481
// someone called `panic_any(ErrorReportWithLevel)`
476482
CaughtError::ErrorReport(*e.downcast().unwrap())

pgrx-tests/src/tests/pg_try_tests.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,67 @@ mod tests {
6666
Spi::get_one::<()>("SELECT crash()").map(|_| ())
6767
}
6868

69+
/// When a `pg_sys::` FFI call raises a Postgres ERROR (via longjmp), the error should
70+
/// carry a Rust backtrace so that it appears in the ERROR's DETAIL line.
71+
#[pg_test]
72+
fn test_postgres_error_contains_backtrace() {
73+
use pgrx::pg_sys::panic::CaughtError;
74+
75+
let has_backtrace = PgTryBuilder::new(|| {
76+
// relation_open with a bogus OID will raise an ERROR inside Postgres
77+
unsafe {
78+
pg_sys::relation_open(pg_sys::Oid::INVALID, pg_sys::AccessShareLock as _);
79+
}
80+
unreachable!("relation_open should have raised an ERROR");
81+
})
82+
.catch_others(|e| match e {
83+
CaughtError::PostgresError(ref ereport) => ereport.backtrace().is_some(),
84+
_ => panic!("expected CaughtError::PostgresError, got {e:?}"),
85+
})
86+
.execute();
87+
88+
assert!(has_backtrace, "PostgresError from a pg_sys FFI call should contain a backtrace");
89+
}
90+
91+
/// When a Postgres ERROR propagates through Rust frames, destructors must run (via panic
92+
/// unwinding) AND the resulting error should carry a Rust backtrace in its detail.
93+
#[pg_test]
94+
fn test_postgres_error_runs_drop_and_has_backtrace() {
95+
use pgrx::pg_sys::panic::CaughtError;
96+
97+
let dropped = Rc::new(AtomicBool::new(false));
98+
99+
struct Guard(Rc<AtomicBool>);
100+
impl Drop for Guard {
101+
fn drop(&mut self) {
102+
self.0.store(true, Ordering::SeqCst);
103+
}
104+
}
105+
106+
let (drop_ran, has_backtrace) = PgTryBuilder::new(|| {
107+
let _guard = Guard(dropped.clone());
108+
109+
// relation_open with a bogus OID raises an ERROR inside Postgres, which
110+
// pg_guard_ffi_boundary converts to a panic. The panic unwinds through
111+
// this frame, running Guard's destructor.
112+
unsafe {
113+
pg_sys::relation_open(pg_sys::Oid::INVALID, pg_sys::AccessShareLock as _);
114+
}
115+
unreachable!("relation_open should have raised an ERROR");
116+
})
117+
.catch_others(|e| {
118+
let bt = match e {
119+
CaughtError::PostgresError(ref ereport) => ereport.backtrace().is_some(),
120+
_ => panic!("expected CaughtError::PostgresError, got {e:?}"),
121+
};
122+
(dropped.load(Ordering::SeqCst), bt)
123+
})
124+
.execute();
125+
126+
assert!(drop_ran, "Guard's Drop impl should have run during panic unwinding");
127+
assert!(has_backtrace, "PostgresError should carry a Rust backtrace");
128+
}
129+
69130
#[pg_test]
70131
fn test_pg_try_execute_no_error_no_catch() {
71132
let result = PgTryBuilder::new(|| 42).execute();

0 commit comments

Comments
 (0)