Skip to content

Commit d374a86

Browse files
committed
Fix data race on cancelled flag reads
Snapshot conn->cancelled under lock in both the busy handler (after waking from condvar) and the progress handler. Previously both paths read the field outside the lock, which is a data race with exqlite_cancel writing to it on another scheduler thread. Also run clang-format to satisfy the lint check.
1 parent 5d7cd56 commit d374a86

1 file changed

Lines changed: 77 additions & 43 deletions

File tree

c_src/sqlite3_nif.c

Lines changed: 77 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -17,77 +17,109 @@
1717
// backends (pthreads and Win32), so #ifdef _WIN32 covers all platforms.
1818

1919
#ifdef _WIN32
20-
#include <windows.h>
20+
#include <windows.h>
2121

22-
typedef struct {
22+
typedef struct
23+
{
2324
CRITICAL_SECTION cs;
2425
CONDITION_VARIABLE cv;
2526
} timed_wait_t;
2627

27-
static void tw_init(timed_wait_t* tw)
28+
static void
29+
tw_init(timed_wait_t* tw)
2830
{
2931
InitializeCriticalSection(&tw->cs);
3032
InitializeConditionVariable(&tw->cv);
3133
}
3234

33-
static void tw_destroy(timed_wait_t* tw)
35+
static void
36+
tw_destroy(timed_wait_t* tw)
3437
{
3538
DeleteCriticalSection(&tw->cs);
3639
// Windows CONDITION_VARIABLE has no destroy function
3740
}
3841

39-
static void tw_lock(timed_wait_t* tw) { EnterCriticalSection(&tw->cs); }
40-
static void tw_unlock(timed_wait_t* tw) { LeaveCriticalSection(&tw->cs); }
42+
static void
43+
tw_lock(timed_wait_t* tw)
44+
{
45+
EnterCriticalSection(&tw->cs);
46+
}
47+
static void
48+
tw_unlock(timed_wait_t* tw)
49+
{
50+
LeaveCriticalSection(&tw->cs);
51+
}
4152

4253
// Returns 0 if signalled, 1 if timed out
43-
static int tw_wait_ms(timed_wait_t* tw, int ms)
54+
static int
55+
tw_wait_ms(timed_wait_t* tw, int ms)
4456
{
4557
return SleepConditionVariableCS(&tw->cv, &tw->cs, (DWORD)ms) ? 0 : 1;
4658
}
4759

48-
static void tw_signal(timed_wait_t* tw) { WakeConditionVariable(&tw->cv); }
60+
static void
61+
tw_signal(timed_wait_t* tw)
62+
{
63+
WakeConditionVariable(&tw->cv);
64+
}
4965

5066
#else /* POSIX */
51-
#include <pthread.h>
52-
#include <time.h>
53-
#include <errno.h>
67+
#include <pthread.h>
68+
#include <time.h>
69+
#include <errno.h>
5470

55-
typedef struct {
71+
typedef struct
72+
{
5673
pthread_mutex_t mtx;
57-
pthread_cond_t cond;
74+
pthread_cond_t cond;
5875
} timed_wait_t;
5976

60-
static void tw_init(timed_wait_t* tw)
77+
static void
78+
tw_init(timed_wait_t* tw)
6179
{
6280
pthread_mutex_init(&tw->mtx, NULL);
6381
// CLOCK_REALTIME: macOS doesn't support CLOCK_MONOTONIC for condvars
6482
pthread_cond_init(&tw->cond, NULL);
6583
}
6684

67-
static void tw_destroy(timed_wait_t* tw)
85+
static void
86+
tw_destroy(timed_wait_t* tw)
6887
{
6988
pthread_cond_destroy(&tw->cond);
7089
pthread_mutex_destroy(&tw->mtx);
7190
}
7291

73-
static void tw_lock(timed_wait_t* tw) { pthread_mutex_lock(&tw->mtx); }
74-
static void tw_unlock(timed_wait_t* tw) { pthread_mutex_unlock(&tw->mtx); }
92+
static void
93+
tw_lock(timed_wait_t* tw)
94+
{
95+
pthread_mutex_lock(&tw->mtx);
96+
}
97+
static void
98+
tw_unlock(timed_wait_t* tw)
99+
{
100+
pthread_mutex_unlock(&tw->mtx);
101+
}
75102

76103
// Returns 0 if signalled, 1 if timed out
77-
static int tw_wait_ms(timed_wait_t* tw, int ms)
104+
static int
105+
tw_wait_ms(timed_wait_t* tw, int ms)
78106
{
79107
struct timespec ts;
80108
clock_gettime(CLOCK_REALTIME, &ts);
81-
ts.tv_sec += ms / 1000;
109+
ts.tv_sec += ms / 1000;
82110
ts.tv_nsec += (ms % 1000) * 1000000L;
83111
if (ts.tv_nsec >= 1000000000L) {
84-
ts.tv_sec += 1;
112+
ts.tv_sec += 1;
85113
ts.tv_nsec -= 1000000000L;
86114
}
87115
return pthread_cond_timedwait(&tw->cond, &tw->mtx, &ts) == ETIMEDOUT ? 1 : 0;
88116
}
89117

90-
static void tw_signal(timed_wait_t* tw) { pthread_cond_signal(&tw->cond); }
118+
static void
119+
tw_signal(timed_wait_t* tw)
120+
{
121+
pthread_cond_signal(&tw->cond);
122+
}
91123

92124
#endif /* _WIN32 */
93125

@@ -138,9 +170,9 @@ typedef struct connection
138170

139171
// Custom busy handler state
140172
timed_wait_t cancel_tw;
141-
volatile int cancelled; // volatile for MSVC compat
173+
volatile int cancelled; // volatile for MSVC compat
142174
int busy_timeout_ms;
143-
ErlNifEnv* callback_env; // for enif_is_process_alive
175+
ErlNifEnv* callback_env; // for enif_is_process_alive
144176
ErlNifPid caller_pid;
145177
} connection_t;
146178

@@ -394,20 +426,20 @@ exqlite_busy_handler(void* arg, int count)
394426

395427
// Snapshot cancel state and timeout with lock held
396428
tw_lock(&conn->cancel_tw);
397-
int cancelled = conn->cancelled;
429+
int cancelled = conn->cancelled;
398430
int timeout_ms = conn->busy_timeout_ms;
399431

400432
// Check if the calling process is still alive
401433
if (!cancelled && conn->callback_env != NULL &&
402434
!enif_is_process_alive(conn->callback_env, &conn->caller_pid)) {
403435
conn->cancelled = 1;
404-
cancelled = 1;
436+
cancelled = 1;
405437
}
406438
tw_unlock(&conn->cancel_tw);
407439

408440
// Check if already cancelled
409441
if (cancelled) {
410-
return 0; // stop retrying → SQLite returns SQLITE_BUSY
442+
return 0; // stop retrying → SQLite returns SQLITE_BUSY
411443
}
412444

413445
// No timeout → fail immediately
@@ -419,7 +451,7 @@ exqlite_busy_handler(void* arg, int count)
419451
// Use the same delay schedule as SQLite's default busy handler
420452
// for the first few retries, then 50ms waits after that.
421453
static const int delays[] = {1, 2, 5, 10, 15, 20, 25, 25, 25, 50, 50};
422-
static const int ndelay = sizeof(delays) / sizeof(delays[0]);
454+
static const int ndelay = sizeof(delays) / sizeof(delays[0]);
423455

424456
int total_waited = 0;
425457
for (int i = 0; i < count && i < ndelay; i++) {
@@ -430,41 +462,43 @@ exqlite_busy_handler(void* arg, int count)
430462
}
431463

432464
if (total_waited >= timeout_ms) {
433-
return 0; // timeout exceeded
465+
return 0; // timeout exceeded
434466
}
435467

436468
// Calculate sleep duration for this iteration
437-
int sleep_ms = (count < ndelay) ? delays[count] : 50;
469+
int sleep_ms = (count < ndelay) ? delays[count] : 50;
438470
int remaining = timeout_ms - total_waited;
439471
if (sleep_ms > remaining) {
440472
sleep_ms = remaining;
441473
}
442474

443475
// Wait on the condvar — can be woken early by cancel()
444476
tw_lock(&conn->cancel_tw);
445-
if (!conn->cancelled) {
477+
int cancelled = conn->cancelled;
478+
if (!cancelled) {
446479
tw_wait_ms(&conn->cancel_tw, sleep_ms);
480+
cancelled = conn->cancelled; // snapshot again after waking
447481
}
448482
tw_unlock(&conn->cancel_tw);
449483

450-
// After waking, check cancel again
451-
if (conn->cancelled) {
484+
// After waking, use the snapshot to avoid data race
485+
if (cancelled) {
452486
return 0;
453487
}
454488

455-
return 1; // retry the operation
489+
return 1; // retry the operation
456490
}
457491

458492
// Progress handler: fires every N VDBE opcodes.
459493
// Returns non-zero to interrupt execution when cancelled.
460-
// Note: reads cancelled without lock for performance (fires every 1000 opcodes).
461-
// Worst case is one extra iteration before seeing the flag. sqlite3_interrupt()
462-
// provides a redundant cancellation path.
463494
static int
464495
exqlite_progress_handler(void* arg)
465496
{
466497
connection_t* conn = (connection_t*)arg;
467-
return conn->cancelled ? 1 : 0;
498+
tw_lock(&conn->cancel_tw);
499+
int cancelled = conn->cancelled;
500+
tw_unlock(&conn->cancel_tw);
501+
return cancelled ? 1 : 0;
468502
}
469503

470504
// Stash the current env + caller pid before a db operation.
@@ -474,7 +508,7 @@ connection_stash_caller(connection_t* conn, ErlNifEnv* env)
474508
{
475509
conn->callback_env = env;
476510
enif_self(env, &conn->caller_pid);
477-
511+
478512
// Reset cancel flag for new operation while holding cancel_tw
479513
// to avoid racing with exqlite_cancel or other users of this flag.
480514
tw_lock(&conn->cancel_tw);
@@ -537,16 +571,16 @@ exqlite_open(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
537571
enif_mutex_destroy(mutex);
538572
return make_error_tuple(env, am_out_of_memory);
539573
}
540-
conn->db = db;
541-
conn->mutex = mutex;
542-
574+
conn->db = db;
575+
conn->mutex = mutex;
576+
543577
// Initialize cancellable busy handler fields early so destructor can safely
544578
// call tw_destroy even if subsequent initialization steps fail.
545579
tw_init(&conn->cancel_tw);
546580
conn->cancelled = 0;
547-
conn->busy_timeout_ms = 2000; // default matches sqlite3_busy_timeout(db, 2000)
581+
conn->busy_timeout_ms = 2000; // default matches sqlite3_busy_timeout(db, 2000)
548582
conn->callback_env = NULL;
549-
583+
550584
conn->interrupt_mutex = enif_mutex_create("exqlite:interrupt");
551585
if (conn->interrupt_mutex == NULL) {
552586
// conn->db, conn->mutex, and conn->cancel_tw are set; destructor will clean them up.

0 commit comments

Comments
 (0)