Skip to content

Commit 06d40db

Browse files
committed
Fix data race on cancelled flag reads
Snapshot conn->cancelled under lock in both the busy handler (after waking from condvar) and the progress handler. Previously both paths read the field outside the lock, which is a data race with exqlite_cancel writing to it on another scheduler thread. Also run clang-format to satisfy the lint check.
1 parent 4f1df42 commit 06d40db

1 file changed

Lines changed: 73 additions & 39 deletions

File tree

c_src/sqlite3_nif.c

Lines changed: 73 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -17,77 +17,109 @@
1717
// backends (pthreads and Win32), so #ifdef _WIN32 covers all platforms.
1818

1919
#ifdef _WIN32
20-
#include <windows.h>
20+
#include <windows.h>
2121

22-
typedef struct {
22+
typedef struct
23+
{
2324
CRITICAL_SECTION cs;
2425
CONDITION_VARIABLE cv;
2526
} timed_wait_t;
2627

27-
static void tw_init(timed_wait_t* tw)
28+
static void
29+
tw_init(timed_wait_t* tw)
2830
{
2931
InitializeCriticalSection(&tw->cs);
3032
InitializeConditionVariable(&tw->cv);
3133
}
3234

33-
static void tw_destroy(timed_wait_t* tw)
35+
static void
36+
tw_destroy(timed_wait_t* tw)
3437
{
3538
DeleteCriticalSection(&tw->cs);
3639
// Windows CONDITION_VARIABLE has no destroy function
3740
}
3841

39-
static void tw_lock(timed_wait_t* tw) { EnterCriticalSection(&tw->cs); }
40-
static void tw_unlock(timed_wait_t* tw) { LeaveCriticalSection(&tw->cs); }
42+
static void
43+
tw_lock(timed_wait_t* tw)
44+
{
45+
EnterCriticalSection(&tw->cs);
46+
}
47+
static void
48+
tw_unlock(timed_wait_t* tw)
49+
{
50+
LeaveCriticalSection(&tw->cs);
51+
}
4152

4253
// Returns 0 if signalled, 1 if timed out
43-
static int tw_wait_ms(timed_wait_t* tw, int ms)
54+
static int
55+
tw_wait_ms(timed_wait_t* tw, int ms)
4456
{
4557
return SleepConditionVariableCS(&tw->cv, &tw->cs, (DWORD)ms) ? 0 : 1;
4658
}
4759

48-
static void tw_signal(timed_wait_t* tw) { WakeConditionVariable(&tw->cv); }
60+
static void
61+
tw_signal(timed_wait_t* tw)
62+
{
63+
WakeConditionVariable(&tw->cv);
64+
}
4965

5066
#else /* POSIX */
51-
#include <pthread.h>
52-
#include <time.h>
53-
#include <errno.h>
67+
#include <pthread.h>
68+
#include <time.h>
69+
#include <errno.h>
5470

55-
typedef struct {
71+
typedef struct
72+
{
5673
pthread_mutex_t mtx;
57-
pthread_cond_t cond;
74+
pthread_cond_t cond;
5875
} timed_wait_t;
5976

60-
static void tw_init(timed_wait_t* tw)
77+
static void
78+
tw_init(timed_wait_t* tw)
6179
{
6280
pthread_mutex_init(&tw->mtx, NULL);
6381
// CLOCK_REALTIME: macOS doesn't support CLOCK_MONOTONIC for condvars
6482
pthread_cond_init(&tw->cond, NULL);
6583
}
6684

67-
static void tw_destroy(timed_wait_t* tw)
85+
static void
86+
tw_destroy(timed_wait_t* tw)
6887
{
6988
pthread_cond_destroy(&tw->cond);
7089
pthread_mutex_destroy(&tw->mtx);
7190
}
7291

73-
static void tw_lock(timed_wait_t* tw) { pthread_mutex_lock(&tw->mtx); }
74-
static void tw_unlock(timed_wait_t* tw) { pthread_mutex_unlock(&tw->mtx); }
92+
static void
93+
tw_lock(timed_wait_t* tw)
94+
{
95+
pthread_mutex_lock(&tw->mtx);
96+
}
97+
static void
98+
tw_unlock(timed_wait_t* tw)
99+
{
100+
pthread_mutex_unlock(&tw->mtx);
101+
}
75102

76103
// Returns 0 if signalled, 1 if timed out
77-
static int tw_wait_ms(timed_wait_t* tw, int ms)
104+
static int
105+
tw_wait_ms(timed_wait_t* tw, int ms)
78106
{
79107
struct timespec ts;
80108
clock_gettime(CLOCK_REALTIME, &ts);
81-
ts.tv_sec += ms / 1000;
109+
ts.tv_sec += ms / 1000;
82110
ts.tv_nsec += (ms % 1000) * 1000000L;
83111
if (ts.tv_nsec >= 1000000000L) {
84-
ts.tv_sec += 1;
112+
ts.tv_sec += 1;
85113
ts.tv_nsec -= 1000000000L;
86114
}
87115
return pthread_cond_timedwait(&tw->cond, &tw->mtx, &ts) == ETIMEDOUT ? 1 : 0;
88116
}
89117

90-
static void tw_signal(timed_wait_t* tw) { pthread_cond_signal(&tw->cond); }
118+
static void
119+
tw_signal(timed_wait_t* tw)
120+
{
121+
pthread_cond_signal(&tw->cond);
122+
}
91123

92124
#endif /* _WIN32 */
93125

@@ -143,9 +175,9 @@ typedef struct connection
143175

144176
// Custom busy handler state
145177
timed_wait_t cancel_tw;
146-
volatile int cancelled; // volatile for MSVC compat
178+
volatile int cancelled; // volatile for MSVC compat
147179
int busy_timeout_ms;
148-
ErlNifEnv* callback_env; // for enif_is_process_alive
180+
ErlNifEnv* callback_env; // for enif_is_process_alive
149181
ErlNifPid caller_pid;
150182
} connection_t;
151183

@@ -399,20 +431,20 @@ exqlite_busy_handler(void* arg, int count)
399431

400432
// Snapshot cancel state and timeout with lock held
401433
tw_lock(&conn->cancel_tw);
402-
int cancelled = conn->cancelled;
434+
int cancelled = conn->cancelled;
403435
int timeout_ms = conn->busy_timeout_ms;
404436

405437
// Check if the calling process is still alive
406438
if (!cancelled && conn->callback_env != NULL &&
407439
!enif_is_process_alive(conn->callback_env, &conn->caller_pid)) {
408440
conn->cancelled = 1;
409-
cancelled = 1;
441+
cancelled = 1;
410442
}
411443
tw_unlock(&conn->cancel_tw);
412444

413445
// Check if already cancelled
414446
if (cancelled) {
415-
return 0; // stop retrying → SQLite returns SQLITE_BUSY
447+
return 0; // stop retrying → SQLite returns SQLITE_BUSY
416448
}
417449

418450
// No timeout → fail immediately
@@ -424,7 +456,7 @@ exqlite_busy_handler(void* arg, int count)
424456
// Use the same delay schedule as SQLite's default busy handler
425457
// for the first few retries, then 50ms waits after that.
426458
static const int delays[] = {1, 2, 5, 10, 15, 20, 25, 25, 25, 50, 50};
427-
static const int ndelay = sizeof(delays) / sizeof(delays[0]);
459+
static const int ndelay = sizeof(delays) / sizeof(delays[0]);
428460

429461
int total_waited = 0;
430462
for (int i = 0; i < count && i < ndelay; i++) {
@@ -435,41 +467,43 @@ exqlite_busy_handler(void* arg, int count)
435467
}
436468

437469
if (total_waited >= timeout_ms) {
438-
return 0; // timeout exceeded
470+
return 0; // timeout exceeded
439471
}
440472

441473
// Calculate sleep duration for this iteration
442-
int sleep_ms = (count < ndelay) ? delays[count] : 50;
474+
int sleep_ms = (count < ndelay) ? delays[count] : 50;
443475
int remaining = timeout_ms - total_waited;
444476
if (sleep_ms > remaining) {
445477
sleep_ms = remaining;
446478
}
447479

448480
// Wait on the condvar — can be woken early by cancel()
449481
tw_lock(&conn->cancel_tw);
450-
if (!conn->cancelled) {
482+
int cancelled = conn->cancelled;
483+
if (!cancelled) {
451484
tw_wait_ms(&conn->cancel_tw, sleep_ms);
485+
cancelled = conn->cancelled; // snapshot again after waking
452486
}
453487
tw_unlock(&conn->cancel_tw);
454488

455-
// After waking, check cancel again
456-
if (conn->cancelled) {
489+
// After waking, use the snapshot to avoid data race
490+
if (cancelled) {
457491
return 0;
458492
}
459493

460-
return 1; // retry the operation
494+
return 1; // retry the operation
461495
}
462496

463497
// Progress handler: fires every N VDBE opcodes.
464498
// Returns non-zero to interrupt execution when cancelled.
465-
// Note: reads cancelled without lock for performance (fires every 1000 opcodes).
466-
// Worst case is one extra iteration before seeing the flag. sqlite3_interrupt()
467-
// provides a redundant cancellation path.
468499
static int
469500
exqlite_progress_handler(void* arg)
470501
{
471502
connection_t* conn = (connection_t*)arg;
472-
return conn->cancelled ? 1 : 0;
503+
tw_lock(&conn->cancel_tw);
504+
int cancelled = conn->cancelled;
505+
tw_unlock(&conn->cancel_tw);
506+
return cancelled ? 1 : 0;
473507
}
474508

475509
// Stash the current env + caller pid before a db operation.
@@ -479,7 +513,7 @@ connection_stash_caller(connection_t* conn, ErlNifEnv* env)
479513
{
480514
conn->callback_env = env;
481515
enif_self(env, &conn->caller_pid);
482-
516+
483517
// Reset cancel flag for new operation while holding cancel_tw
484518
// to avoid racing with exqlite_cancel or other users of this flag.
485519
tw_lock(&conn->cancel_tw);
@@ -551,7 +585,7 @@ exqlite_open(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
551585
// call tw_destroy even if subsequent initialization steps fail.
552586
tw_init(&conn->cancel_tw);
553587
conn->cancelled = 0;
554-
conn->busy_timeout_ms = 2000; // default matches sqlite3_busy_timeout(db, 2000)
588+
conn->busy_timeout_ms = 2000; // default matches sqlite3_busy_timeout(db, 2000)
555589
conn->callback_env = NULL;
556590

557591
conn->interrupt_mutex = enif_mutex_create("exqlite:interrupt");

0 commit comments

Comments
 (0)