Skip to content

Commit 25b69eb

Browse files
committed
Merge tag 'landlock-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux
Pull Landlock fixes from Mickaël Salaün:
 "This mainly fixes Landlock TSYNC issues related to interrupts and
  unexpected task exit. Other fixes touch documentation and sample, and
  a new test extends coverage"

* tag 'landlock-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux:
  landlock: Expand restrict flags example for ABI version 8
  selftests/landlock: Test tsync interruption and cancellation paths
  landlock: Clean up interrupted thread logic in TSYNC
  landlock: Serialize TSYNC thread restriction
  samples/landlock: Bump ABI version to 8
  landlock: Improve TSYNC types
  landlock: Fully release unused TSYNC work entries
  landlock: Fix formatting
2 parents 453a4a5 + a238110 commit 25b69eb

6 files changed

Lines changed: 190 additions & 33 deletions

File tree

Documentation/userspace-api/landlock.rst

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Landlock: unprivileged access control
88
=====================================
99

1010
:Author: Mickaël Salaün
11-
:Date: January 2026
11+
:Date: March 2026
1212

1313
The goal of Landlock is to enable restriction of ambient rights (e.g. global
1414
filesystem or network access) for a set of processes. Because Landlock
@@ -197,12 +197,27 @@ similar backwards compatibility check is needed for the restrict flags
197197

198198
.. code-block:: c
199199
200-
__u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
201-
if (abi < 7) {
202-
/* Clear logging flags unsupported before ABI 7. */
200+
__u32 restrict_flags =
201+
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
202+
LANDLOCK_RESTRICT_SELF_TSYNC;
203+
switch (abi) {
204+
case 1 ... 6:
205+
/* Removes logging flags for ABI < 7 */
203206
restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
204207
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
205208
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);
209+
__attribute__((fallthrough));
210+
case 7:
211+
/*
212+
* Removes multithreaded enforcement flag for ABI < 8
213+
*
214+
* WARNING: Without this flag, calling landlock_restrict_self(2) is
215+
* only equivalent if the calling process is single-threaded. Below
216+
* ABI v8 (and as of ABI v8, when not using this flag), a Landlock
217+
* policy would only be enforced for the calling thread and its
218+
* children (and not for all threads, including parents and siblings).
219+
*/
220+
restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC;
206221
}
207222
208223
The next step is to restrict the current thread from gaining more privileges

samples/landlock/sandboxer.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ static bool check_ruleset_scope(const char *const env_var,
299299

300300
/* clang-format on */
301301

302-
#define LANDLOCK_ABI_LAST 7
302+
#define LANDLOCK_ABI_LAST 8
303303

304304
#define XSTR(s) #s
305305
#define STR(s) XSTR(s)
@@ -436,7 +436,8 @@ int main(const int argc, char *const argv[], char *const *const envp)
436436
/* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */
437437
supported_restrict_flags &=
438438
~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
439-
439+
__attribute__((fallthrough));
440+
case 7:
440441
/* Must be printed for any ABI < LANDLOCK_ABI_LAST. */
441442
fprintf(stderr,
442443
"Hint: You should update the running kernel "

security/landlock/domain.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,7 @@ static struct landlock_details *get_current_details(void)
9494
* allocate with GFP_KERNEL_ACCOUNT because it is independent from the
9595
* caller.
9696
*/
97-
details =
98-
kzalloc_flex(*details, exe_path, path_size);
97+
details = kzalloc_flex(*details, exe_path, path_size);
9998
if (!details)
10099
return ERR_PTR(-ENOMEM);
101100

security/landlock/ruleset.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
3232
{
3333
struct landlock_ruleset *new_ruleset;
3434

35-
new_ruleset =
36-
kzalloc_flex(*new_ruleset, access_masks, num_layers,
37-
GFP_KERNEL_ACCOUNT);
35+
new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers,
36+
GFP_KERNEL_ACCOUNT);
3837
if (!new_ruleset)
3938
return ERR_PTR(-ENOMEM);
4039
refcount_set(&new_ruleset->usage, 1);
@@ -559,8 +558,8 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent,
559558
if (IS_ERR(new_dom))
560559
return new_dom;
561560

562-
new_dom->hierarchy = kzalloc_obj(*new_dom->hierarchy,
563-
GFP_KERNEL_ACCOUNT);
561+
new_dom->hierarchy =
562+
kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT);
564563
if (!new_dom->hierarchy)
565564
return ERR_PTR(-ENOMEM);
566565

security/landlock/tsync.c

Lines changed: 73 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,40 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s,
203203
return ctx;
204204
}
205205

206+
/**
207+
* tsync_works_trim - Put the last tsync_work element
208+
*
209+
* @s: TSYNC works to trim.
210+
*
211+
* Put the last task and decrement the size of @s.
212+
*
213+
 * This helper does not cancel a running task, but just resets the last element
214+
* to zero.
215+
*/
216+
static void tsync_works_trim(struct tsync_works *s)
217+
{
218+
struct tsync_work *ctx;
219+
220+
if (WARN_ON_ONCE(s->size <= 0))
221+
return;
222+
223+
ctx = s->works[s->size - 1];
224+
225+
/*
226+
* For consistency, remove the task from ctx so that it does not look like
227+
* we handed it a task_work.
228+
*/
229+
put_task_struct(ctx->task);
230+
*ctx = (typeof(*ctx)){};
231+
232+
/*
233+
* Cancel the tsync_works_provide() change to recycle the reserved memory
234+
* for the next thread, if any. This also ensures that cancel_tsync_works()
235+
* and tsync_works_release() do not see any NULL task pointers.
236+
*/
237+
s->size--;
238+
}
239+
206240
/*
207241
* tsync_works_grow_by - preallocates space for n more contexts in s
208242
*
@@ -256,13 +290,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
256290
* tsync_works_contains - checks for presence of task in s
257291
*/
258292
static bool tsync_works_contains_task(const struct tsync_works *s,
259-
struct task_struct *task)
293+
const struct task_struct *task)
260294
{
261295
size_t i;
262296

263297
for (i = 0; i < s->size; i++)
264298
if (s->works[i]->task == task)
265299
return true;
300+
266301
return false;
267302
}
268303

@@ -276,14 +311,15 @@ static void tsync_works_release(struct tsync_works *s)
276311
size_t i;
277312

278313
for (i = 0; i < s->size; i++) {
279-
if (!s->works[i]->task)
314+
if (WARN_ON_ONCE(!s->works[i]->task))
280315
continue;
281316

282317
put_task_struct(s->works[i]->task);
283318
}
284319

285320
for (i = 0; i < s->capacity; i++)
286321
kfree(s->works[i]);
322+
287323
kfree(s->works);
288324
s->works = NULL;
289325
s->size = 0;
@@ -295,7 +331,7 @@ static void tsync_works_release(struct tsync_works *s)
295331
*/
296332
static size_t count_additional_threads(const struct tsync_works *works)
297333
{
298-
struct task_struct *thread, *caller;
334+
const struct task_struct *caller, *thread;
299335
size_t n = 0;
300336

301337
caller = current;
@@ -334,7 +370,8 @@ static bool schedule_task_work(struct tsync_works *works,
334370
struct tsync_shared_context *shared_ctx)
335371
{
336372
int err;
337-
struct task_struct *thread, *caller;
373+
const struct task_struct *caller;
374+
struct task_struct *thread;
338375
struct tsync_work *ctx;
339376
bool found_more_threads = false;
340377

@@ -379,16 +416,14 @@ static bool schedule_task_work(struct tsync_works *works,
379416

380417
init_task_work(&ctx->work, restrict_one_thread_callback);
381418
err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
382-
if (err) {
419+
if (unlikely(err)) {
383420
/*
384421
* task_work_add() only fails if the task is about to exit. We
385422
* checked that earlier, but it can happen as a race. Resume
386423
* without setting an error, as the task is probably gone in the
387-
* next loop iteration. For consistency, remove the task from ctx
388-
* so that it does not look like we handed it a task_work.
424+
* next loop iteration.
389425
*/
390-
put_task_struct(ctx->task);
391-
ctx->task = NULL;
426+
tsync_works_trim(works);
392427

393428
atomic_dec(&shared_ctx->num_preparing);
394429
atomic_dec(&shared_ctx->num_unfinished);
@@ -406,12 +441,15 @@ static bool schedule_task_work(struct tsync_works *works,
406441
* shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two
407442
* completions if needed, as if the task was never scheduled.
408443
*/
409-
static void cancel_tsync_works(struct tsync_works *works,
444+
static void cancel_tsync_works(const struct tsync_works *works,
410445
struct tsync_shared_context *shared_ctx)
411446
{
412-
int i;
447+
size_t i;
413448

414449
for (i = 0; i < works->size; i++) {
450+
if (WARN_ON_ONCE(!works->works[i]->task))
451+
continue;
452+
415453
if (!task_work_cancel(works->works[i]->task,
416454
&works->works[i]->work))
417455
continue;
@@ -447,6 +485,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
447485
shared_ctx.new_cred = new_cred;
448486
shared_ctx.set_no_new_privs = task_no_new_privs(current);
449487

488+
/*
489+
* Serialize concurrent TSYNC operations to prevent deadlocks when
490+
* multiple threads call landlock_restrict_self() simultaneously.
491+
* If the lock is already held, we gracefully yield by restarting the
492+
* syscall. This allows the current thread to process pending
493+
* task_works before retrying.
494+
*/
495+
if (!down_write_trylock(&current->signal->exec_update_lock))
496+
return restart_syscall();
497+
450498
/*
451499
* We schedule a pseudo-signal task_work for each of the calling task's
452500
* sibling threads. In the task work, each thread:
@@ -527,24 +575,30 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
527575
-ERESTARTNOINTR);
528576

529577
/*
530-
* Cancel task works for tasks that did not start running yet,
531-
* and decrement all_prepared and num_unfinished accordingly.
578+
* Opportunistic improvement: try to cancel task
579+
* works for tasks that did not start running
580+
* yet. We do not have a guarantee that it
581+
* cancels any of the enqueued task works
582+
* because task_work_run() might already have
583+
* dequeued them.
532584
*/
533585
cancel_tsync_works(&works, &shared_ctx);
534586

535587
/*
536-
* The remaining task works have started running, so waiting for
537-
* their completion will finish.
588+
* Break the loop with error. The cleanup code
589+
* after the loop unblocks the remaining
590+
* task_works.
538591
*/
539-
wait_for_completion(&shared_ctx.all_prepared);
592+
break;
540593
}
541594
}
542595
} while (found_more_threads &&
543596
!atomic_read(&shared_ctx.preparation_error));
544597

545598
/*
546-
* We now have all sibling threads blocking and in "prepared" state in the
547-
* task work. Ask all threads to commit.
599+
* We now have either (a) all sibling threads blocking and in "prepared"
600+
* state in the task work, or (b) the preparation error is set. Ask all
601+
* threads to commit (or abort).
548602
*/
549603
complete_all(&shared_ctx.ready_to_commit);
550604

@@ -556,6 +610,6 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
556610
wait_for_completion(&shared_ctx.all_finished);
557611

558612
tsync_works_release(&works);
559-
613+
up_write(&current->signal->exec_update_lock);
560614
return atomic_read(&shared_ctx.preparation_error);
561615
}

tools/testing/selftests/landlock/tsync_test.c

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
*/
77

88
#define _GNU_SOURCE
9+
#include <linux/landlock.h>
910
#include <pthread.h>
11+
#include <signal.h>
1012
#include <sys/prctl.h>
11-
#include <linux/landlock.h>
1213

1314
#include "common.h"
1415

@@ -158,4 +159,92 @@ TEST(competing_enablement)
158159
EXPECT_EQ(0, close(ruleset_fd));
159160
}
160161

162+
static void signal_nop_handler(int sig)
163+
{
164+
}
165+
166+
struct signaler_data {
167+
pthread_t target;
168+
volatile bool stop;
169+
};
170+
171+
static void *signaler_thread(void *data)
172+
{
173+
struct signaler_data *sd = data;
174+
175+
while (!sd->stop)
176+
pthread_kill(sd->target, SIGUSR1);
177+
178+
return NULL;
179+
}
180+
181+
/*
182+
* Number of idle sibling threads. This must be large enough that even on
183+
* machines with many cores, the sibling threads cannot all complete their
184+
* credential preparation in a single parallel wave, otherwise the signaler
185+
* thread has no window to interrupt wait_for_completion_interruptible().
186+
* 200 threads on a 64-core machine yields ~3 serialized waves, giving the
187+
* tight signal loop enough time to land an interruption.
188+
*/
189+
#define NUM_IDLE_THREADS 200
190+
191+
/*
192+
* Exercises the tsync interruption and cancellation paths in tsync.c.
193+
*
194+
* When a signal interrupts the calling thread while it waits for sibling
195+
* threads to finish their credential preparation
196+
* (wait_for_completion_interruptible in landlock_restrict_sibling_threads),
197+
* the kernel sets ERESTARTNOINTR, cancels queued task works that have not
198+
* started yet (cancel_tsync_works), then waits for the remaining works to
199+
* finish. On the error return, syscalls.c aborts the prepared credentials.
200+
* The kernel automatically restarts the syscall, so userspace sees success.
201+
*/
202+
TEST(tsync_interrupt)
203+
{
204+
size_t i;
205+
pthread_t threads[NUM_IDLE_THREADS];
206+
pthread_t signaler;
207+
struct signaler_data sd;
208+
struct sigaction sa = {};
209+
const int ruleset_fd = create_ruleset(_metadata);
210+
211+
disable_caps(_metadata);
212+
213+
/* Install a no-op SIGUSR1 handler so the signal does not kill us. */
214+
sa.sa_handler = signal_nop_handler;
215+
sigemptyset(&sa.sa_mask);
216+
ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL));
217+
218+
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
219+
220+
for (i = 0; i < NUM_IDLE_THREADS; i++)
221+
ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL));
222+
223+
/*
224+
* Start a signaler thread that continuously sends SIGUSR1 to the
225+
* calling thread. This maximizes the chance of interrupting
226+
* wait_for_completion_interruptible() in the kernel's tsync path.
227+
*/
228+
sd.target = pthread_self();
229+
sd.stop = false;
230+
ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd));
231+
232+
/*
233+
* The syscall may be interrupted and transparently restarted by the
234+
* kernel (ERESTARTNOINTR). From userspace, it should always succeed.
235+
*/
236+
EXPECT_EQ(0, landlock_restrict_self(ruleset_fd,
237+
LANDLOCK_RESTRICT_SELF_TSYNC));
238+
239+
sd.stop = true;
240+
ASSERT_EQ(0, pthread_join(signaler, NULL));
241+
242+
for (i = 0; i < NUM_IDLE_THREADS; i++) {
243+
ASSERT_EQ(0, pthread_cancel(threads[i]));
244+
ASSERT_EQ(0, pthread_join(threads[i], NULL));
245+
}
246+
247+
EXPECT_EQ(0, close(ruleset_fd));
248+
}
249+
161250
TEST_HARNESS_MAIN

0 commit comments

Comments (0)