Skip to content

Commit dd6c438

Browse files
committed
Merge tag 'vfs-7.1-rc1.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner:

 - eventpoll: fix ep_remove() UAF and follow-up cleanup

 - fs: aio: set VMA_DONTCOPY_BIT in mmap to fix NULL-pointer-dereference error

 - writeback: Fix use after free in inode_switch_wbs_work_fn()

 - fuse: reject oversized dirents in page cache

 - fs: aio: reject partial mremap to avoid Null-pointer-dereference error

 - nstree: fix func. parameter kernel-doc warnings

 - fs: Handle multiply claimed blocks more gracefully with mmb

* tag 'vfs-7.1-rc1.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  eventpoll: drop vestigial epi->dying flag
  eventpoll: drop dead bool return from ep_remove_epi()
  eventpoll: refresh eventpoll_release() fast-path comment
  eventpoll: move f_lock acquisition into ep_remove_file()
  eventpoll: fix ep_remove struct eventpoll / struct file UAF
  eventpoll: move epi_fget() up
  eventpoll: rename ep_remove_safe() back to ep_remove()
  eventpoll: drop vestigial __ prefix from ep_remove_{file,epi}()
  eventpoll: kill __ep_remove()
  eventpoll: split __ep_remove()
  eventpoll: use hlist_is_singular_node() in __ep_remove()
  fs: Handle multiply claimed blocks more gracefully with mmb
  nstree: fix func. parameter kernel-doc warnings
  fs: aio: reject partial mremap to avoid Null-pointer-dereference error
  fuse: reject oversized dirents in page cache
  writeback: Fix use after free in inode_switch_wbs_work_fn()
  fs: aio: set VMA_DONTCOPY_BIT in mmap to fix NULL-pointer-dereference error
2 parents bd1886d + ac8777c commit dd6c438

7 files changed

Lines changed: 125 additions & 109 deletions

File tree

fs/aio.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,8 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
422422

423423
ctx = rcu_dereference(table->table[i]);
424424
if (ctx && ctx->aio_ring_file == file) {
425-
if (!atomic_read(&ctx->dead)) {
425+
if (!atomic_read(&ctx->dead) &&
426+
(ctx->mmap_size == (vma->vm_end - vma->vm_start))) {
426427
ctx->user_id = ctx->mmap_base = vma->vm_start;
427428
res = 0;
428429
}
@@ -447,7 +448,7 @@ static const struct vm_operations_struct aio_ring_vm_ops = {
447448

448449
static int aio_ring_mmap_prepare(struct vm_area_desc *desc)
449450
{
450-
vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT);
451+
vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT, VMA_DONTCOPY_BIT);
451452
desc->vm_ops = &aio_ring_vm_ops;
452453
return 0;
453454
}

fs/buffer.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -719,8 +719,15 @@ void mmb_mark_buffer_dirty(struct buffer_head *bh,
719719
mark_buffer_dirty(bh);
720720
if (!bh->b_mmb) {
721721
spin_lock(&mmb->lock);
722+
/*
723+
* For a corrupted filesystem with multiply claimed blocks this
724+
* can fail. Avoid corrupting the linked list in that case.
725+
*/
726+
if (cmpxchg(&bh->b_mmb, NULL, mmb) != NULL) {
727+
spin_unlock(&mmb->lock);
728+
return;
729+
}
722730
list_move_tail(&bh->b_assoc_buffers, &mmb->list);
723-
bh->b_mmb = mmb;
724731
spin_unlock(&mmb->lock);
725732
}
726733
}

fs/eventpoll.c

Lines changed: 78 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,6 @@ struct epitem {
148148
/* The file descriptor information this item refers to */
149149
struct epoll_filefd ffd;
150150

151-
/*
152-
* Protected by file->f_lock, true for to-be-released epitem already
153-
* removed from the "struct file" items list; together with
154-
* eventpoll->refcount orchestrates "struct eventpoll" disposal
155-
*/
156-
bool dying;
157-
158151
/* List containing poll wait queues */
159152
struct eppoll_entry *pwqlist;
160153

@@ -220,10 +213,7 @@ struct eventpoll {
220213
struct hlist_head refs;
221214
u8 loop_check_depth;
222215

223-
/*
224-
* usage count, used together with epitem->dying to
225-
* orchestrate the disposal of this struct
226-
*/
216+
/* usage count, orchestrates "struct eventpoll" disposal */
227217
refcount_t refcount;
228218

229219
/* used to defer freeing past ep_get_upwards_depth_proc() RCU walk */
@@ -827,36 +817,47 @@ static void ep_free(struct eventpoll *ep)
827817
}
828818

829819
/*
830-
* Removes a "struct epitem" from the eventpoll RB tree and deallocates
831-
* all the associated resources. Must be called with "mtx" held.
832-
* If the dying flag is set, do the removal only if force is true.
833-
* This prevents ep_clear_and_put() from dropping all the ep references
834-
* while running concurrently with eventpoll_release_file().
835-
* Returns true if the eventpoll can be disposed.
820+
* The ffd.file pointer may be in the process of being torn down due to
821+
* being closed, but we may not have finished eventpoll_release() yet.
822+
*
823+
* Normally, even with the atomic_long_inc_not_zero, the file may have
824+
* been free'd and then gotten re-allocated to something else (since
825+
* files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
826+
*
827+
* But for epoll, users hold the ep->mtx mutex, and as such any file in
828+
* the process of being free'd will block in eventpoll_release_file()
829+
* and thus the underlying file allocation will not be free'd, and the
830+
* file re-use cannot happen.
831+
*
832+
* For the same reason we can avoid a rcu_read_lock() around the
833+
* operation - 'ffd.file' cannot go away even if the refcount has
834+
* reached zero (but we must still not call out to ->poll() functions
835+
* etc).
836836
*/
837-
static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
837+
static struct file *epi_fget(const struct epitem *epi)
838838
{
839-
struct file *file = epi->ffd.file;
840-
struct epitems_head *to_free;
841-
struct hlist_head *head;
839+
struct file *file;
842840

843-
lockdep_assert_irqs_enabled();
841+
file = epi->ffd.file;
842+
if (!file_ref_get(&file->f_ref))
843+
file = NULL;
844+
return file;
845+
}
844846

845-
/*
846-
* Removes poll wait queue hooks.
847-
*/
848-
ep_unregister_pollwait(ep, epi);
847+
/*
848+
* Takes &file->f_lock; returns with it released.
849+
*/
850+
static void ep_remove_file(struct eventpoll *ep, struct epitem *epi,
851+
struct file *file)
852+
{
853+
struct epitems_head *to_free = NULL;
854+
struct hlist_head *head;
849855

850-
/* Remove the current item from the list of epoll hooks */
851-
spin_lock(&file->f_lock);
852-
if (epi->dying && !force) {
853-
spin_unlock(&file->f_lock);
854-
return false;
855-
}
856+
lockdep_assert_held(&ep->mtx);
856857

857-
to_free = NULL;
858+
spin_lock(&file->f_lock);
858859
head = file->f_ep;
859-
if (head->first == &epi->fllink && !epi->fllink.next) {
860+
if (hlist_is_singular_node(&epi->fllink, head)) {
860861
/* See eventpoll_release() for details. */
861862
WRITE_ONCE(file->f_ep, NULL);
862863
if (!is_file_epoll(file)) {
@@ -869,6 +870,11 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
869870
hlist_del_rcu(&epi->fllink);
870871
spin_unlock(&file->f_lock);
871872
free_ephead(to_free);
873+
}
874+
875+
static void ep_remove_epi(struct eventpoll *ep, struct epitem *epi)
876+
{
877+
lockdep_assert_held(&ep->mtx);
872878

873879
rb_erase_cached(&epi->rbn, &ep->rbr);
874880

@@ -888,16 +894,32 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
888894
kfree_rcu(epi, rcu);
889895

890896
percpu_counter_dec(&ep->user->epoll_watches);
891-
return true;
892897
}
893898

894899
/*
895900
* ep_remove variant for callers owing an additional reference to the ep
896901
*/
897-
static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi)
902+
static void ep_remove(struct eventpoll *ep, struct epitem *epi)
898903
{
899-
if (__ep_remove(ep, epi, false))
900-
WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
904+
struct file *file __free(fput) = NULL;
905+
906+
lockdep_assert_irqs_enabled();
907+
lockdep_assert_held(&ep->mtx);
908+
909+
ep_unregister_pollwait(ep, epi);
910+
911+
/*
912+
* If we manage to grab a reference it means we're not in
913+
* eventpoll_release_file() and aren't going to be: once @file's
914+
* refcount has reached zero, file_ref_get() cannot bring it back.
915+
*/
916+
file = epi_fget(epi);
917+
if (!file)
918+
return;
919+
920+
ep_remove_file(ep, epi, file);
921+
ep_remove_epi(ep, epi);
922+
WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
901923
}
902924

903925
static void ep_clear_and_put(struct eventpoll *ep)
@@ -923,7 +945,7 @@ static void ep_clear_and_put(struct eventpoll *ep)
923945

924946
/*
925947
* Walks through the whole tree and try to free each "struct epitem".
926-
* Note that ep_remove_safe() will not remove the epitem in case of a
948+
* Note that ep_remove() will not remove the epitem in case of a
927949
* racing eventpoll_release_file(); the latter will do the removal.
928950
* At this point we are sure no poll callbacks will be lingering around.
929951
* Since we still own a reference to the eventpoll struct, the loop can't
@@ -932,7 +954,7 @@ static void ep_clear_and_put(struct eventpoll *ep)
932954
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) {
933955
next = rb_next(rbp);
934956
epi = rb_entry(rbp, struct epitem, rbn);
935-
ep_remove_safe(ep, epi);
957+
ep_remove(ep, epi);
936958
cond_resched();
937959
}
938960

@@ -1012,34 +1034,6 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
10121034
return res;
10131035
}
10141036

1015-
/*
1016-
* The ffd.file pointer may be in the process of being torn down due to
1017-
* being closed, but we may not have finished eventpoll_release() yet.
1018-
*
1019-
* Normally, even with the atomic_long_inc_not_zero, the file may have
1020-
* been free'd and then gotten re-allocated to something else (since
1021-
* files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
1022-
*
1023-
* But for epoll, users hold the ep->mtx mutex, and as such any file in
1024-
* the process of being free'd will block in eventpoll_release_file()
1025-
* and thus the underlying file allocation will not be free'd, and the
1026-
* file re-use cannot happen.
1027-
*
1028-
* For the same reason we can avoid a rcu_read_lock() around the
1029-
* operation - 'ffd.file' cannot go away even if the refcount has
1030-
* reached zero (but we must still not call out to ->poll() functions
1031-
* etc).
1032-
*/
1033-
static struct file *epi_fget(const struct epitem *epi)
1034-
{
1035-
struct file *file;
1036-
1037-
file = epi->ffd.file;
1038-
if (!file_ref_get(&file->f_ref))
1039-
file = NULL;
1040-
return file;
1041-
}
1042-
10431037
/*
10441038
* Differs from ep_eventpoll_poll() in that internal callers already have
10451039
* the ep->mtx so we need to start from depth=1, such that mutex_lock_nested()
@@ -1117,18 +1111,17 @@ void eventpoll_release_file(struct file *file)
11171111
{
11181112
struct eventpoll *ep;
11191113
struct epitem *epi;
1120-
bool dispose;
11211114

11221115
/*
1123-
* Use the 'dying' flag to prevent a concurrent ep_clear_and_put() from
1124-
* touching the epitems list before eventpoll_release_file() can access
1125-
* the ep->mtx.
1116+
* A concurrent ep_remove() cannot outrace us: it pins @file via
1117+
* epi_fget(), which fails once __fput() has dropped the refcount
1118+
* to zero -- the path we're on. So any racing ep_remove() bails
1119+
* and leaves the epi for us to clean up here.
11261120
*/
11271121
again:
11281122
spin_lock(&file->f_lock);
11291123
if (file->f_ep && file->f_ep->first) {
11301124
epi = hlist_entry(file->f_ep->first, struct epitem, fllink);
1131-
epi->dying = true;
11321125
spin_unlock(&file->f_lock);
11331126

11341127
/*
@@ -1137,10 +1130,15 @@ void eventpoll_release_file(struct file *file)
11371130
*/
11381131
ep = epi->ep;
11391132
mutex_lock(&ep->mtx);
1140-
dispose = __ep_remove(ep, epi, true);
1133+
1134+
ep_unregister_pollwait(ep, epi);
1135+
1136+
ep_remove_file(ep, epi, file);
1137+
ep_remove_epi(ep, epi);
1138+
11411139
mutex_unlock(&ep->mtx);
11421140

1143-
if (dispose && ep_refcount_dec_and_test(ep))
1141+
if (ep_refcount_dec_and_test(ep))
11441142
ep_free(ep);
11451143
goto again;
11461144
}
@@ -1619,21 +1617,21 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
16191617
mutex_unlock(&tep->mtx);
16201618

16211619
/*
1622-
* ep_remove_safe() calls in the later error paths can't lead to
1620+
* ep_remove() calls in the later error paths can't lead to
16231621
* ep_free() as the ep file itself still holds an ep reference.
16241622
*/
16251623
ep_get(ep);
16261624

16271625
/* now check if we've created too many backpaths */
16281626
if (unlikely(full_check && reverse_path_check())) {
1629-
ep_remove_safe(ep, epi);
1627+
ep_remove(ep, epi);
16301628
return -EINVAL;
16311629
}
16321630

16331631
if (epi->event.events & EPOLLWAKEUP) {
16341632
error = ep_create_wakeup_source(epi);
16351633
if (error) {
1636-
ep_remove_safe(ep, epi);
1634+
ep_remove(ep, epi);
16371635
return error;
16381636
}
16391637
}
@@ -1657,7 +1655,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
16571655
* high memory pressure.
16581656
*/
16591657
if (unlikely(!epq.epi)) {
1660-
ep_remove_safe(ep, epi);
1658+
ep_remove(ep, epi);
16611659
return -ENOMEM;
16621660
}
16631661

@@ -2352,7 +2350,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
23522350
* The eventpoll itself is still alive: the refcount
23532351
* can't go to zero here.
23542352
*/
2355-
ep_remove_safe(ep, epi);
2353+
ep_remove(ep, epi);
23562354
error = 0;
23572355
} else {
23582356
error = -ENOENT;

fs/fs-writeback.c

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -568,28 +568,30 @@ void inode_switch_wbs_work_fn(struct work_struct *work)
568568
struct inode_switch_wbs_context *isw, *next_isw;
569569
struct llist_node *list;
570570

571+
list = llist_del_all(&new_wb->switch_wbs_ctxs);
571572
/*
572-
* Grab out reference to wb so that it cannot get freed under us
573+
* Nothing to do? That would be a problem as references held by isw
574+
* items protect wb from freeing...
575+
*/
576+
if (WARN_ON_ONCE(!list))
577+
return;
578+
579+
/*
580+
* Grab our reference to wb so that it cannot get freed under us
573581
* after we process all the isw items.
574582
*/
575583
wb_get(new_wb);
576-
while (1) {
577-
list = llist_del_all(&new_wb->switch_wbs_ctxs);
578-
/* Nothing to do? */
579-
if (!list)
580-
break;
581-
/*
582-
* In addition to synchronizing among switchers, I_WB_SWITCH
583-
* tells the RCU protected stat update paths to grab the i_page
584-
* lock so that stat transfer can synchronize against them.
585-
* Let's continue after I_WB_SWITCH is guaranteed to be
586-
* visible.
587-
*/
588-
synchronize_rcu();
584+
/*
585+
* In addition to synchronizing among switchers, I_WB_SWITCH
586+
* tells the RCU protected stat update paths to grab the i_page
587+
* lock so that stat transfer can synchronize against them.
588+
* Let's continue after I_WB_SWITCH is guaranteed to be
589+
* visible.
590+
*/
591+
synchronize_rcu();
589592

590-
llist_for_each_entry_safe(isw, next_isw, list, list)
591-
process_inode_switch_wbs(new_wb, isw);
592-
}
593+
llist_for_each_entry_safe(isw, next_isw, list, list)
594+
process_inode_switch_wbs(new_wb, isw);
593595
wb_put(new_wb);
594596
}
595597

fs/fuse/readdir.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ static void fuse_add_dirent_to_cache(struct file *file,
4141
unsigned int offset;
4242
void *addr;
4343

44+
/* Dirent doesn't fit in readdir cache page? Skip caching. */
45+
if (reclen > PAGE_SIZE)
46+
return;
47+
4448
spin_lock(&fi->rdc.lock);
4549
/*
4650
* Is cache already completed? Or this entry does not go at the end of

0 commit comments

Comments (0)