Skip to content

Commit d1384f7

Browse files
committed
Merge tag 'vfs-7.0-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner:

 - Fix netfs_limit_iter() hitting BUG() when an ITER_KVEC iterator reaches it via core dump writes to 9P filesystems. Add ITER_KVEC handling following the same pattern as the existing ITER_BVEC code.

 - Fix a NULL pointer dereference in the netfs unbuffered write retry path when the filesystem (e.g., 9P) doesn't set the prepare_write operation.

 - Clear I_DIRTY_TIME in sync_lazytime for filesystems implementing ->sync_lazytime. Without this the flag stays set and may cause additional unnecessary calls during inode deactivation.

 - Increase tmpfs size in mount_setattr selftests. A recent commit bumped the ext4 image size to 2 GB but didn't adjust the tmpfs backing store, so mkfs.ext4 fails with ENOSPC writing metadata.

 - Fix an invalid folio access in iomap when i_blkbits matches the folio size but differs from the I/O granularity. The cur_folio pointer would not get invalidated and iomap_read_end() would still be called on it despite the IO helper owning it.

 - Fix hash_name() docstring.

 - Fix read abandonment during netfs retry where the subreq variable used for abandonment could be uninitialized on the first pass or point to a deleted subrequest on later passes.

 - Don't block sync for filesystems with no data integrity guarantees. Add a SB_I_NO_DATA_INTEGRITY superblock flag replacing the per-inode AS_NO_DATA_INTEGRITY mapping flag so sync kicks off writeback but doesn't wait for flusher threads. This fixes a suspend-to-RAM hang on fuse-overlayfs where the flusher thread blocks when the fuse daemon is frozen.

 - Fix a lockdep splat in iomap when reads fail. iomap_read_end_io() invokes fserror_report() which calls igrab() taking i_lock in hardirq context while i_lock is normally held with interrupts enabled. Kick failed read handling to a workqueue.

 - Remove the redundant netfs_io_stream::front member and use stream->subrequests.next instead, fixing a potential issue in the direct write code path.
* tag 'vfs-7.0-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  netfs: Fix the handling of stream->front by removing it
  iomap: fix lockdep complaint when reads fail
  writeback: don't block sync for filesystems with no data integrity guarantees
  netfs: Fix read abandonment during retry
  vfs: fix docstring of hash_name()
  iomap: fix invalid folio access when i_blkbits differs from I/O granularity
  selftests/mount_setattr: increase tmpfs size for idmapped mount tests
  fs: clear I_DIRTY_TIME in sync_lazytime
  netfs: Fix NULL pointer dereference in netfs_unbuffered_write() on retry
  netfs: Fix kernel BUG in netfs_limit_iter() for ITER_KVEC iterators
2 parents fc9eae2 + 0e764b9 commit d1384f7

20 files changed

Lines changed: 168 additions & 53 deletions

File tree

fs/fs-writeback.c

Lines changed: 27 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1711,16 +1711,28 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
17111711
}
17121712
}
17131713

1714+
static bool __sync_lazytime(struct inode *inode)
1715+
{
1716+
spin_lock(&inode->i_lock);
1717+
if (!(inode_state_read(inode) & I_DIRTY_TIME)) {
1718+
spin_unlock(&inode->i_lock);
1719+
return false;
1720+
}
1721+
inode_state_clear(inode, I_DIRTY_TIME);
1722+
spin_unlock(&inode->i_lock);
1723+
inode->i_op->sync_lazytime(inode);
1724+
return true;
1725+
}
1726+
17141727
bool sync_lazytime(struct inode *inode)
17151728
{
17161729
if (!(inode_state_read_once(inode) & I_DIRTY_TIME))
17171730
return false;
17181731

17191732
trace_writeback_lazytime(inode);
17201733
if (inode->i_op->sync_lazytime)
1721-
inode->i_op->sync_lazytime(inode);
1722-
else
1723-
mark_inode_dirty_sync(inode);
1734+
return __sync_lazytime(inode);
1735+
mark_inode_dirty_sync(inode);
17241736
return true;
17251737
}
17261738

@@ -2775,13 +2787,8 @@ static void wait_sb_inodes(struct super_block *sb)
27752787
* The mapping can appear untagged while still on-list since we
27762788
* do not have the mapping lock. Skip it here, wb completion
27772789
* will remove it.
2778-
*
2779-
* If the mapping does not have data integrity semantics,
2780-
* there's no need to wait for the writeout to complete, as the
2781-
* mapping cannot guarantee that data is persistently stored.
27822790
*/
2783-
if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK) ||
2784-
mapping_no_data_integrity(mapping))
2791+
if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
27852792
continue;
27862793

27872794
spin_unlock_irq(&sb->s_inode_wblist_lock);
@@ -2916,6 +2923,17 @@ void sync_inodes_sb(struct super_block *sb)
29162923
*/
29172924
if (bdi == &noop_backing_dev_info)
29182925
return;
2926+
2927+
/*
2928+
* If the superblock has SB_I_NO_DATA_INTEGRITY set, there's no need to
2929+
* wait for the writeout to complete, as the filesystem cannot guarantee
2930+
* data persistence on sync. Just kick off writeback and return.
2931+
*/
2932+
if (sb->s_iflags & SB_I_NO_DATA_INTEGRITY) {
2933+
wakeup_flusher_threads_bdi(bdi, WB_REASON_SYNC);
2934+
return;
2935+
}
2936+
29192937
WARN_ON(!rwsem_is_locked(&sb->s_umount));
29202938

29212939
/* protect against inode wb switch, see inode_switch_wbs_work_fn() */

fs/fuse/file.c

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3201,10 +3201,8 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
32013201

32023202
inode->i_fop = &fuse_file_operations;
32033203
inode->i_data.a_ops = &fuse_file_aops;
3204-
if (fc->writeback_cache) {
3204+
if (fc->writeback_cache)
32053205
mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data);
3206-
mapping_set_no_data_integrity(&inode->i_data);
3207-
}
32083206

32093207
INIT_LIST_HEAD(&fi->write_files);
32103208
INIT_LIST_HEAD(&fi->queued_writes);

fs/fuse/inode.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1709,6 +1709,7 @@ static void fuse_sb_defaults(struct super_block *sb)
17091709
sb->s_export_op = &fuse_export_operations;
17101710
sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
17111711
sb->s_iflags |= SB_I_NOIDMAP;
1712+
sb->s_iflags |= SB_I_NO_DATA_INTEGRITY;
17121713
if (sb->s_user_ns != &init_user_ns)
17131714
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
17141715
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);

fs/iomap/bio.c

Lines changed: 50 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,10 @@
88
#include "internal.h"
99
#include "trace.h"
1010

11-
static void iomap_read_end_io(struct bio *bio)
11+
static DEFINE_SPINLOCK(failed_read_lock);
12+
static struct bio_list failed_read_list = BIO_EMPTY_LIST;
13+
14+
static void __iomap_read_end_io(struct bio *bio)
1215
{
1316
int error = blk_status_to_errno(bio->bi_status);
1417
struct folio_iter fi;
@@ -18,6 +21,52 @@ static void iomap_read_end_io(struct bio *bio)
1821
bio_put(bio);
1922
}
2023

24+
static void
25+
iomap_fail_reads(
26+
struct work_struct *work)
27+
{
28+
struct bio *bio;
29+
struct bio_list tmp = BIO_EMPTY_LIST;
30+
unsigned long flags;
31+
32+
spin_lock_irqsave(&failed_read_lock, flags);
33+
bio_list_merge_init(&tmp, &failed_read_list);
34+
spin_unlock_irqrestore(&failed_read_lock, flags);
35+
36+
while ((bio = bio_list_pop(&tmp)) != NULL) {
37+
__iomap_read_end_io(bio);
38+
cond_resched();
39+
}
40+
}
41+
42+
static DECLARE_WORK(failed_read_work, iomap_fail_reads);
43+
44+
static void iomap_fail_buffered_read(struct bio *bio)
45+
{
46+
unsigned long flags;
47+
48+
/*
49+
* Bounce I/O errors to a workqueue to avoid nested i_lock acquisitions
50+
* in the fserror code. The caller no longer owns the bio reference
51+
* after the spinlock drops.
52+
*/
53+
spin_lock_irqsave(&failed_read_lock, flags);
54+
if (bio_list_empty(&failed_read_list))
55+
WARN_ON_ONCE(!schedule_work(&failed_read_work));
56+
bio_list_add(&failed_read_list, bio);
57+
spin_unlock_irqrestore(&failed_read_lock, flags);
58+
}
59+
60+
static void iomap_read_end_io(struct bio *bio)
61+
{
62+
if (bio->bi_status) {
63+
iomap_fail_buffered_read(bio);
64+
return;
65+
}
66+
67+
__iomap_read_end_io(bio);
68+
}
69+
2170
static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
2271
{
2372
struct bio *bio = ctx->read_ctx;

fs/iomap/buffered-io.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
514514
loff_t length = iomap_length(iter);
515515
struct folio *folio = ctx->cur_folio;
516516
size_t folio_len = folio_size(folio);
517+
struct iomap_folio_state *ifs;
517518
size_t poff, plen;
518519
loff_t pos_diff;
519520
int ret;
@@ -525,7 +526,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
525526
return iomap_iter_advance(iter, length);
526527
}
527528

528-
ifs_alloc(iter->inode, folio, iter->flags);
529+
ifs = ifs_alloc(iter->inode, folio, iter->flags);
529530

530531
length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos));
531532
while (length) {
@@ -560,11 +561,15 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
560561

561562
*bytes_submitted += plen;
562563
/*
563-
* If the entire folio has been read in by the IO
564-
* helper, then the helper owns the folio and will end
565-
* the read on it.
564+
* Hand off folio ownership to the IO helper when:
565+
* 1) The entire folio has been submitted for IO, or
566+
* 2) There is no ifs attached to the folio
567+
*
568+
* Case (2) occurs when 1 << i_blkbits matches the folio
569+
* size but the underlying filesystem or block device
570+
* uses a smaller granularity for IO.
566571
*/
567-
if (*bytes_submitted == folio_len)
572+
if (*bytes_submitted == folio_len || !ifs)
568573
ctx->cur_folio = NULL;
569574
}
570575

fs/namei.c

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2437,8 +2437,14 @@ u64 hashlen_string(const void *salt, const char *name)
24372437
EXPORT_SYMBOL(hashlen_string);
24382438

24392439
/*
2440-
* Calculate the length and hash of the path component, and
2441-
* return the length as the result.
2440+
* hash_name - Calculate the length and hash of the path component
2441+
* @nd: the path resolution state
2442+
* @name: the pathname to read the component from
2443+
* @lastword: if the component fits in a single word, LAST_WORD_IS_DOT,
2444+
* LAST_WORD_IS_DOTDOT, or some other value depending on whether the
2445+
* component is '.', '..', or something else. Otherwise, @lastword is 0.
2446+
*
2447+
* Returns: a pointer to the terminating '/' or NUL character in @name.
24422448
*/
24432449
static inline const char *hash_name(struct nameidata *nd,
24442450
const char *name,

fs/netfs/buffered_read.c

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -171,9 +171,8 @@ static void netfs_queue_read(struct netfs_io_request *rreq,
171171
spin_lock(&rreq->lock);
172172
list_add_tail(&subreq->rreq_link, &stream->subrequests);
173173
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
174-
stream->front = subreq;
175174
if (!stream->active) {
176-
stream->collected_to = stream->front->start;
175+
stream->collected_to = subreq->start;
177176
/* Store list pointers before active flag */
178177
smp_store_release(&stream->active, true);
179178
}

fs/netfs/direct_read.c

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,9 +71,8 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
7171
spin_lock(&rreq->lock);
7272
list_add_tail(&subreq->rreq_link, &stream->subrequests);
7373
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
74-
stream->front = subreq;
7574
if (!stream->active) {
76-
stream->collected_to = stream->front->start;
75+
stream->collected_to = subreq->start;
7776
/* Store list pointers before active flag */
7877
smp_store_release(&stream->active, true);
7978
}

fs/netfs/direct_write.c

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,6 @@ static int netfs_unbuffered_write(struct netfs_io_request *wreq)
111111
netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred);
112112
subreq = stream->construct;
113113
stream->construct = NULL;
114-
stream->front = NULL;
115114
}
116115

117116
/* Check if (re-)preparation failed. */
@@ -186,10 +185,18 @@ static int netfs_unbuffered_write(struct netfs_io_request *wreq)
186185
stream->sreq_max_segs = INT_MAX;
187186

188187
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
189-
stream->prepare_write(subreq);
190188

191-
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
192-
netfs_stat(&netfs_n_wh_retry_write_subreq);
189+
if (stream->prepare_write) {
190+
stream->prepare_write(subreq);
191+
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
192+
netfs_stat(&netfs_n_wh_retry_write_subreq);
193+
} else {
194+
struct iov_iter source;
195+
196+
netfs_reset_iter(subreq);
197+
source = subreq->io_iter;
198+
netfs_reissue_write(stream, subreq, &source);
199+
}
193200
}
194201

195202
netfs_unbuffered_write_done(wreq);

fs/netfs/iterator.c

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,47 @@ static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset,
142142
return min(span, max_size);
143143
}
144144

145+
/*
146+
* Select the span of a kvec iterator we're going to use. Limit it by both
147+
* maximum size and maximum number of segments. Returns the size of the span
148+
* in bytes.
149+
*/
150+
static size_t netfs_limit_kvec(const struct iov_iter *iter, size_t start_offset,
151+
size_t max_size, size_t max_segs)
152+
{
153+
const struct kvec *kvecs = iter->kvec;
154+
unsigned int nkv = iter->nr_segs, ix = 0, nsegs = 0;
155+
size_t len, span = 0, n = iter->count;
156+
size_t skip = iter->iov_offset + start_offset;
157+
158+
if (WARN_ON(!iov_iter_is_kvec(iter)) ||
159+
WARN_ON(start_offset > n) ||
160+
n == 0)
161+
return 0;
162+
163+
while (n && ix < nkv && skip) {
164+
len = kvecs[ix].iov_len;
165+
if (skip < len)
166+
break;
167+
skip -= len;
168+
n -= len;
169+
ix++;
170+
}
171+
172+
while (n && ix < nkv) {
173+
len = min3(n, kvecs[ix].iov_len - skip, max_size);
174+
span += len;
175+
nsegs++;
176+
ix++;
177+
if (span >= max_size || nsegs >= max_segs)
178+
break;
179+
skip = 0;
180+
n -= len;
181+
}
182+
183+
return min(span, max_size);
184+
}
185+
145186
/*
146187
* Select the span of an xarray iterator we're going to use. Limit it by both
147188
* maximum size and maximum number of segments. It is assumed that segments
@@ -245,6 +286,8 @@ size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
245286
return netfs_limit_bvec(iter, start_offset, max_size, max_segs);
246287
if (iov_iter_is_xarray(iter))
247288
return netfs_limit_xarray(iter, start_offset, max_size, max_segs);
289+
if (iov_iter_is_kvec(iter))
290+
return netfs_limit_kvec(iter, start_offset, max_size, max_segs);
248291
BUG();
249292
}
250293
EXPORT_SYMBOL(netfs_limit_iter);

0 commit comments

Comments (0)