Skip to content

Commit f1e14b9

Browse files
Christoph Hellwig authored and kawasaki committed
iomap: refactor the writeback interface
Replace ->map_blocks with a new ->writeback_range, which differs in the following ways:

- it must also queue up the I/O for writeback, that is, it calls into the slightly refactored and extended-in-scope iomap_add_to_ioend for each region
- it can handle only a part of the requested region, that is, the retry loop for partial mappings moves to the caller
- it handles cleanup on failures as well, and thus also replaces the discard_folio method only implemented by XFS.

This will allow using the iomap writeback code also for file systems that are not block based, like fuse.

Co-developed-by: Joanne Koong <[email protected]>
Signed-off-by: Joanne Koong <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Brian Foster <[email protected]>
Reviewed-by: "Darrick J. Wong" <[email protected]>
Acked-by: Damien Le Moal <[email protected]> # zonefs
1 parent 77f6b72 commit f1e14b9

8 files changed

Lines changed: 197 additions & 161 deletions

File tree

Documentation/filesystems/iomap/operations.rst

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ writeback.
271271
It does not lock ``i_rwsem`` or ``invalidate_lock``.
272272

273273
The dirty bit will be cleared for all folios run through the
274-
``->map_blocks`` machinery described below even if the writeback fails.
274+
``->writeback_range`` machinery described below even if the writeback fails.
275275
This is to prevent dirty folio clots when storage devices fail; an
276276
``-EIO`` is recorded for userspace to collect via ``fsync``.
277277

@@ -283,15 +283,14 @@ The ``ops`` structure must be specified and is as follows:
283283
.. code-block:: c
284284
285285
struct iomap_writeback_ops {
286-
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
287-
loff_t offset, unsigned len);
288-
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
289-
void (*discard_folio)(struct folio *folio, loff_t pos);
286+
ssize_t (*writeback_range)(struct iomap_writepage_ctx *wpc,
287+
struct folio *folio, u64 pos, unsigned int len, u64 end_pos);
288+
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
290289
};
291290
292291
The fields are as follows:
293292

294-
- ``map_blocks``: Sets ``wpc->iomap`` to the space mapping of the file
293+
- ``writeback_range``: Sets ``wpc->iomap`` to the space mapping of the file
295294
range (in bytes) given by ``pos`` and ``len``.
296295
iomap calls this function for each dirty fs block in each dirty folio,
297296
though it will `reuse mappings
@@ -306,6 +305,15 @@ The fields are as follows:
306305
This revalidation must be open-coded by the filesystem; it is
307306
unclear if ``iomap::validity_cookie`` can be reused for this
308307
purpose.
308+
309+
If this method fails to schedule I/O for any part of a dirty folio, it
310+
should throw away any reservations that may have been made for the write.
311+
The folio will be marked clean and an ``-EIO`` recorded in the
312+
pagecache.
313+
Filesystems can use this callback to `remove
314+
<https://lore.kernel.org/all/[email protected]/>`_
315+
delalloc reservations to avoid having delalloc reservations for
316+
clean pagecache.
309317
This function must be supplied by the filesystem.
310318

311319
- ``submit_ioend``: Allows the file systems to hook into writeback bio
@@ -316,18 +324,6 @@ The fields are as follows:
316324
transactions from process context before submitting the bio.
317325
This function is optional.
318326

319-
- ``discard_folio``: iomap calls this function after ``->map_blocks``
320-
fails to schedule I/O for any part of a dirty folio.
321-
The function should throw away any reservations that may have been
322-
made for the write.
323-
The folio will be marked clean and an ``-EIO`` recorded in the
324-
pagecache.
325-
Filesystems can use this callback to `remove
326-
<https://lore.kernel.org/all/[email protected]/>`_
327-
delalloc reservations to avoid having delalloc reservations for
328-
clean pagecache.
329-
This function is optional.
330-
331327
Pagecache Writeback Completion
332328
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
333329

block/fops.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -537,22 +537,29 @@ static void blkdev_readahead(struct readahead_control *rac)
537537
iomap_readahead(rac, &blkdev_iomap_ops);
538538
}
539539

540-
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
541-
struct inode *inode, loff_t offset, unsigned int len)
540+
static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc,
541+
struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
542542
{
543-
loff_t isize = i_size_read(inode);
543+
loff_t isize = i_size_read(wpc->inode);
544544

545545
if (WARN_ON_ONCE(offset >= isize))
546546
return -EIO;
547-
if (offset >= wpc->iomap.offset &&
548-
offset < wpc->iomap.offset + wpc->iomap.length)
549-
return 0;
550-
return blkdev_iomap_begin(inode, offset, isize - offset,
551-
IOMAP_WRITE, &wpc->iomap, NULL);
547+
548+
if (offset < wpc->iomap.offset ||
549+
offset >= wpc->iomap.offset + wpc->iomap.length) {
550+
int error;
551+
552+
error = blkdev_iomap_begin(wpc->inode, offset, isize - offset,
553+
IOMAP_WRITE, &wpc->iomap, NULL);
554+
if (error)
555+
return error;
556+
}
557+
558+
return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
552559
}
553560

554561
static const struct iomap_writeback_ops blkdev_writeback_ops = {
555-
.map_blocks = blkdev_map_blocks,
562+
.writeback_range = blkdev_writeback_range,
556563
};
557564

558565
static int blkdev_writepages(struct address_space *mapping,

fs/gfs2/bmap.c

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2469,23 +2469,25 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
24692469
return error;
24702470
}
24712471

2472-
static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
2473-
loff_t offset, unsigned int len)
2472+
static ssize_t gfs2_writeback_range(struct iomap_writepage_ctx *wpc,
2473+
struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
24742474
{
2475-
int ret;
2476-
2477-
if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
2475+
if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(wpc->inode))))
24782476
return -EIO;
24792477

2480-
if (offset >= wpc->iomap.offset &&
2481-
offset < wpc->iomap.offset + wpc->iomap.length)
2482-
return 0;
2478+
if (offset < wpc->iomap.offset ||
2479+
offset >= wpc->iomap.offset + wpc->iomap.length) {
2480+
int ret;
24832481

2484-
memset(&wpc->iomap, 0, sizeof(wpc->iomap));
2485-
ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap);
2486-
return ret;
2482+
memset(&wpc->iomap, 0, sizeof(wpc->iomap));
2483+
ret = gfs2_iomap_get(wpc->inode, offset, INT_MAX, &wpc->iomap);
2484+
if (ret)
2485+
return ret;
2486+
}
2487+
2488+
return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
24872489
}
24882490

24892491
const struct iomap_writeback_ops gfs2_writeback_ops = {
2490-
.map_blocks = gfs2_map_blocks,
2492+
.writeback_range = gfs2_writeback_range,
24912493
};

fs/iomap/buffered-io.c

Lines changed: 44 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,14 +1668,30 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,
16681668
* At the end of a writeback pass, there will be a cached ioend remaining on the
16691669
* writepage context that the caller will need to submit.
16701670
*/
1671-
static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
1672-
struct folio *folio, loff_t pos, loff_t end_pos, unsigned len)
1671+
ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
1672+
loff_t pos, loff_t end_pos, unsigned int dirty_len)
16731673
{
16741674
struct iomap_folio_state *ifs = folio->private;
16751675
size_t poff = offset_in_folio(folio, pos);
16761676
unsigned int ioend_flags = 0;
1677+
unsigned int map_len = min_t(u64, dirty_len,
1678+
wpc->iomap.offset + wpc->iomap.length - pos);
16771679
int error;
16781680

1681+
trace_iomap_add_to_ioend(wpc->inode, pos, dirty_len, &wpc->iomap);
1682+
1683+
WARN_ON_ONCE(!folio->private && map_len < dirty_len);
1684+
1685+
switch (wpc->iomap.type) {
1686+
case IOMAP_INLINE:
1687+
WARN_ON_ONCE(1);
1688+
return -EIO;
1689+
case IOMAP_HOLE:
1690+
return map_len;
1691+
default:
1692+
break;
1693+
}
1694+
16791695
if (wpc->iomap.type == IOMAP_UNWRITTEN)
16801696
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
16811697
if (wpc->iomap.flags & IOMAP_F_SHARED)
@@ -1693,11 +1709,11 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
16931709
wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags);
16941710
}
16951711

1696-
if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff))
1712+
if (!bio_add_folio(&wpc->ioend->io_bio, folio, map_len, poff))
16971713
goto new_ioend;
16981714

16991715
if (ifs)
1700-
atomic_add(len, &ifs->write_bytes_pending);
1716+
atomic_add(map_len, &ifs->write_bytes_pending);
17011717

17021718
/*
17031719
* Clamp io_offset and io_size to the incore EOF so that ondisk
@@ -1740,63 +1756,39 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
17401756
* Note that this defeats the ability to chain the ioends of
17411757
* appending writes.
17421758
*/
1743-
wpc->ioend->io_size += len;
1759+
wpc->ioend->io_size += map_len;
17441760
if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos)
17451761
wpc->ioend->io_size = end_pos - wpc->ioend->io_offset;
17461762

1747-
wbc_account_cgroup_owner(wpc->wbc, folio, len);
1748-
return 0;
1763+
wbc_account_cgroup_owner(wpc->wbc, folio, map_len);
1764+
return map_len;
17491765
}
1766+
EXPORT_SYMBOL_GPL(iomap_add_to_ioend);
17501767

1751-
static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
1752-
struct folio *folio, u64 pos, u64 end_pos, unsigned dirty_len,
1768+
static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
1769+
struct folio *folio, u64 pos, u32 rlen, u64 end_pos,
17531770
bool *wb_pending)
17541771
{
1755-
int error;
1756-
17571772
do {
1758-
unsigned map_len;
1759-
1760-
error = wpc->ops->map_blocks(wpc, wpc->inode, pos, dirty_len);
1761-
if (error)
1762-
break;
1763-
trace_iomap_writepage_map(wpc->inode, pos, dirty_len,
1764-
&wpc->iomap);
1773+
ssize_t ret;
17651774

1766-
map_len = min_t(u64, dirty_len,
1767-
wpc->iomap.offset + wpc->iomap.length - pos);
1768-
WARN_ON_ONCE(!folio->private && map_len < dirty_len);
1775+
ret = wpc->ops->writeback_range(wpc, folio, pos, rlen, end_pos);
1776+
if (WARN_ON_ONCE(ret == 0 || ret > rlen))
1777+
return -EIO;
1778+
if (ret < 0)
1779+
return ret;
1780+
rlen -= ret;
1781+
pos += ret;
17691782

1770-
switch (wpc->iomap.type) {
1771-
case IOMAP_INLINE:
1772-
WARN_ON_ONCE(1);
1773-
error = -EIO;
1774-
break;
1775-
case IOMAP_HOLE:
1776-
break;
1777-
default:
1778-
error = iomap_add_to_ioend(wpc, folio, pos, end_pos,
1779-
map_len);
1780-
if (!error)
1781-
*wb_pending = true;
1782-
break;
1783-
}
1784-
dirty_len -= map_len;
1785-
pos += map_len;
1786-
} while (dirty_len && !error);
1783+
/*
1784+
* Holes are not written back by ->writeback_range, so track
1785+
* if we did handle anything that is not a hole here.
1786+
*/
1787+
if (wpc->iomap.type != IOMAP_HOLE)
1788+
*wb_pending = true;
1789+
} while (rlen);
17871790

1788-
/*
1789-
* We cannot cancel the ioend directly here on error. We may have
1790-
* already set other pages under writeback and hence we have to run I/O
1791-
* completion to mark the error state of the pages under writeback
1792-
* appropriately.
1793-
*
1794-
* Just let the file system know what portion of the folio failed to
1795-
* map.
1796-
*/
1797-
if (error && wpc->ops->discard_folio)
1798-
wpc->ops->discard_folio(folio, pos);
1799-
return error;
1791+
return 0;
18001792
}
18011793

18021794
/*
@@ -1908,8 +1900,8 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
19081900
*/
19091901
end_aligned = round_up(end_pos, i_blocksize(inode));
19101902
while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
1911-
error = iomap_writepage_map_blocks(wpc, folio, pos, end_pos,
1912-
rlen, &wb_pending);
1903+
error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos,
1904+
&wb_pending);
19131905
if (error)
19141906
break;
19151907
pos += rlen;

fs/iomap/trace.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ DEFINE_EVENT(iomap_class, name, \
169169
DEFINE_IOMAP_EVENT(iomap_iter_dstmap);
170170
DEFINE_IOMAP_EVENT(iomap_iter_srcmap);
171171

172-
TRACE_EVENT(iomap_writepage_map,
172+
TRACE_EVENT(iomap_add_to_ioend,
173173
TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len,
174174
struct iomap *iomap),
175175
TP_ARGS(inode, pos, dirty_len, iomap),

0 commit comments

Comments
 (0)