Skip to content

Commit 6c0c068

Browse files
Christoph Hellwig authored and kawasaki committed
iomap: refactor the writeback interface
Replace ->map_blocks with a new ->writeback_range, which differs in the following ways:

- it must also queue up the I/O for writeback, that is called into the slightly refactored and extended in scope iomap_add_to_ioend for each region
- can handle only a part of the requested region, that is the retry loop for partial mappings moves to the caller
- handles cleanup on failures as well, and thus also replaces the discard_folio method only implemented by XFS.

This will allow to use the iomap writeback code also for file systems that are not block based like fuse.

Co-developed-by: Joanne Koong <[email protected]>
Signed-off-by: Joanne Koong <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Brian Foster <[email protected]>
Reviewed-by: "Darrick J. Wong" <[email protected]>
Acked-by: Damien Le Moal <[email protected]> # zonefs
1 parent 7c3e774 commit 6c0c068

8 files changed

Lines changed: 197 additions & 161 deletions

File tree

Documentation/filesystems/iomap/operations.rst

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ writeback.
271271
It does not lock ``i_rwsem`` or ``invalidate_lock``.
272272

273273
The dirty bit will be cleared for all folios run through the
274-
``->map_blocks`` machinery described below even if the writeback fails.
274+
``->writeback_range`` machinery described below even if the writeback fails.
275275
This is to prevent dirty folio clots when storage devices fail; an
276276
``-EIO`` is recorded for userspace to collect via ``fsync``.
277277

@@ -283,15 +283,14 @@ The ``ops`` structure must be specified and is as follows:
283283
.. code-block:: c
284284
285285
struct iomap_writeback_ops {
286-
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
287-
loff_t offset, unsigned len);
288-
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
289-
void (*discard_folio)(struct folio *folio, loff_t pos);
286+
ssize_t (*writeback_range)(struct iomap_writepage_ctx *wpc,
287+
struct folio *folio, u64 pos, unsigned int len, u64 end_pos);
288+
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
290289
};
291290
292291
The fields are as follows:
293292

294-
- ``map_blocks``: Sets ``wpc->iomap`` to the space mapping of the file
293+
- ``writeback_range``: Sets ``wpc->iomap`` to the space mapping of the file
295294
range (in bytes) given by ``pos`` and ``len``.
296295
iomap calls this function for each dirty fs block in each dirty folio,
297296
though it will `reuse mappings
@@ -306,6 +305,15 @@ The fields are as follows:
306305
This revalidation must be open-coded by the filesystem; it is
307306
unclear if ``iomap::validity_cookie`` can be reused for this
308307
purpose.
308+
309+
If this method fails to schedule I/O for any part of a dirty folio, it
310+
should throw away any reservations that may have been made for the write.
311+
The folio will be marked clean and an ``-EIO`` recorded in the
312+
pagecache.
313+
Filesystems can use this callback to `remove
314+
<https://lore.kernel.org/all/[email protected]/>`_
315+
delalloc reservations to avoid having delalloc reservations for
316+
clean pagecache.
309317
This function must be supplied by the filesystem.
310318

311319
- ``submit_ioend``: Allows the file systems to hook into writeback bio
@@ -316,18 +324,6 @@ The fields are as follows:
316324
transactions from process context before submitting the bio.
317325
This function is optional.
318326

319-
- ``discard_folio``: iomap calls this function after ``->map_blocks``
320-
fails to schedule I/O for any part of a dirty folio.
321-
The function should throw away any reservations that may have been
322-
made for the write.
323-
The folio will be marked clean and an ``-EIO`` recorded in the
324-
pagecache.
325-
Filesystems can use this callback to `remove
326-
<https://lore.kernel.org/all/[email protected]/>`_
327-
delalloc reservations to avoid having delalloc reservations for
328-
clean pagecache.
329-
This function is optional.
330-
331327
Pagecache Writeback Completion
332328
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
333329

block/fops.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -537,22 +537,29 @@ static void blkdev_readahead(struct readahead_control *rac)
537537
iomap_readahead(rac, &blkdev_iomap_ops);
538538
}
539539

540-
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
541-
struct inode *inode, loff_t offset, unsigned int len)
540+
static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc,
541+
struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
542542
{
543-
loff_t isize = i_size_read(inode);
543+
loff_t isize = i_size_read(wpc->inode);
544544

545545
if (WARN_ON_ONCE(offset >= isize))
546546
return -EIO;
547-
if (offset >= wpc->iomap.offset &&
548-
offset < wpc->iomap.offset + wpc->iomap.length)
549-
return 0;
550-
return blkdev_iomap_begin(inode, offset, isize - offset,
551-
IOMAP_WRITE, &wpc->iomap, NULL);
547+
548+
if (offset < wpc->iomap.offset ||
549+
offset >= wpc->iomap.offset + wpc->iomap.length) {
550+
int error;
551+
552+
error = blkdev_iomap_begin(wpc->inode, offset, isize - offset,
553+
IOMAP_WRITE, &wpc->iomap, NULL);
554+
if (error)
555+
return error;
556+
}
557+
558+
return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
552559
}
553560

554561
static const struct iomap_writeback_ops blkdev_writeback_ops = {
555-
.map_blocks = blkdev_map_blocks,
562+
.writeback_range = blkdev_writeback_range,
556563
};
557564

558565
static int blkdev_writepages(struct address_space *mapping,

fs/gfs2/bmap.c

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2469,23 +2469,25 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
24692469
return error;
24702470
}
24712471

2472-
static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
2473-
loff_t offset, unsigned int len)
2472+
static ssize_t gfs2_writeback_range(struct iomap_writepage_ctx *wpc,
2473+
struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
24742474
{
2475-
int ret;
2476-
2477-
if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
2475+
if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(wpc->inode))))
24782476
return -EIO;
24792477

2480-
if (offset >= wpc->iomap.offset &&
2481-
offset < wpc->iomap.offset + wpc->iomap.length)
2482-
return 0;
2478+
if (offset < wpc->iomap.offset ||
2479+
offset >= wpc->iomap.offset + wpc->iomap.length) {
2480+
int ret;
24832481

2484-
memset(&wpc->iomap, 0, sizeof(wpc->iomap));
2485-
ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap);
2486-
return ret;
2482+
memset(&wpc->iomap, 0, sizeof(wpc->iomap));
2483+
ret = gfs2_iomap_get(wpc->inode, offset, INT_MAX, &wpc->iomap);
2484+
if (ret)
2485+
return ret;
2486+
}
2487+
2488+
return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
24872489
}
24882490

24892491
const struct iomap_writeback_ops gfs2_writeback_ops = {
2490-
.map_blocks = gfs2_map_blocks,
2492+
.writeback_range = gfs2_writeback_range,
24912493
};

fs/iomap/buffered-io.c

Lines changed: 44 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,14 +1671,30 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,
16711671
* At the end of a writeback pass, there will be a cached ioend remaining on the
16721672
* writepage context that the caller will need to submit.
16731673
*/
1674-
static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
1675-
struct folio *folio, loff_t pos, loff_t end_pos, unsigned len)
1674+
ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
1675+
loff_t pos, loff_t end_pos, unsigned int dirty_len)
16761676
{
16771677
struct iomap_folio_state *ifs = folio->private;
16781678
size_t poff = offset_in_folio(folio, pos);
16791679
unsigned int ioend_flags = 0;
1680+
unsigned int map_len = min_t(u64, dirty_len,
1681+
wpc->iomap.offset + wpc->iomap.length - pos);
16801682
int error;
16811683

1684+
trace_iomap_add_to_ioend(wpc->inode, pos, dirty_len, &wpc->iomap);
1685+
1686+
WARN_ON_ONCE(!folio->private && map_len < dirty_len);
1687+
1688+
switch (wpc->iomap.type) {
1689+
case IOMAP_INLINE:
1690+
WARN_ON_ONCE(1);
1691+
return -EIO;
1692+
case IOMAP_HOLE:
1693+
return map_len;
1694+
default:
1695+
break;
1696+
}
1697+
16821698
if (wpc->iomap.type == IOMAP_UNWRITTEN)
16831699
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
16841700
if (wpc->iomap.flags & IOMAP_F_SHARED)
@@ -1696,11 +1712,11 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
16961712
wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags);
16971713
}
16981714

1699-
if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff))
1715+
if (!bio_add_folio(&wpc->ioend->io_bio, folio, map_len, poff))
17001716
goto new_ioend;
17011717

17021718
if (ifs)
1703-
atomic_add(len, &ifs->write_bytes_pending);
1719+
atomic_add(map_len, &ifs->write_bytes_pending);
17041720

17051721
/*
17061722
* Clamp io_offset and io_size to the incore EOF so that ondisk
@@ -1743,63 +1759,39 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
17431759
* Note that this defeats the ability to chain the ioends of
17441760
* appending writes.
17451761
*/
1746-
wpc->ioend->io_size += len;
1762+
wpc->ioend->io_size += map_len;
17471763
if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos)
17481764
wpc->ioend->io_size = end_pos - wpc->ioend->io_offset;
17491765

1750-
wbc_account_cgroup_owner(wpc->wbc, folio, len);
1751-
return 0;
1766+
wbc_account_cgroup_owner(wpc->wbc, folio, map_len);
1767+
return map_len;
17521768
}
1769+
EXPORT_SYMBOL_GPL(iomap_add_to_ioend);
17531770

1754-
static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
1755-
struct folio *folio, u64 pos, u64 end_pos, unsigned dirty_len,
1771+
static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
1772+
struct folio *folio, u64 pos, u32 rlen, u64 end_pos,
17561773
bool *wb_pending)
17571774
{
1758-
int error;
1759-
17601775
do {
1761-
unsigned map_len;
1762-
1763-
error = wpc->ops->map_blocks(wpc, wpc->inode, pos, dirty_len);
1764-
if (error)
1765-
break;
1766-
trace_iomap_writepage_map(wpc->inode, pos, dirty_len,
1767-
&wpc->iomap);
1776+
ssize_t ret;
17681777

1769-
map_len = min_t(u64, dirty_len,
1770-
wpc->iomap.offset + wpc->iomap.length - pos);
1771-
WARN_ON_ONCE(!folio->private && map_len < dirty_len);
1778+
ret = wpc->ops->writeback_range(wpc, folio, pos, rlen, end_pos);
1779+
if (WARN_ON_ONCE(ret == 0 || ret > rlen))
1780+
return -EIO;
1781+
if (ret < 0)
1782+
return ret;
1783+
rlen -= ret;
1784+
pos += ret;
17721785

1773-
switch (wpc->iomap.type) {
1774-
case IOMAP_INLINE:
1775-
WARN_ON_ONCE(1);
1776-
error = -EIO;
1777-
break;
1778-
case IOMAP_HOLE:
1779-
break;
1780-
default:
1781-
error = iomap_add_to_ioend(wpc, folio, pos, end_pos,
1782-
map_len);
1783-
if (!error)
1784-
*wb_pending = true;
1785-
break;
1786-
}
1787-
dirty_len -= map_len;
1788-
pos += map_len;
1789-
} while (dirty_len && !error);
1786+
/*
1787+
* Holes are not written back by ->writeback_range, so track
1788+
* if we did handle anything that is not a hole here.
1789+
*/
1790+
if (wpc->iomap.type != IOMAP_HOLE)
1791+
*wb_pending = true;
1792+
} while (rlen);
17901793

1791-
/*
1792-
* We cannot cancel the ioend directly here on error. We may have
1793-
* already set other pages under writeback and hence we have to run I/O
1794-
* completion to mark the error state of the pages under writeback
1795-
* appropriately.
1796-
*
1797-
* Just let the file system know what portion of the folio failed to
1798-
* map.
1799-
*/
1800-
if (error && wpc->ops->discard_folio)
1801-
wpc->ops->discard_folio(folio, pos);
1802-
return error;
1794+
return 0;
18031795
}
18041796

18051797
/*
@@ -1911,8 +1903,8 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
19111903
*/
19121904
end_aligned = round_up(end_pos, i_blocksize(inode));
19131905
while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
1914-
error = iomap_writepage_map_blocks(wpc, folio, pos, end_pos,
1915-
rlen, &wb_pending);
1906+
error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos,
1907+
&wb_pending);
19161908
if (error)
19171909
break;
19181910
pos += rlen;

fs/iomap/trace.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ DEFINE_EVENT(iomap_class, name, \
169169
DEFINE_IOMAP_EVENT(iomap_iter_dstmap);
170170
DEFINE_IOMAP_EVENT(iomap_iter_srcmap);
171171

172-
TRACE_EVENT(iomap_writepage_map,
172+
TRACE_EVENT(iomap_add_to_ioend,
173173
TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len,
174174
struct iomap *iomap),
175175
TP_ARGS(inode, pos, dirty_len, iomap),

0 commit comments

Comments
 (0)