Skip to content

Commit 28b3ab7

Browse files
Christoph Hellwig authored and kawasaki committed
iomap: refactor the writeback interface
Replace ->map_blocks with a new ->writeback_range, which differs in the following ways:

- it must also queue up the I/O for writeback, i.e. call into the slightly refactored and extended-in-scope iomap_add_to_ioend for each region
- it can handle only a part of the requested region, i.e. the retry loop for partial mappings moves to the caller
- it handles cleanup on failures as well, and thus also replaces the discard_folio method that was only implemented by XFS.

This will allow the iomap writeback code to also be used by file systems that are not block based, like fuse.

Co-developed-by: Joanne Koong <[email protected]>
Signed-off-by: Joanne Koong <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Brian Foster <[email protected]>
Acked-by: Damien Le Moal <[email protected]> # zonefs
Reviewed-by: "Darrick J. Wong" <[email protected]>
1 parent bfa1a58 commit 28b3ab7

8 files changed

Lines changed: 197 additions & 161 deletions

File tree

Documentation/filesystems/iomap/operations.rst

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ writeback.
271271
It does not lock ``i_rwsem`` or ``invalidate_lock``.
272272

273273
The dirty bit will be cleared for all folios run through the
274-
``->map_blocks`` machinery described below even if the writeback fails.
274+
``->writeback_range`` machinery described below even if the writeback fails.
275275
This is to prevent dirty folio clots when storage devices fail; an
276276
``-EIO`` is recorded for userspace to collect via ``fsync``.
277277

@@ -283,15 +283,14 @@ The ``ops`` structure must be specified and is as follows:
283283
.. code-block:: c
284284
285285
struct iomap_writeback_ops {
286-
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
287-
loff_t offset, unsigned len);
288-
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
289-
void (*discard_folio)(struct folio *folio, loff_t pos);
286+
ssize_t (*writeback_range)(struct iomap_writepage_ctx *wpc,
287+
struct folio *folio, u64 pos, unsigned int len, u64 end_pos);
288+
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
290289
};
291290
292291
The fields are as follows:
293292

294-
- ``map_blocks``: Sets ``wpc->iomap`` to the space mapping of the file
293+
- ``writeback_range``: Sets ``wpc->iomap`` to the space mapping of the file
295294
range (in bytes) given by ``pos`` and ``len``.
296295
iomap calls this function for each dirty fs block in each dirty folio,
297296
though it will `reuse mappings
@@ -306,6 +305,15 @@ The fields are as follows:
306305
This revalidation must be open-coded by the filesystem; it is
307306
unclear if ``iomap::validity_cookie`` can be reused for this
308307
purpose.
308+
309+
If this method fails to schedule I/O for any part of a dirty folio, it
310+
should throw away any reservations that may have been made for the write.
311+
The folio will be marked clean and an ``-EIO`` recorded in the
312+
pagecache.
313+
Filesystems can use this callback to `remove
314+
<https://lore.kernel.org/all/[email protected]/>`_
315+
delalloc reservations to avoid having delalloc reservations for
316+
clean pagecache.
309317
This function must be supplied by the filesystem.
310318

311319
- ``submit_ioend``: Allows the file systems to hook into writeback bio
@@ -316,18 +324,6 @@ The fields are as follows:
316324
transactions from process context before submitting the bio.
317325
This function is optional.
318326

319-
- ``discard_folio``: iomap calls this function after ``->map_blocks``
320-
fails to schedule I/O for any part of a dirty folio.
321-
The function should throw away any reservations that may have been
322-
made for the write.
323-
The folio will be marked clean and an ``-EIO`` recorded in the
324-
pagecache.
325-
Filesystems can use this callback to `remove
326-
<https://lore.kernel.org/all/[email protected]/>`_
327-
delalloc reservations to avoid having delalloc reservations for
328-
clean pagecache.
329-
This function is optional.
330-
331327
Pagecache Writeback Completion
332328
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
333329

block/fops.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -537,22 +537,29 @@ static void blkdev_readahead(struct readahead_control *rac)
537537
iomap_readahead(rac, &blkdev_iomap_ops);
538538
}
539539

540-
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
541-
struct inode *inode, loff_t offset, unsigned int len)
540+
static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc,
541+
struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
542542
{
543-
loff_t isize = i_size_read(inode);
543+
loff_t isize = i_size_read(wpc->inode);
544544

545545
if (WARN_ON_ONCE(offset >= isize))
546546
return -EIO;
547-
if (offset >= wpc->iomap.offset &&
548-
offset < wpc->iomap.offset + wpc->iomap.length)
549-
return 0;
550-
return blkdev_iomap_begin(inode, offset, isize - offset,
551-
IOMAP_WRITE, &wpc->iomap, NULL);
547+
548+
if (offset < wpc->iomap.offset ||
549+
offset >= wpc->iomap.offset + wpc->iomap.length) {
550+
int error;
551+
552+
error = blkdev_iomap_begin(wpc->inode, offset, isize - offset,
553+
IOMAP_WRITE, &wpc->iomap, NULL);
554+
if (error)
555+
return error;
556+
}
557+
558+
return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
552559
}
553560

554561
static const struct iomap_writeback_ops blkdev_writeback_ops = {
555-
.map_blocks = blkdev_map_blocks,
562+
.writeback_range = blkdev_writeback_range,
556563
};
557564

558565
static int blkdev_writepages(struct address_space *mapping,

fs/gfs2/bmap.c

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2469,23 +2469,25 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
24692469
return error;
24702470
}
24712471

2472-
static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
2473-
loff_t offset, unsigned int len)
2472+
static ssize_t gfs2_writeback_range(struct iomap_writepage_ctx *wpc,
2473+
struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
24742474
{
2475-
int ret;
2476-
2477-
if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
2475+
if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(wpc->inode))))
24782476
return -EIO;
24792477

2480-
if (offset >= wpc->iomap.offset &&
2481-
offset < wpc->iomap.offset + wpc->iomap.length)
2482-
return 0;
2478+
if (offset < wpc->iomap.offset ||
2479+
offset >= wpc->iomap.offset + wpc->iomap.length) {
2480+
int ret;
24832481

2484-
memset(&wpc->iomap, 0, sizeof(wpc->iomap));
2485-
ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap);
2486-
return ret;
2482+
memset(&wpc->iomap, 0, sizeof(wpc->iomap));
2483+
ret = gfs2_iomap_get(wpc->inode, offset, INT_MAX, &wpc->iomap);
2484+
if (ret)
2485+
return ret;
2486+
}
2487+
2488+
return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
24872489
}
24882490

24892491
const struct iomap_writeback_ops gfs2_writeback_ops = {
2490-
.map_blocks = gfs2_map_blocks,
2492+
.writeback_range = gfs2_writeback_range,
24912493
};

fs/iomap/buffered-io.c

Lines changed: 44 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,14 +1666,30 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,
16661666
* At the end of a writeback pass, there will be a cached ioend remaining on the
16671667
* writepage context that the caller will need to submit.
16681668
*/
1669-
static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
1670-
struct folio *folio, loff_t pos, loff_t end_pos, unsigned len)
1669+
ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
1670+
loff_t pos, loff_t end_pos, unsigned int dirty_len)
16711671
{
16721672
struct iomap_folio_state *ifs = folio->private;
16731673
size_t poff = offset_in_folio(folio, pos);
16741674
unsigned int ioend_flags = 0;
1675+
unsigned int map_len = min_t(u64, dirty_len,
1676+
wpc->iomap.offset + wpc->iomap.length - pos);
16751677
int error;
16761678

1679+
trace_iomap_add_to_ioend(wpc->inode, pos, dirty_len, &wpc->iomap);
1680+
1681+
WARN_ON_ONCE(!folio->private && map_len < dirty_len);
1682+
1683+
switch (wpc->iomap.type) {
1684+
case IOMAP_INLINE:
1685+
WARN_ON_ONCE(1);
1686+
return -EIO;
1687+
case IOMAP_HOLE:
1688+
return map_len;
1689+
default:
1690+
break;
1691+
}
1692+
16771693
if (wpc->iomap.type == IOMAP_UNWRITTEN)
16781694
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
16791695
if (wpc->iomap.flags & IOMAP_F_SHARED)
@@ -1691,11 +1707,11 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
16911707
wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags);
16921708
}
16931709

1694-
if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff))
1710+
if (!bio_add_folio(&wpc->ioend->io_bio, folio, map_len, poff))
16951711
goto new_ioend;
16961712

16971713
if (ifs)
1698-
atomic_add(len, &ifs->write_bytes_pending);
1714+
atomic_add(map_len, &ifs->write_bytes_pending);
16991715

17001716
/*
17011717
* Clamp io_offset and io_size to the incore EOF so that ondisk
@@ -1738,63 +1754,39 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
17381754
* Note that this defeats the ability to chain the ioends of
17391755
* appending writes.
17401756
*/
1741-
wpc->ioend->io_size += len;
1757+
wpc->ioend->io_size += map_len;
17421758
if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos)
17431759
wpc->ioend->io_size = end_pos - wpc->ioend->io_offset;
17441760

1745-
wbc_account_cgroup_owner(wpc->wbc, folio, len);
1746-
return 0;
1761+
wbc_account_cgroup_owner(wpc->wbc, folio, map_len);
1762+
return map_len;
17471763
}
1764+
EXPORT_SYMBOL_GPL(iomap_add_to_ioend);
17481765

1749-
static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
1750-
struct folio *folio, u64 pos, u64 end_pos, unsigned dirty_len,
1766+
static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
1767+
struct folio *folio, u64 pos, u32 rlen, u64 end_pos,
17511768
bool *wb_pending)
17521769
{
1753-
int error;
1754-
17551770
do {
1756-
unsigned map_len;
1757-
1758-
error = wpc->ops->map_blocks(wpc, wpc->inode, pos, dirty_len);
1759-
if (error)
1760-
break;
1761-
trace_iomap_writepage_map(wpc->inode, pos, dirty_len,
1762-
&wpc->iomap);
1771+
ssize_t ret;
17631772

1764-
map_len = min_t(u64, dirty_len,
1765-
wpc->iomap.offset + wpc->iomap.length - pos);
1766-
WARN_ON_ONCE(!folio->private && map_len < dirty_len);
1773+
ret = wpc->ops->writeback_range(wpc, folio, pos, rlen, end_pos);
1774+
if (WARN_ON_ONCE(ret == 0 || ret > rlen))
1775+
return -EIO;
1776+
if (ret < 0)
1777+
return ret;
1778+
rlen -= ret;
1779+
pos += ret;
17671780

1768-
switch (wpc->iomap.type) {
1769-
case IOMAP_INLINE:
1770-
WARN_ON_ONCE(1);
1771-
error = -EIO;
1772-
break;
1773-
case IOMAP_HOLE:
1774-
break;
1775-
default:
1776-
error = iomap_add_to_ioend(wpc, folio, pos, end_pos,
1777-
map_len);
1778-
if (!error)
1779-
*wb_pending = true;
1780-
break;
1781-
}
1782-
dirty_len -= map_len;
1783-
pos += map_len;
1784-
} while (dirty_len && !error);
1781+
/*
1782+
* Holes are not written back by ->writeback_range, so track
1783+
* if we did handle anything that is not a hole here.
1784+
*/
1785+
if (wpc->iomap.type != IOMAP_HOLE)
1786+
*wb_pending = true;
1787+
} while (rlen);
17851788

1786-
/*
1787-
* We cannot cancel the ioend directly here on error. We may have
1788-
* already set other pages under writeback and hence we have to run I/O
1789-
* completion to mark the error state of the pages under writeback
1790-
* appropriately.
1791-
*
1792-
* Just let the file system know what portion of the folio failed to
1793-
* map.
1794-
*/
1795-
if (error && wpc->ops->discard_folio)
1796-
wpc->ops->discard_folio(folio, pos);
1797-
return error;
1789+
return 0;
17981790
}
17991791

18001792
/*
@@ -1906,8 +1898,8 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
19061898
*/
19071899
end_aligned = round_up(end_pos, i_blocksize(inode));
19081900
while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
1909-
error = iomap_writepage_map_blocks(wpc, folio, pos, end_pos,
1910-
rlen, &wb_pending);
1901+
error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos,
1902+
&wb_pending);
19111903
if (error)
19121904
break;
19131905
pos += rlen;

fs/iomap/trace.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ DEFINE_EVENT(iomap_class, name, \
169169
DEFINE_IOMAP_EVENT(iomap_iter_dstmap);
170170
DEFINE_IOMAP_EVENT(iomap_iter_srcmap);
171171

172-
TRACE_EVENT(iomap_writepage_map,
172+
TRACE_EVENT(iomap_add_to_ioend,
173173
TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len,
174174
struct iomap *iomap),
175175
TP_ARGS(inode, pos, dirty_len, iomap),

0 commit comments

Comments
 (0)