Skip to content

Commit dc3588c

Browse files
zhangyi089 authored and gregkh committed
ext4: process folios writeback in bytes
commit 1bfe635 upstream.

Since ext4 supports large folios, processing writebacks in pages is no
longer appropriate, it can be modified to process writebacks in bytes.

Suggested-by: Jan Kara <[email protected]>
Signed-off-by: Zhang Yi <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 25bf10b commit dc3588c

2 files changed

Lines changed: 42 additions & 41 deletions

File tree

fs/ext4/inode.c

Lines changed: 36 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1668,11 +1668,12 @@ struct mpage_da_data {
16681668
unsigned int can_map:1; /* Can writepages call map blocks? */
16691669

16701670
/* These are internal state of ext4_do_writepages() */
1671-
pgoff_t first_page; /* The first page to write */
1672-
pgoff_t next_page; /* Current page to examine */
1673-
pgoff_t last_page; /* Last page to examine */
1671+
loff_t start_pos; /* The start pos to write */
1672+
loff_t next_pos; /* Current pos to examine */
1673+
loff_t end_pos; /* Last pos to examine */
1674+
16741675
/*
1675-
* Extent to map - this can be after first_page because that can be
1676+
* Extent to map - this can be after start_pos because that can be
16761677
* fully mapped. We somewhat abuse m_flags to store whether the extent
16771678
* is delalloc or unwritten.
16781679
*/
@@ -1692,38 +1693,38 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
16921693
struct inode *inode = mpd->inode;
16931694
struct address_space *mapping = inode->i_mapping;
16941695

1695-
/* This is necessary when next_page == 0. */
1696-
if (mpd->first_page >= mpd->next_page)
1696+
/* This is necessary when next_pos == 0. */
1697+
if (mpd->start_pos >= mpd->next_pos)
16971698
return;
16981699

16991700
mpd->scanned_until_end = 0;
1700-
index = mpd->first_page;
1701-
end = mpd->next_page - 1;
17021701
if (invalidate) {
17031702
ext4_lblk_t start, last;
1704-
start = index << (PAGE_SHIFT - inode->i_blkbits);
1705-
last = end << (PAGE_SHIFT - inode->i_blkbits);
1703+
start = EXT4_B_TO_LBLK(inode, mpd->start_pos);
1704+
last = mpd->next_pos >> inode->i_blkbits;
17061705

17071706
/*
17081707
* avoid racing with extent status tree scans made by
17091708
* ext4_insert_delayed_block()
17101709
*/
17111710
down_write(&EXT4_I(inode)->i_data_sem);
1712-
ext4_es_remove_extent(inode, start, last - start + 1);
1711+
ext4_es_remove_extent(inode, start, last - start);
17131712
up_write(&EXT4_I(inode)->i_data_sem);
17141713
}
17151714

17161715
folio_batch_init(&fbatch);
1717-
while (index <= end) {
1718-
nr = filemap_get_folios(mapping, &index, end, &fbatch);
1716+
index = mpd->start_pos >> PAGE_SHIFT;
1717+
end = mpd->next_pos >> PAGE_SHIFT;
1718+
while (index < end) {
1719+
nr = filemap_get_folios(mapping, &index, end - 1, &fbatch);
17191720
if (nr == 0)
17201721
break;
17211722
for (i = 0; i < nr; i++) {
17221723
struct folio *folio = fbatch.folios[i];
17231724

1724-
if (folio->index < mpd->first_page)
1725+
if (folio_pos(folio) < mpd->start_pos)
17251726
continue;
1726-
if (folio_next_index(folio) - 1 > end)
1727+
if (folio_next_index(folio) > end)
17271728
continue;
17281729
BUG_ON(!folio_test_locked(folio));
17291730
BUG_ON(folio_test_writeback(folio));
@@ -2025,7 +2026,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
20252026

20262027
static void mpage_folio_done(struct mpage_da_data *mpd, struct folio *folio)
20272028
{
2028-
mpd->first_page += folio_nr_pages(folio);
2029+
mpd->start_pos += folio_size(folio);
20292030
folio_unlock(folio);
20302031
}
20312032

@@ -2035,7 +2036,7 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
20352036
loff_t size;
20362037
int err;
20372038

2038-
BUG_ON(folio->index != mpd->first_page);
2039+
WARN_ON_ONCE(folio_pos(folio) != mpd->start_pos);
20392040
folio_clear_dirty_for_io(folio);
20402041
/*
20412042
* We have to be very careful here! Nothing protects writeback path
@@ -2447,7 +2448,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
24472448
* Update on-disk size after IO is submitted. Races with
24482449
* truncate are avoided by checking i_size under i_data_sem.
24492450
*/
2450-
disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
2451+
disksize = mpd->start_pos;
24512452
if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
24522453
int err2;
24532454
loff_t i_size;
@@ -2550,8 +2551,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
25502551
struct address_space *mapping = mpd->inode->i_mapping;
25512552
struct folio_batch fbatch;
25522553
unsigned int nr_folios;
2553-
pgoff_t index = mpd->first_page;
2554-
pgoff_t end = mpd->last_page;
2554+
pgoff_t index = mpd->start_pos >> PAGE_SHIFT;
2555+
pgoff_t end = mpd->end_pos >> PAGE_SHIFT;
25552556
xa_mark_t tag;
25562557
int i, err = 0;
25572558
int blkbits = mpd->inode->i_blkbits;
@@ -2566,7 +2567,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
25662567
tag = PAGECACHE_TAG_DIRTY;
25672568

25682569
mpd->map.m_len = 0;
2569-
mpd->next_page = index;
2570+
mpd->next_pos = mpd->start_pos;
25702571
if (ext4_should_journal_data(mpd->inode)) {
25712572
handle = ext4_journal_start(mpd->inode, EXT4_HT_WRITE_PAGE,
25722573
bpp);
@@ -2597,7 +2598,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
25972598
goto out;
25982599

25992600
/* If we can't merge this page, we are done. */
2600-
if (mpd->map.m_len > 0 && mpd->next_page != folio->index)
2601+
if (mpd->map.m_len > 0 &&
2602+
mpd->next_pos != folio_pos(folio))
26012603
goto out;
26022604

26032605
if (handle) {
@@ -2643,8 +2645,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
26432645
}
26442646

26452647
if (mpd->map.m_len == 0)
2646-
mpd->first_page = folio->index;
2647-
mpd->next_page = folio_next_index(folio);
2648+
mpd->start_pos = folio_pos(folio);
2649+
mpd->next_pos = folio_pos(folio) + folio_size(folio);
26482650
/*
26492651
* Writeout when we cannot modify metadata is simple.
26502652
* Just submit the page. For data=journal mode we
@@ -2787,18 +2789,18 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
27872789
writeback_index = mapping->writeback_index;
27882790
if (writeback_index)
27892791
cycled = 0;
2790-
mpd->first_page = writeback_index;
2791-
mpd->last_page = -1;
2792+
mpd->start_pos = writeback_index << PAGE_SHIFT;
2793+
mpd->end_pos = LLONG_MAX;
27922794
} else {
2793-
mpd->first_page = wbc->range_start >> PAGE_SHIFT;
2794-
mpd->last_page = wbc->range_end >> PAGE_SHIFT;
2795+
mpd->start_pos = wbc->range_start;
2796+
mpd->end_pos = wbc->range_end;
27952797
}
27962798

27972799
ext4_io_submit_init(&mpd->io_submit, wbc);
27982800
retry:
27992801
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2800-
tag_pages_for_writeback(mapping, mpd->first_page,
2801-
mpd->last_page);
2802+
tag_pages_for_writeback(mapping, mpd->start_pos >> PAGE_SHIFT,
2803+
mpd->end_pos >> PAGE_SHIFT);
28022804
blk_start_plug(&plug);
28032805

28042806
/*
@@ -2858,7 +2860,7 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
28582860
}
28592861
mpd->do_map = 1;
28602862

2861-
trace_ext4_da_write_pages(inode, mpd->first_page, wbc);
2863+
trace_ext4_da_write_pages(inode, mpd->start_pos, wbc);
28622864
ret = mpage_prepare_extent_to_map(mpd);
28632865
if (!ret && mpd->map.m_len)
28642866
ret = mpage_map_and_submit_extent(handle, mpd,
@@ -2915,8 +2917,8 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
29152917
blk_finish_plug(&plug);
29162918
if (!ret && !cycled && wbc->nr_to_write > 0) {
29172919
cycled = 1;
2918-
mpd->last_page = writeback_index - 1;
2919-
mpd->first_page = 0;
2920+
mpd->end_pos = (writeback_index << PAGE_SHIFT) - 1;
2921+
mpd->start_pos = 0;
29202922
goto retry;
29212923
}
29222924

@@ -2926,7 +2928,7 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
29262928
* Set the writeback_index so that range_cyclic
29272929
* mode will write it back later
29282930
*/
2929-
mapping->writeback_index = mpd->first_page;
2931+
mapping->writeback_index = mpd->start_pos >> PAGE_SHIFT;
29302932

29312933
out_writepages:
29322934
trace_ext4_writepages_result(inode, wbc, ret,

include/trace/events/ext4.h

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -483,31 +483,30 @@ TRACE_EVENT(ext4_writepages,
483483
);
484484

485485
TRACE_EVENT(ext4_da_write_pages,
486-
TP_PROTO(struct inode *inode, pgoff_t first_page,
486+
TP_PROTO(struct inode *inode, loff_t start_pos,
487487
struct writeback_control *wbc),
488488

489-
TP_ARGS(inode, first_page, wbc),
489+
TP_ARGS(inode, start_pos, wbc),
490490

491491
TP_STRUCT__entry(
492492
__field( dev_t, dev )
493493
__field( ino_t, ino )
494-
__field( pgoff_t, first_page )
494+
__field( loff_t, start_pos )
495495
__field( long, nr_to_write )
496496
__field( int, sync_mode )
497497
),
498498

499499
TP_fast_assign(
500500
__entry->dev = inode->i_sb->s_dev;
501501
__entry->ino = inode->i_ino;
502-
__entry->first_page = first_page;
502+
__entry->start_pos = start_pos;
503503
__entry->nr_to_write = wbc->nr_to_write;
504504
__entry->sync_mode = wbc->sync_mode;
505505
),
506506

507-
TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld "
508-
"sync_mode %d",
507+
TP_printk("dev %d,%d ino %lu start_pos 0x%llx nr_to_write %ld sync_mode %d",
509508
MAJOR(__entry->dev), MINOR(__entry->dev),
510-
(unsigned long) __entry->ino, __entry->first_page,
509+
(unsigned long) __entry->ino, __entry->start_pos,
511510
__entry->nr_to_write, __entry->sync_mode)
512511
);
513512

0 commit comments

Comments
 (0)