@@ -1909,6 +1909,137 @@ static bool should_reclaim_block_group(const struct btrfs_block_group *bg, u64 b
19091909 return true;
19101910}
19111911
/*
 * Try to reclaim (relocate) a single block group on behalf of the
 * reclaim worker.
 *
 * Bails early (returning 0) when the group has outstanding reservations,
 * pinned bytes, or is already read-only; when it has gone completely
 * empty (handed to the unused-bg machinery instead, unless async discard
 * is enabled); when it no longer meets the reclaim threshold; or when
 * the cleaner needs to sleep (unmount/read-only). Otherwise it marks the
 * group read-only, relocates the chunk, and updates the per-space-info
 * reclaim statistics.
 *
 * Returns 0 on success or when reclaim was skipped, a negative errno on
 * failure to set the group read-only or to relocate the chunk.
 *
 * Locking: takes space_info->groups_sem (write), space_info->lock and
 * bg->lock; all are dropped before btrfs_relocate_chunk() is called.
 */
1912+ static int btrfs_reclaim_block_group (struct btrfs_block_group * bg )
1913+ {
1914+ struct btrfs_fs_info * fs_info = bg -> fs_info ;
1915+ struct btrfs_space_info * space_info = bg -> space_info ;
1916+ u64 used ;
1917+ u64 reserved ;
1918+ u64 old_total ;
1919+ int ret = 0 ;
1920+
1921+ /* Don't race with allocators so take the groups_sem */
1922+ down_write (& space_info -> groups_sem );
1923+
1924+ spin_lock (& space_info -> lock );
1925+ spin_lock (& bg -> lock );
1926+ if (bg -> reserved || bg -> pinned || bg -> ro ) {
1927+ /*
1928+ * We want to bail if we made new allocations or have
1929+ * outstanding allocations in this block group. We do
1930+ * the ro check in case balance is currently acting on
1931+ * this block group.
1932+ */
1933+ spin_unlock (& bg -> lock );
1934+ spin_unlock (& space_info -> lock );
1935+ up_write (& space_info -> groups_sem );
1936+ return 0 ;
1937+ }
1938+
1939+ if (bg -> used == 0 ) {
1940+ /*
1941+ * It is possible that we trigger relocation on a block
1942+ * group as its extents are deleted and it first goes
1943+ * below the threshold, then shortly after goes empty.
1944+ *
1945+ * In this case, relocating it does delete it, but has
1946+ * some overhead in relocation specific metadata, looking
1947+ * for the non-existent extents and running some extra
1948+ * transactions, which we can avoid by using one of the
1949+ * other mechanisms for dealing with empty block groups.
1950+ */
1951+ if (!btrfs_test_opt (fs_info , DISCARD_ASYNC ))
1952+ btrfs_mark_bg_unused (bg );
1953+ spin_unlock (& bg -> lock );
1954+ spin_unlock (& space_info -> lock );
1955+ up_write (& space_info -> groups_sem );
1956+ return 0 ;
1957+ }
1958+
1959+ /*
1960+ * The block group might no longer meet the reclaim condition by
1961+ * the time we get around to reclaiming it, so to avoid
1962+ * reclaiming overly full block_groups, skip reclaiming them.
1963+ *
1964+ * Since the decision making process also depends on the amount
1965+ * being freed, pass in a fake giant value to skip that extra
1966+ * check, which is more meaningful when adding to the list in
1967+ * the first place.
1968+ */
1969+ if (!should_reclaim_block_group (bg , bg -> length )) {
1970+ spin_unlock (& bg -> lock );
1971+ spin_unlock (& space_info -> lock );
1972+ up_write (& space_info -> groups_sem );
1973+ return 0 ;
1974+ }
1975+
1976+ spin_unlock (& bg -> lock );
/* Snapshot total_bytes now; it is compared after relocation to detect shrink. */
1977+ old_total = space_info -> total_bytes ;
1978+ spin_unlock (& space_info -> lock );
1979+
1980+ /*
1981+ * Get out fast, in case we're read-only or unmounting the
1982+ * filesystem. It is OK to drop block groups from the list even
1983+ * for the read-only case. As we did take the super write lock,
1984+ * "mount -o remount,ro" won't happen and read-only filesystem
1985+ * means it is forced read-only due to a fatal error. So, it
1986+ * never gets back to read-write to let us reclaim again.
1987+ */
1988+ if (btrfs_need_cleaner_sleep (fs_info )) {
1989+ up_write (& space_info -> groups_sem );
1990+ return 0 ;
1991+ }
1992+
1993+ ret = inc_block_group_ro (bg , false);
1994+ up_write (& space_info -> groups_sem );
1995+ if (ret < 0 )
1996+ return ret ;
1997+
1998+ /*
1999+ * The amount of bytes reclaimed corresponds to the sum of the
2000+ * "used" and "reserved" counters. We have set the block group
2001+ * to RO above, which prevents reservations from happening but
2002+ * we may have existing reservations for which allocation has
2003+ * not yet been done - btrfs_update_block_group() was not yet
2004+ * called, which is where we will transfer a reserved extent's
2005+ * size from the "reserved" counter to the "used" counter - this
2006+ * happens when running delayed references. When we relocate the
2007+ * chunk below, relocation first flushes delalloc, waits for
2008+ * ordered extent completion (which is where we create delayed
2009+ * references for data extents) and commits the current
2010+ * transaction (which runs delayed references), and only after
2011+ * it does the actual work to move extents out of the block
2012+ * group. So the reported amount of reclaimed bytes is
2013+ * effectively the sum of the 'used' and 'reserved' counters.
2014+ */
2015+ spin_lock (& bg -> lock );
2016+ used = bg -> used ;
2017+ reserved = bg -> reserved ;
2018+ spin_unlock (& bg -> lock );
2019+
2020+ trace_btrfs_reclaim_block_group (bg );
2021+ ret = btrfs_relocate_chunk (fs_info , bg -> start , false);
2022+ if (ret ) {
/* Undo the RO transition taken above so the group is usable again. */
2023+ btrfs_dec_block_group_ro (bg );
2024+ btrfs_err (fs_info , "error relocating chunk %llu" ,
2025+ bg -> start );
/* Nothing was reclaimed on failure; don't inflate reclaim_bytes below. */
2026+ used = 0 ;
2027+ reserved = 0 ;
2028+ spin_lock (& space_info -> lock );
2029+ space_info -> reclaim_errors ++ ;
2030+ spin_unlock (& space_info -> lock );
2031+ }
2032+ spin_lock (& space_info -> lock );
2033+ space_info -> reclaim_count ++ ;
2034+ space_info -> reclaim_bytes += used ;
2035+ space_info -> reclaim_bytes += reserved ;
/*
 * total_bytes shrinking relative to the snapshot indicates the chunk was
 * removed; re-arm periodic reclaim in that case. NOTE(review): assumes
 * chunk removal is what updates total_bytes — confirm against
 * btrfs_remove_block_group().
 */
2036+ if (space_info -> total_bytes < old_total )
2037+ btrfs_set_periodic_reclaim_ready (space_info , true);
2038+ spin_unlock (& space_info -> lock );
2039+
2040+ return ret ;
2041+ }
2042+
19122043void btrfs_reclaim_bgs_work (struct work_struct * work )
19132044{
19142045 struct btrfs_fs_info * fs_info =
@@ -1942,10 +2073,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
19422073 */
19432074 list_sort (NULL , & fs_info -> reclaim_bgs , reclaim_bgs_cmp );
19442075 while (!list_empty (& fs_info -> reclaim_bgs )) {
1945- u64 used ;
1946- u64 reserved ;
1947- u64 old_total ;
1948- int ret = 0 ;
2076+ int ret ;
19492077
19502078 bg = list_first_entry (& fs_info -> reclaim_bgs ,
19512079 struct btrfs_block_group ,
@@ -1954,126 +2082,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
19542082
19552083 space_info = bg -> space_info ;
19562084 spin_unlock (& fs_info -> unused_bgs_lock );
2085+ ret = btrfs_reclaim_block_group (bg );
19572086
1958- /* Don't race with allocators so take the groups_sem */
1959- down_write (& space_info -> groups_sem );
1960-
1961- spin_lock (& space_info -> lock );
1962- spin_lock (& bg -> lock );
1963- if (bg -> reserved || bg -> pinned || bg -> ro ) {
1964- /*
1965- * We want to bail if we made new allocations or have
1966- * outstanding allocations in this block group. We do
1967- * the ro check in case balance is currently acting on
1968- * this block group.
1969- */
1970- spin_unlock (& bg -> lock );
1971- spin_unlock (& space_info -> lock );
1972- up_write (& space_info -> groups_sem );
1973- goto next ;
1974- }
1975- if (bg -> used == 0 ) {
1976- /*
1977- * It is possible that we trigger relocation on a block
1978- * group as its extents are deleted and it first goes
1979- * below the threshold, then shortly after goes empty.
1980- *
1981- * In this case, relocating it does delete it, but has
1982- * some overhead in relocation specific metadata, looking
1983- * for the non-existent extents and running some extra
1984- * transactions, which we can avoid by using one of the
1985- * other mechanisms for dealing with empty block groups.
1986- */
1987- if (!btrfs_test_opt (fs_info , DISCARD_ASYNC ))
1988- btrfs_mark_bg_unused (bg );
1989- spin_unlock (& bg -> lock );
1990- spin_unlock (& space_info -> lock );
1991- up_write (& space_info -> groups_sem );
1992- goto next ;
1993-
1994- }
1995- /*
1996- * The block group might no longer meet the reclaim condition by
1997- * the time we get around to reclaiming it, so to avoid
1998- * reclaiming overly full block_groups, skip reclaiming them.
1999- *
2000- * Since the decision making process also depends on the amount
2001- * being freed, pass in a fake giant value to skip that extra
2002- * check, which is more meaningful when adding to the list in
2003- * the first place.
2004- */
2005- if (!should_reclaim_block_group (bg , bg -> length )) {
2006- spin_unlock (& bg -> lock );
2007- spin_unlock (& space_info -> lock );
2008- up_write (& space_info -> groups_sem );
2009- goto next ;
2010- }
2011-
2012- spin_unlock (& bg -> lock );
2013- old_total = space_info -> total_bytes ;
2014- spin_unlock (& space_info -> lock );
2015-
2016- /*
2017- * Get out fast, in case we're read-only or unmounting the
2018- * filesystem. It is OK to drop block groups from the list even
2019- * for the read-only case. As we did take the super write lock,
2020- * "mount -o remount,ro" won't happen and read-only filesystem
2021- * means it is forced read-only due to a fatal error. So, it
2022- * never gets back to read-write to let us reclaim again.
2023- */
2024- if (btrfs_need_cleaner_sleep (fs_info )) {
2025- up_write (& space_info -> groups_sem );
2026- goto next ;
2027- }
2028-
2029- ret = inc_block_group_ro (bg , false);
2030- up_write (& space_info -> groups_sem );
2031- if (ret < 0 )
2032- goto next ;
2033-
2034- /*
2035- * The amount of bytes reclaimed corresponds to the sum of the
2036- * "used" and "reserved" counters. We have set the block group
2037- * to RO above, which prevents reservations from happening but
2038- * we may have existing reservations for which allocation has
2039- * not yet been done - btrfs_update_block_group() was not yet
2040- * called, which is where we will transfer a reserved extent's
2041- * size from the "reserved" counter to the "used" counter - this
2042- * happens when running delayed references. When we relocate the
2043- * chunk below, relocation first flushes delalloc, waits for
2044- * ordered extent completion (which is where we create delayed
2045- * references for data extents) and commits the current
2046- * transaction (which runs delayed references), and only after
2047- * it does the actual work to move extents out of the block
2048- * group. So the reported amount of reclaimed bytes is
2049- * effectively the sum of the 'used' and 'reserved' counters.
2050- */
2051- spin_lock (& bg -> lock );
2052- used = bg -> used ;
2053- reserved = bg -> reserved ;
2054- spin_unlock (& bg -> lock );
2055-
2056- trace_btrfs_reclaim_block_group (bg );
2057- ret = btrfs_relocate_chunk (fs_info , bg -> start , false);
2058- if (ret ) {
2059- btrfs_dec_block_group_ro (bg );
2060- btrfs_err (fs_info , "error relocating chunk %llu" ,
2061- bg -> start );
2062- used = 0 ;
2063- reserved = 0 ;
2064- spin_lock (& space_info -> lock );
2065- space_info -> reclaim_errors ++ ;
2066- spin_unlock (& space_info -> lock );
2067- }
2068- spin_lock (& space_info -> lock );
2069- space_info -> reclaim_count ++ ;
2070- space_info -> reclaim_bytes += used ;
2071- space_info -> reclaim_bytes += reserved ;
2072- if (space_info -> total_bytes < old_total )
2073- btrfs_set_periodic_reclaim_ready (space_info , true);
2074- spin_unlock (& space_info -> lock );
2075-
2076- next :
20772087 if (ret && !READ_ONCE (space_info -> periodic_reclaim ))
20782088 btrfs_link_bg_list (bg , & retry_list );
20792089 btrfs_put_block_group (bg );
0 commit comments