Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ config X86
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64
select ARCH_WANT_HUGETLB_VMEMMAP_PREINIT if X86_64
select ARCH_WANTS_THP_SWAP if X86_64
select ARCH_WANTS_STATIC_HUGE_ZERO_FOLIO if X86_64
select ARCH_HAS_PARANOID_L1D_FLUSH
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select BUILDTIME_TABLE_SORT
Expand Down
15 changes: 8 additions & 7 deletions block/blk-lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ static void __blkdev_issue_zero_pages(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
struct bio **biop, unsigned int flags)
{
struct folio *zero_folio = largest_zero_folio();

while (nr_sects) {
unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
struct bio *bio;
Expand All @@ -208,15 +210,14 @@ static void __blkdev_issue_zero_pages(struct block_device *bdev,
break;

do {
unsigned int len, added;
unsigned int len;

len = min_t(sector_t,
PAGE_SIZE, nr_sects << SECTOR_SHIFT);
added = bio_add_page(bio, ZERO_PAGE(0), len, 0);
if (added < len)
len = min_t(sector_t, folio_size(zero_folio),
nr_sects << SECTOR_SHIFT);
if (!bio_add_folio(bio, zero_folio, len, 0))
break;
nr_sects -= added >> SECTOR_SHIFT;
sector += added >> SECTOR_SHIFT;
nr_sects -= len >> SECTOR_SHIFT;
sector += len >> SECTOR_SHIFT;
} while (nr_sects);

*biop = bio_chain_and_submit(*biop, bio);
Expand Down
35 changes: 35 additions & 0 deletions include/linux/huge_mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);

extern struct folio *huge_zero_folio;
extern unsigned long huge_zero_pfn;
extern atomic_t huge_zero_folio_is_static;

static inline bool is_huge_zero_folio(const struct folio *folio)
{
Expand All @@ -494,6 +495,18 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)

struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
void mm_put_huge_zero_folio(struct mm_struct *mm);
struct folio *__get_static_huge_zero_folio(void);

/*
 * get_static_huge_zero_folio - Return the static (never freed) huge zero
 * folio, if available.
 *
 * Returns NULL unless CONFIG_STATIC_HUGE_ZERO_FOLIO is enabled. Once the
 * huge zero folio has been marked static (huge_zero_folio_is_static set),
 * the fast path returns huge_zero_folio directly without touching its
 * refcount; otherwise the slow path __get_static_huge_zero_folio() tries
 * to allocate and pin it, and may still return NULL on failure.
 */
static inline struct folio *get_static_huge_zero_folio(void)
{
	if (!IS_ENABLED(CONFIG_STATIC_HUGE_ZERO_FOLIO))
		return NULL;

	/* Fast path: the folio was already allocated and pinned forever. */
	if (likely(atomic_read(&huge_zero_folio_is_static)))
		return huge_zero_folio;

	return __get_static_huge_zero_folio();
}

static inline bool thp_migration_supported(void)
{
Expand Down Expand Up @@ -685,6 +698,11 @@ static inline int change_huge_pud(struct mmu_gather *tlb,
{
return 0;
}

/* !CONFIG_TRANSPARENT_HUGEPAGE stub: there is no huge zero folio. */
static inline struct folio *get_static_huge_zero_folio(void)
{
	return NULL;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline int split_folio_to_list_to_order(struct folio *folio,
Expand All @@ -698,4 +716,21 @@ static inline int split_folio_to_order(struct folio *folio, int new_order)
return split_folio_to_list_to_order(folio, NULL, new_order);
}

/*
 * largest_zero_folio - Return the largest zero-filled folio available.
 *
 * Returns the static huge zero folio when CONFIG_STATIC_HUGE_ZERO_FOLIO is
 * enabled and the folio could be obtained; otherwise falls back to the
 * order-0 ZERO_PAGE folio (the fallback can also happen with the config
 * enabled, if allocating the huge zero folio failed).
 *
 * Callers must query the actual size with folio_size() rather than assume
 * a particular folio size.
 */
static inline struct folio *largest_zero_folio(void)
{
	struct folio *folio = get_static_huge_zero_folio();

	if (folio)
		return folio;
	/* Fallback: the shared order-0 zero page, wrapped as a folio. */
	return page_folio(ZERO_PAGE(0));
}
#endif /* _LINUX_HUGE_MM_H */
2 changes: 1 addition & 1 deletion include/linux/mm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1758,7 +1758,7 @@ enum {
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */
#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */
#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
#define MMF_HUGE_ZERO_FOLIO 23 /* mm has ever used the global huge zero folio */
#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
#define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */
Expand Down
21 changes: 21 additions & 0 deletions mm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,27 @@ config ARCH_WANT_GENERAL_HUGETLB
config ARCH_WANTS_THP_SWAP
def_bool n

config ARCH_WANTS_STATIC_HUGE_ZERO_FOLIO
def_bool n

config STATIC_HUGE_ZERO_FOLIO
bool "Allocate a PMD sized folio for zeroing"
depends on ARCH_WANTS_STATIC_HUGE_ZERO_FOLIO && TRANSPARENT_HUGEPAGE
help
Without this config enabled, the huge zero folio is allocated on
demand and freed under memory pressure once no longer in use.
To detect remaining users reliably, references to the huge zero folio
must be tracked precisely, so it is commonly only available for mapping
it into user page tables.

With this config enabled, the huge zero folio can also be used
for other purposes that do not implement precise reference counting:
it is still allocated on demand, but never freed, allowing for more
	  widespread use, for example, when performing I/O similar to the
traditional shared zeropage.

Not suitable for memory constrained systems.

config MM_ID
def_bool n

Expand Down
86 changes: 65 additions & 21 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
static bool split_underused_thp = true;

static atomic_t huge_zero_refcount;
atomic_t huge_zero_folio_is_static __read_mostly;
struct folio *huge_zero_folio __read_mostly;
unsigned long huge_zero_pfn __read_mostly = ~0UL;
unsigned long huge_anon_orders_always __read_mostly;
Expand Down Expand Up @@ -207,7 +208,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
return orders;
}

static bool get_huge_zero_page(void)
static bool get_huge_zero_folio(void)
{
struct folio *zero_folio;
retry:
Expand Down Expand Up @@ -237,7 +238,7 @@ static bool get_huge_zero_page(void)
return true;
}

static void put_huge_zero_page(void)
static void put_huge_zero_folio(void)
{
/*
* Counter should never go to zero here. Only shrinker can put
Expand All @@ -248,36 +249,79 @@ static void put_huge_zero_page(void)

struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)
{
if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
if (test_bit(MMF_HUGE_ZERO_FOLIO, &mm->flags))
return READ_ONCE(huge_zero_folio);

if (!get_huge_zero_page())
if (!get_huge_zero_folio())
return NULL;

if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
put_huge_zero_page();
if (test_and_set_bit(MMF_HUGE_ZERO_FOLIO, &mm->flags))
put_huge_zero_folio();

return READ_ONCE(huge_zero_folio);
}

void mm_put_huge_zero_folio(struct mm_struct *mm)
{
if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
put_huge_zero_page();
if (test_bit(MMF_HUGE_ZERO_FOLIO, &mm->flags))
put_huge_zero_folio();
}

static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
struct shrink_control *sc)
#ifdef CONFIG_STATIC_HUGE_ZERO_FOLIO

/*
 * __get_static_huge_zero_folio - Slow path of get_static_huge_zero_folio().
 *
 * Allocate the huge zero folio on first use and pin it forever: the
 * reference taken here is never dropped (except when losing the race to
 * mark it static), so the shrinker can never observe the refcount drop
 * back to 1 and will never free the folio.
 *
 * Returns the huge zero folio, or NULL if allocation failed or slab is
 * not yet available (too early during boot).
 */
struct folio *__get_static_huge_zero_folio(void)
{
	static unsigned long fail_count_clear_timer;
	static atomic_t huge_zero_static_fail_count __read_mostly;

	if (unlikely(!slab_is_available()))
		return NULL;

	/*
	 * If we failed to allocate a huge zero folio, just refrain from
	 * trying for one minute before retrying to get a reference again.
	 */
	if (atomic_read(&huge_zero_static_fail_count) > 1) {
		if (time_before(jiffies, fail_count_clear_timer))
			return NULL;
		atomic_set(&huge_zero_static_fail_count, 0);
	}
	/*
	 * Our raised reference will prevent the shrinker from ever having
	 * success.
	 */
	if (!get_huge_zero_folio()) {
		int count = atomic_inc_return(&huge_zero_static_fail_count);

		/*
		 * Use "jiffies", not get_jiffies_64(): the timer is an
		 * unsigned long compared via time_before(jiffies, ...), so a
		 * 64-bit value would be truncated on 32-bit and mix time
		 * bases. Benign race on the timer store is tolerable here.
		 */
		if (count > 1)
			fail_count_clear_timer = jiffies + 60 * HZ;

		return NULL;
	}

	/* Lost the race to mark it static? Drop our extra reference. */
	if (atomic_cmpxchg(&huge_zero_folio_is_static, 0, 1) != 0)
		put_huge_zero_folio();

	return huge_zero_folio;
}
#endif /* CONFIG_STATIC_HUGE_ZERO_FOLIO */

/*
 * Shrinker count callback: report HPAGE_PMD_NR reclaimable pages when the
 * huge zero folio can be freed, i.e. only the allocation-time reference
 * remains (refcount == 1), otherwise 0.
 */
static unsigned long shrink_huge_zero_folio_count(struct shrinker *shrink,
		struct shrink_control *sc)
{
	/* we can free the zero folio only if the last reference remains */
	return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0;
}

static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
struct shrink_control *sc)
static unsigned long shrink_huge_zero_folio_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
struct folio *zero_folio = xchg(&huge_zero_folio, NULL);
struct folio *zero_folio;

if (WARN_ON_ONCE(atomic_read(&huge_zero_folio_is_static)))
return 0;
zero_folio = xchg(&huge_zero_folio, NULL);
BUG_ON(zero_folio == NULL);
WRITE_ONCE(huge_zero_pfn, ~0UL);
folio_put(zero_folio);
Expand All @@ -287,7 +331,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
return 0;
}

static struct shrinker *huge_zero_page_shrinker;
static struct shrinker *huge_zero_folio_shrinker;

#ifdef CONFIG_SYSFS
static ssize_t enabled_show(struct kobject *kobj,
Expand Down Expand Up @@ -849,22 +893,22 @@ static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)

static int __init thp_shrinker_init(void)
{
huge_zero_page_shrinker = shrinker_alloc(0, "thp-zero");
if (!huge_zero_page_shrinker)
huge_zero_folio_shrinker = shrinker_alloc(0, "thp-zero");
if (!huge_zero_folio_shrinker)
return -ENOMEM;

deferred_split_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE |
SHRINKER_MEMCG_AWARE |
SHRINKER_NONSLAB,
"thp-deferred_split");
if (!deferred_split_shrinker) {
shrinker_free(huge_zero_page_shrinker);
shrinker_free(huge_zero_folio_shrinker);
return -ENOMEM;
}

huge_zero_page_shrinker->count_objects = shrink_huge_zero_page_count;
huge_zero_page_shrinker->scan_objects = shrink_huge_zero_page_scan;
shrinker_register(huge_zero_page_shrinker);
huge_zero_folio_shrinker->count_objects = shrink_huge_zero_folio_count;
huge_zero_folio_shrinker->scan_objects = shrink_huge_zero_folio_scan;
shrinker_register(huge_zero_folio_shrinker);

deferred_split_shrinker->count_objects = deferred_split_count;
deferred_split_shrinker->scan_objects = deferred_split_scan;
Expand All @@ -875,7 +919,7 @@ static int __init thp_shrinker_init(void)

static void __init thp_shrinker_exit(void)
{
shrinker_free(huge_zero_page_shrinker);
shrinker_free(huge_zero_folio_shrinker);
shrinker_free(deferred_split_shrinker);
}

Expand Down
Loading