Skip to content

Commit 4af54a5

Browse files
Panky-codeskawasaki
authored and committed
mm: add static PMD zero page
There are many places in the kernel where we need to zero out larger chunks, but the maximum segment we can zero out at a time via ZERO_PAGE is limited by PAGE_SIZE. This is especially annoying in block devices and filesystems where we attach multiple ZERO_PAGEs to the bio in different bvecs. With multipage bvec support in the block layer, it is much more efficient to send out larger zero pages as part of a single bvec. This concern was raised during the review of adding LBS support to XFS[1][2]. Usually huge_zero_folio is allocated on demand, and it will be deallocated by the shrinker if there are no users of it left. At the moment, the huge_zero_folio infrastructure refcount is tied to the lifetime of the process that created it. This might not work for the bio layer, as the completions can be async and the process that created the huge_zero_folio might no longer be alive. Add a config option STATIC_PMD_ZERO_PAGE that will always allocate the huge_zero_folio, and it will never be freed. This makes it possible to use the huge_zero_folio without having to pass any mm struct, and does not tie the lifetime of the zero folio to anything. memblock is used to allocate this PMD zero page during early boot. If the STATIC_PMD_ZERO_PAGE config option is enabled, then mm_get_huge_zero_folio() will simply return this page instead of dynamically allocating a new PMD page. As STATIC_PMD_ZERO_PAGE does not depend on THP, declare huge_zero_folio and huge_zero_pfn outside the THP config. [1] https://lore.kernel.org/linux-xfs/[email protected]/ [2] https://lore.kernel.org/linux-xfs/[email protected]/ Suggested-by: David Hildenbrand <[email protected]> Signed-off-by: Pankaj Raghav <[email protected]>
1 parent 375936e commit 4af54a5

5 files changed

Lines changed: 79 additions & 5 deletions

File tree

include/linux/mm.h

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4024,10 +4024,19 @@ static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
40244024

40254025
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
40264026

4027-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
4027+
#ifdef CONFIG_STATIC_PMD_ZERO_PAGE
4028+
extern void __init static_pmd_zero_init(void);
4029+
#else
4030+
static inline void __init static_pmd_zero_init(void)
4031+
{
4032+
return;
4033+
}
4034+
#endif
4035+
40284036
extern struct folio *huge_zero_folio;
40294037
extern unsigned long huge_zero_pfn;
40304038

4039+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
40314040
static inline bool is_huge_zero_folio(const struct folio *folio)
40324041
{
40334042
return READ_ONCE(huge_zero_folio) == folio;
@@ -4038,9 +4047,23 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
40384047
return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd);
40394048
}
40404049

4050+
#ifdef CONFIG_STATIC_PMD_ZERO_PAGE
/*
 * The static PMD zero folio is allocated once at boot and never freed, so
 * no per-mm refcounting is required: "get" simply reads the global folio
 * pointer and "put" is a no-op.  The mm argument is kept only for interface
 * compatibility with the dynamic (refcounted) variants below.
 */
static inline struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)
{
	return READ_ONCE(huge_zero_folio);
}

static inline void mm_put_huge_zero_folio(struct mm_struct *mm)
{
}
#else
struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
void mm_put_huge_zero_folio(struct mm_struct *mm);
#endif /* CONFIG_STATIC_PMD_ZERO_PAGE */
4066+
40444067
#else
40454068
static inline bool is_huge_zero_folio(const struct folio *folio)
40464069
{

mm/Kconfig

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,15 @@ config ARCH_WANTS_THP_SWAP
826826
config MM_ID
827827
def_bool n
828828

829+
config STATIC_PMD_ZERO_PAGE
	bool "Allocate a PMD page for zeroing"
	help
	  Typically huge_zero_folio, which is a PMD-sized page of zeroes, is
	  allocated on demand and deallocated when not in use. This option
	  will allocate a PMD-sized zero page during early boot, and
	  huge_zero_folio will use it instead of allocating dynamically.
	  The page is never freed.

	  Not suitable for memory constrained systems.
837+
829838
menuconfig TRANSPARENT_HUGEPAGE
830839
bool "Transparent Hugepage Support"
831840
depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT

mm/huge_memory.c

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,6 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
7575
struct shrink_control *sc);
7676
static bool split_underused_thp = true;
7777

78-
static atomic_t huge_zero_refcount;
79-
struct folio *huge_zero_folio __read_mostly;
80-
unsigned long huge_zero_pfn __read_mostly = ~0UL;
8178
unsigned long huge_anon_orders_always __read_mostly;
8279
unsigned long huge_anon_orders_madvise __read_mostly;
8380
unsigned long huge_anon_orders_inherit __read_mostly;
@@ -208,6 +205,23 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
208205
return orders;
209206
}
210207

208+
#ifdef CONFIG_STATIC_PMD_ZERO_PAGE
209+
static int huge_zero_page_shrinker_init(void)
210+
{
211+
return 0;
212+
}
213+
214+
static void huge_zero_page_shrinker_exit(void)
215+
{
216+
return;
217+
}
218+
#else
219+
220+
static struct shrinker *huge_zero_page_shrinker;
221+
static atomic_t huge_zero_refcount;
222+
struct folio *huge_zero_folio __read_mostly;
223+
unsigned long huge_zero_pfn __read_mostly = ~0UL;
224+
211225
static bool get_huge_zero_page(void)
212226
{
213227
struct folio *zero_folio;
@@ -288,7 +302,6 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
288302
return 0;
289303
}
290304

291-
static struct shrinker *huge_zero_page_shrinker;
292305
static int huge_zero_page_shrinker_init(void)
293306
{
294307
huge_zero_page_shrinker = shrinker_alloc(0, "thp-zero");
@@ -307,6 +320,7 @@ static void huge_zero_page_shrinker_exit(void)
307320
return;
308321
}
309322

323+
#endif
310324

311325
#ifdef CONFIG_SYSFS
312326
static ssize_t enabled_show(struct kobject *kobj,
@@ -2843,6 +2857,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
28432857
pte_t *pte;
28442858
int i;
28452859

2860+
// FIXME: can this be called with static zero page?
2861+
VM_BUG_ON(IS_ENABLED(CONFIG_STATIC_PMD_ZERO_PAGE));
28462862
/*
28472863
* Leave pmd empty until pte is filled note that it is fine to delay
28482864
* notification until mmu_notifier_invalidate_range_end() as we are

mm/memory.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <linux/kernel_stat.h>
4343
#include <linux/mm.h>
4444
#include <linux/mm_inline.h>
45+
#include <linux/memblock.h>
4546
#include <linux/sched/mm.h>
4647
#include <linux/sched/numa_balancing.h>
4748
#include <linux/sched/task.h>
@@ -177,6 +178,30 @@ static int __init init_zero_pfn(void)
177178
}
178179
early_initcall(init_zero_pfn);
179180

181+
#ifdef CONFIG_STATIC_PMD_ZERO_PAGE
/*
 * With CONFIG_STATIC_PMD_ZERO_PAGE the PMD zero folio is allocated once
 * from memblock during early boot and is never freed, so its lifetime is
 * not tied to any mm_struct.  File-scope variables are zero-initialized,
 * so no explicit "= NULL" is needed (checkpatch flags it).
 */
struct folio *huge_zero_folio __read_mostly;
unsigned long huge_zero_pfn __read_mostly = ~0UL;

/*
 * Allocate and initialize the static PMD-sized zero folio.
 *
 * Called from mm_core_init() before memblock_free_all().  The allocation
 * must be PMD_SIZE-aligned, not merely PAGE_SIZE-aligned: a folio mapped
 * by a single PMD entry needs a PMD-aligned pfn.
 *
 * On allocation failure huge_zero_folio stays NULL and huge_zero_pfn
 * stays ~0UL, i.e. the "no zero folio" state; mm_get_huge_zero_folio()
 * will then return NULL.
 */
void __init static_pmd_zero_init(void)
{
	void *alloc = memblock_alloc(PMD_SIZE, PMD_SIZE);

	if (!alloc)
		return;

	huge_zero_folio = virt_to_folio(alloc);
	huge_zero_pfn = page_to_pfn(virt_to_page(alloc));

	__folio_set_head(huge_zero_folio);
	prep_compound_head((struct page *)huge_zero_folio, PMD_ORDER);
	/* Ensure zero folio won't have large_rmappable flag set. */
	folio_clear_large_rmappable(huge_zero_folio);
	/* memblock_alloc() returns zeroed memory; zero again to be explicit. */
	folio_zero_range(huge_zero_folio, 0, PMD_SIZE);
}
#endif
204+
180205
void mm_trace_rss_stat(struct mm_struct *mm, int member)
181206
{
182207
trace_rss_stat(mm, member);

mm/mm_init.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2765,6 +2765,7 @@ void __init mm_core_init(void)
27652765
*/
27662766
kho_memory_init();
27672767

2768+
static_pmd_zero_init();
27682769
memblock_free_all();
27692770
mem_init();
27702771
kmem_cache_init();

0 commit comments

Comments
 (0)