Skip to content

Commit 213f7f2

Browse files
Panky-codeskawasaki
authored and committed
mm: add static PMD zero page
There are many places in the kernel where we need to zero out larger chunks, but the maximum segment we can zero out at a time via ZERO_PAGE is limited by PAGE_SIZE. This is especially annoying in block devices and filesystems where we attach multiple ZERO_PAGEs to the bio in different bvecs. With multipage bvec support in the block layer, it is much more efficient to send out larger zero pages as part of a single bvec. This concern was raised during the review of adding LBS support to XFS[1][2]. Usually huge_zero_folio is allocated on demand, and it will be deallocated by the shrinker if there are no users of it left. At the moment, the huge_zero_folio infrastructure refcount is tied to the lifetime of the process that created it. This might not work for the bio layer, as the completions can be async and the process that created the huge_zero_folio might no longer be alive. Add a config option STATIC_PMD_ZERO_PAGE that will always allocate the huge_zero_folio, which will never be freed. This makes it possible to use the huge_zero_folio without having to pass any mm struct, and does not tie the lifetime of the zero folio to anything. memblock is used to allocate this PMD zero page during early boot. If the STATIC_PMD_ZERO_PAGE config option is enabled, then mm_get_huge_zero_folio() will simply return this page instead of dynamically allocating a new PMD page. As STATIC_PMD_ZERO_PAGE does not depend on THP, declare huge_zero_folio and huge_zero_pfn outside the THP config. [1] https://lore.kernel.org/linux-xfs/[email protected]/ [2] https://lore.kernel.org/linux-xfs/[email protected]/ Suggested-by: David Hildenbrand <[email protected]> Signed-off-by: Pankaj Raghav <[email protected]>
1 parent ee20a41 commit 213f7f2

5 files changed

Lines changed: 79 additions & 5 deletions

File tree

include/linux/mm.h

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4018,10 +4018,19 @@ static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
40184018

40194019
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
40204020

4021-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
4021+
#ifdef CONFIG_STATIC_PMD_ZERO_PAGE
4022+
extern void __init static_pmd_zero_init(void);
4023+
#else
4024+
static inline void __init static_pmd_zero_init(void)
4025+
{
4026+
return;
4027+
}
4028+
#endif
4029+
40224030
extern struct folio *huge_zero_folio;
40234031
extern unsigned long huge_zero_pfn;
40244032

4033+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
40254034
static inline bool is_huge_zero_folio(const struct folio *folio)
40264035
{
40274036
return READ_ONCE(huge_zero_folio) == folio;
@@ -4032,9 +4041,23 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
40324041
return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd);
40334042
}
40344043

4044+
#ifdef CONFIG_STATIC_PMD_ZERO_PAGE
/*
 * The static PMD zero folio is allocated at boot and never freed, so no
 * per-mm refcounting is needed: "get" just reads the global pointer and
 * "put" has nothing to release.  The mm argument is kept only to match
 * the dynamic (refcounted) variants declared below.
 */
static inline struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)
{
	return READ_ONCE(huge_zero_folio);
}

static inline void mm_put_huge_zero_folio(struct mm_struct *mm)
{
}
#else
struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
void mm_put_huge_zero_folio(struct mm_struct *mm);
#endif /* CONFIG_STATIC_PMD_ZERO_PAGE */
4060+
40384061
#else
40394062
static inline bool is_huge_zero_folio(const struct folio *folio)
40404063
{

mm/Kconfig

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,15 @@ config ARCH_WANTS_THP_SWAP
826826
config MM_ID
827827
def_bool n
828828

829+
config STATIC_PMD_ZERO_PAGE
830+
bool "Allocate a PMD page for zeroing"
831+
help
832+
Typically huge_zero_folio, which is a PMD page of zeroes, is allocated
833+
on demand and deallocated when not in use. This option will
834+
allocate a PMD sized zero page during early boot and huge_zero_folio will
835+
use it instead of allocating one dynamically.
836+
Not suitable for memory constrained systems.
837+
829838
menuconfig TRANSPARENT_HUGEPAGE
830839
bool "Transparent Hugepage Support"
831840
depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT

mm/huge_memory.c

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,6 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
7575
struct shrink_control *sc);
7676
static bool split_underused_thp = true;
7777

78-
static atomic_t huge_zero_refcount;
79-
struct folio *huge_zero_folio __read_mostly;
80-
unsigned long huge_zero_pfn __read_mostly = ~0UL;
8178
unsigned long huge_anon_orders_always __read_mostly;
8279
unsigned long huge_anon_orders_madvise __read_mostly;
8380
unsigned long huge_anon_orders_inherit __read_mostly;
@@ -208,6 +205,23 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
208205
return orders;
209206
}
210207

208+
#ifdef CONFIG_STATIC_PMD_ZERO_PAGE
/*
 * The static PMD zero page is never reclaimed, so there is no shrinker
 * to register or tear down; both hooks are no-ops.
 */
static int huge_zero_page_shrinker_init(void)
{
	return 0;
}

static void huge_zero_page_shrinker_exit(void)
{
}
218+
#else
219+
220+
static struct shrinker *huge_zero_page_shrinker;
221+
static atomic_t huge_zero_refcount;
222+
struct folio *huge_zero_folio __read_mostly;
223+
unsigned long huge_zero_pfn __read_mostly = ~0UL;
224+
211225
static bool get_huge_zero_page(void)
212226
{
213227
struct folio *zero_folio;
@@ -288,7 +302,6 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
288302
return 0;
289303
}
290304

291-
static struct shrinker *huge_zero_page_shrinker;
292305
static int huge_zero_page_shrinker_init(void)
293306
{
294307
huge_zero_page_shrinker = shrinker_alloc(0, "thp-zero");
@@ -307,6 +320,7 @@ static void huge_zero_page_shrinker_exit(void)
307320
return;
308321
}
309322

323+
#endif
310324

311325
#ifdef CONFIG_SYSFS
312326
static ssize_t enabled_show(struct kobject *kobj,
@@ -2843,6 +2857,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
28432857
pte_t *pte;
28442858
int i;
28452859

2860+
// FIXME: can this be called with static zero page?
2861+
VM_BUG_ON(IS_ENABLED(CONFIG_STATIC_PMD_ZERO_PAGE));
28462862
/*
28472863
* Leave pmd empty until pte is filled note that it is fine to delay
28482864
* notification until mmu_notifier_invalidate_range_end() as we are

mm/memory.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <linux/kernel_stat.h>
4343
#include <linux/mm.h>
4444
#include <linux/mm_inline.h>
45+
#include <linux/memblock.h>
4546
#include <linux/sched/mm.h>
4647
#include <linux/sched/numa_balancing.h>
4748
#include <linux/sched/task.h>
@@ -159,6 +160,30 @@ static int __init init_zero_pfn(void)
159160
}
160161
early_initcall(init_zero_pfn);
161162

163+
#ifdef CONFIG_STATIC_PMD_ZERO_PAGE
struct folio *huge_zero_folio __read_mostly = NULL;
unsigned long huge_zero_pfn __read_mostly = ~0UL;

/*
 * Allocate the static PMD zero page from memblock during early boot and
 * initialize it as a compound page (folio) full of zeroes.  It is never
 * freed, so no refcounting or shrinker is needed for it.
 *
 * The allocation must be PMD_SIZE aligned (not merely PAGE_SIZE aligned):
 * the page is installed into PMD leaf entries, which can only map a
 * naturally aligned PMD-sized physical range.
 */
void __init static_pmd_zero_init(void)
{
	void *alloc = memblock_alloc(PMD_SIZE, PMD_SIZE);

	/*
	 * On allocation failure boot continues; huge_zero_folio stays NULL
	 * and huge_zero_pfn stays ~0UL, so lookups simply never match.
	 */
	if (!alloc)
		return;

	huge_zero_folio = virt_to_folio(alloc);
	huge_zero_pfn = page_to_pfn(virt_to_page(alloc));

	__folio_set_head(huge_zero_folio);
	prep_compound_head((struct page *)huge_zero_folio, PMD_ORDER);
	/* Ensure zero folio won't have large_rmappable flag set. */
	folio_clear_large_rmappable(huge_zero_folio);
	folio_zero_range(huge_zero_folio, 0, PMD_SIZE);
}
#endif
186+
162187
void mm_trace_rss_stat(struct mm_struct *mm, int member)
163188
{
164189
trace_rss_stat(mm, member);

mm/mm_init.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2765,6 +2765,7 @@ void __init mm_core_init(void)
27652765
*/
27662766
kho_memory_init();
27672767

2768+
static_pmd_zero_init();
27682769
memblock_free_all();
27692770
mem_init();
27702771
kmem_cache_init();

0 commit comments

Comments
 (0)