Skip to content

Commit b3039c5

Browse files
hansendc authored and gregkh committed
mm: introduce deferred freeing for kernel page tables
commit 5ba2f0a upstream. This introduces a conditional asynchronous mechanism, enabled by CONFIG_ASYNC_KERNEL_PGTABLE_FREE. When enabled, this mechanism defers the freeing of pages that are used as page tables for kernel address mappings. These pages are now queued to a work struct instead of being freed immediately. This deferred freeing allows for batch-freeing of page tables, providing a safe context for performing a single expensive operation (TLB flush) for a batch of kernel page tables instead of performing that expensive operation for each page table. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Dave Hansen <[email protected]> Signed-off-by: Lu Baolu <[email protected]> Reviewed-by: Jason Gunthorpe <[email protected]> Reviewed-by: Kevin Tian <[email protected]> Acked-by: David Hildenbrand <[email protected]> Acked-by: Mike Rapoport (Microsoft) <[email protected]> Cc: Alistair Popple <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Jann Horn <[email protected]> Cc: Jean-Philippe Brucker <[email protected]> Cc: Joerg Roedel <[email protected]> Cc: Liam Howlett <[email protected]> Cc: Lorenzo Stoakes <[email protected]> Cc: Matthew Wilcox (Oracle) <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Robin Murphy <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: "Uladzislau Rezki (Sony)" <[email protected]> Cc: Vasant Hegde <[email protected]> Cc: Vinicius Costa Gomes <[email protected]> Cc: Vlastimil Babka <[email protected]> Cc: Will Deacon <[email protected]> Cc: Yi Lai <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent a1593c9 commit b3039c5

3 files changed

Lines changed: 53 additions & 3 deletions

File tree

include/linux/mm.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3038,6 +3038,14 @@ static inline void __pagetable_free(struct ptdesc *pt)
30383038
__free_pages(page, compound_order(page));
30393039
}
30403040

/*
 * pagetable_free_kernel - free a page table that mapped kernel addresses.
 *
 * When CONFIG_ASYNC_KERNEL_PGTABLE_FREE is enabled, the real implementation
 * (in mm/pgtable-generic.c) queues @pt to a workqueue so kernel page tables
 * can be batch-freed later; per the commit message this gives a safe context
 * for one TLB flush per batch instead of one per table.  Otherwise this
 * inline fallback frees the page immediately via __pagetable_free().
 */
#ifdef CONFIG_ASYNC_KERNEL_PGTABLE_FREE
void pagetable_free_kernel(struct ptdesc *pt);
#else
static inline void pagetable_free_kernel(struct ptdesc *pt)
{
	__pagetable_free(pt);
}
#endif
30413049
/**
 * pagetable_free - Free pagetables
 * @pt:	The page table descriptor
 *
 * Kernel page tables (marked via ptdesc_test_kernel()) have their kernel
 * flag cleared and are handed to pagetable_free_kernel(), which may defer
 * the actual free; user page tables are released immediately.
 */
static inline void pagetable_free(struct ptdesc *pt)
{
	if (!ptdesc_test_kernel(pt)) {
		__pagetable_free(pt);
		return;
	}

	ptdesc_clear_kernel(pt);
	pagetable_free_kernel(pt);
}
30553065

30563066
#if defined(CONFIG_SPLIT_PTE_PTLOCKS)

mm/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,9 @@ config HAVE_GIGANTIC_FOLIOS
915915
def_bool (HUGETLB_PAGE && ARCH_HAS_GIGANTIC_PAGE) || \
916916
(ZONE_DEVICE && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
917917

918+
# Defers freeing of kernel page-table pages to a work item so they can be
# batch-freed (see mm/pgtable-generic.c).  Default off; intended to be
# selected by architectures/configs that need the deferred behavior.
config ASYNC_KERNEL_PGTABLE_FREE
	def_bool n
918921
# TODO: Allow to be enabled without THP
919922
config ARCH_SUPPORTS_HUGE_PFNMAP
920923
def_bool n

mm/pgtable-generic.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,3 +406,40 @@ pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
406406
pte_unmap_unlock(pte, ptl);
407407
goto again;
408408
}
409+
410+
#ifdef CONFIG_ASYNC_KERNEL_PGTABLE_FREE
/* Forward declaration: needed by the __WORK_INITIALIZER() below. */
static void kernel_pgtable_work_func(struct work_struct *work);

/*
 * Singleton holding kernel page-table pages whose freeing has been
 * deferred by pagetable_free_kernel().  Ptdescs are chained on @list
 * (via their pt_list member) and drained by @work.
 */
static struct {
	struct list_head list;
	/* protect above ptdesc lists */
	spinlock_t lock;
	struct work_struct work;
} kernel_pgtable_work = {
	.list = LIST_HEAD_INIT(kernel_pgtable_work.list),
	.lock = __SPIN_LOCK_UNLOCKED(kernel_pgtable_work.lock),
	.work = __WORK_INITIALIZER(kernel_pgtable_work.work, kernel_pgtable_work_func),
};
423+
424+
static void kernel_pgtable_work_func(struct work_struct *work)
425+
{
426+
struct ptdesc *pt, *next;
427+
LIST_HEAD(page_list);
428+
429+
spin_lock(&kernel_pgtable_work.lock);
430+
list_splice_tail_init(&kernel_pgtable_work.list, &page_list);
431+
spin_unlock(&kernel_pgtable_work.lock);
432+
433+
list_for_each_entry_safe(pt, next, &page_list, pt_list)
434+
__pagetable_free(pt);
435+
}
436+
437+
void pagetable_free_kernel(struct ptdesc *pt)
438+
{
439+
spin_lock(&kernel_pgtable_work.lock);
440+
list_add(&pt->pt_list, &kernel_pgtable_work.list);
441+
spin_unlock(&kernel_pgtable_work.lock);
442+
443+
schedule_work(&kernel_pgtable_work.work);
444+
}
445+
#endif

0 commit comments

Comments
 (0)