Skip to content

Commit 48478b9

Browse files
Anshuman Khandual authored and ctmarinas committed
arm64/mm: Enable batched TLB flush in unmap_hotplug_range()
During a memory hot remove operation, both linear and vmemmap mappings for the memory range being removed, get unmapped via unmap_hotplug_range() but mapped pages get freed only for vmemmap mapping. This is just a sequential operation where each table entry gets cleared, followed by a leaf specific TLB flush, and then followed by memory free operation when applicable. This approach was simple and uniform both for vmemmap and linear mappings. But linear mapping might contain CONT marked block memory where it becomes necessary to first clear out all entire in the range before a TLB flush. This is as per the architecture requirement. Hence batch all TLB flushes during the table tear down walk and finally do it in unmap_hotplug_range(). Prior to this fix, it was hypothetically possible for a speculative access to a higher address in the contiguous block to fill the TLB with shattered entries for the entire contiguous range after a lower address had already been cleared and invalidated. Due to the table entries being shattered, the subsequent TLB invalidation for the higher address would not then clear the TLB entries for the lower address, meaning stale TLB entries could persist. Besides it also helps in improving the performance via TLBI range operation along with reduced synchronization instructions. The time spent executing unmap_hotplug_range() improved 97% measured over a 2GB memory hot removal in KVM guest. This scheme is not applicable during vmemmap mapping tear down where memory needs to be freed and hence a TLB flush is required after clearing out page table entry. 
Cc: Will Deacon <[email protected]> Cc: [email protected] Cc: [email protected] Closes: https://lore.kernel.org/all/aWZYXhrT6D2M-7-N@willie-the-truck/ Fixes: bbd6ec6 ("arm64/mm: Enable memory hot remove") Cc: [email protected] Reviewed-by: David Hildenbrand (Arm) <[email protected]> Reviewed-by: Ryan Roberts <[email protected]> Signed-off-by: Ryan Roberts <[email protected]> Signed-off-by: Anshuman Khandual <[email protected]> Signed-off-by: Catalin Marinas <[email protected]>
1 parent 1f318b9 commit 48478b9

1 file changed

Lines changed: 20 additions & 16 deletions

File tree

arch/arm64/mm/mmu.c

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,10 +1458,14 @@ static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
14581458

14591459
WARN_ON(!pte_present(pte));
14601460
__pte_clear(&init_mm, addr, ptep);
1461-
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
1462-
if (free_mapped)
1461+
if (free_mapped) {
1462+
/* CONT blocks are not supported in the vmemmap */
1463+
WARN_ON(pte_cont(pte));
1464+
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
14631465
free_hotplug_page_range(pte_page(pte),
14641466
PAGE_SIZE, altmap);
1467+
}
1468+
/* unmap_hotplug_range() flushes TLB for !free_mapped */
14651469
} while (addr += PAGE_SIZE, addr < end);
14661470
}
14671471

@@ -1482,15 +1486,14 @@ static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
14821486
WARN_ON(!pmd_present(pmd));
14831487
if (pmd_sect(pmd)) {
14841488
pmd_clear(pmdp);
1485-
1486-
/*
1487-
* One TLBI should be sufficient here as the PMD_SIZE
1488-
* range is mapped with a single block entry.
1489-
*/
1490-
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
1491-
if (free_mapped)
1489+
if (free_mapped) {
1490+
/* CONT blocks are not supported in the vmemmap */
1491+
WARN_ON(pmd_cont(pmd));
1492+
flush_tlb_kernel_range(addr, addr + PMD_SIZE);
14921493
free_hotplug_page_range(pmd_page(pmd),
14931494
PMD_SIZE, altmap);
1495+
}
1496+
/* unmap_hotplug_range() flushes TLB for !free_mapped */
14941497
continue;
14951498
}
14961499
WARN_ON(!pmd_table(pmd));
@@ -1515,15 +1518,12 @@ static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
15151518
WARN_ON(!pud_present(pud));
15161519
if (pud_sect(pud)) {
15171520
pud_clear(pudp);
1518-
1519-
/*
1520-
* One TLBI should be sufficient here as the PUD_SIZE
1521-
* range is mapped with a single block entry.
1522-
*/
1523-
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
1524-
if (free_mapped)
1521+
if (free_mapped) {
1522+
flush_tlb_kernel_range(addr, addr + PUD_SIZE);
15251523
free_hotplug_page_range(pud_page(pud),
15261524
PUD_SIZE, altmap);
1525+
}
1526+
/* unmap_hotplug_range() flushes TLB for !free_mapped */
15271527
continue;
15281528
}
15291529
WARN_ON(!pud_table(pud));
@@ -1553,6 +1553,7 @@ static void unmap_hotplug_p4d_range(pgd_t *pgdp, unsigned long addr,
15531553
static void unmap_hotplug_range(unsigned long addr, unsigned long end,
15541554
bool free_mapped, struct vmem_altmap *altmap)
15551555
{
1556+
unsigned long start = addr;
15561557
unsigned long next;
15571558
pgd_t *pgdp, pgd;
15581559

@@ -1574,6 +1575,9 @@ static void unmap_hotplug_range(unsigned long addr, unsigned long end,
15741575
WARN_ON(!pgd_present(pgd));
15751576
unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped, altmap);
15761577
} while (addr = next, addr < end);
1578+
1579+
if (!free_mapped)
1580+
flush_tlb_kernel_range(start, end);
15771581
}
15781582

15791583
static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,

0 commit comments

Comments (0)