
Commit 95a5885

Anshuman Khandual authored and ctmarinas committed
arm64/mm: Reject memory removal that splits a kernel leaf mapping
Linear and vmemmap mappings that get torn down during a memory hot remove operation might contain leaf entries at any page table level. If the requested memory range's linear or vmemmap mappings fall within such leaf entries, new mappings need to be created for the remaining memory that was previously mapped by the leaf entry, following the standard break-before-make (BBM) rules. But the kernel cannot tolerate BBM on its own live mappings, and hence remapping into finer grained leaves is not possible on systems without BBML2_NOABORT. The memory hot remove operation currently does not perform such restructuring, so memory removal requests that would split a kernel leaf mapping need to be rejected.

While memory_hotplug.c does appear to permit hot removing arbitrary ranges of memory, the higher layers that drive memory_hotplug (e.g. ACPI, virtio, ...) all appear to treat memory as fixed size devices. So it is impossible to hot unplug a different amount than was previously hot plugged, and hence we should never see a rejection in practice, but adding the check makes us robust against a future change.

Cc: Will Deacon <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: https://lore.kernel.org/all/aWZYXhrT6D2M-7-N@willie-the-truck/
Reviewed-by: David Hildenbrand (Arm) <[email protected]>
Reviewed-by: Ryan Roberts <[email protected]>
Suggested-by: Ryan Roberts <[email protected]>
Signed-off-by: Anshuman Khandual <[email protected]>
Signed-off-by: Catalin Marinas <[email protected]>
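As a rough illustration of the constraint described above (not part of the commit): with 4K pages the linear map commonly uses 2MiB PMD block mappings, so an unplug edge that is not 2MiB aligned can land in the middle of a block entry, and honouring it would require breaking that live entry and remaking it as smaller leaves, i.e. the BBM sequence the kernel cannot tolerate on itself. A minimal standalone sketch of the alignment test follows, with made-up addresses; the real check in addr_splits_kernel_leaf() additionally walks the page tables to confirm a leaf entry is actually present.

/*
 * Hedged sketch, not kernel code: shows why a non-2MiB-aligned unplug
 * edge implies splitting a PMD block mapping. PMD_SIZE == 2MiB assumes
 * a 4K page size configuration; the addresses below are invented.
 */
#include <stdint.h>
#include <stdio.h>

#define PMD_SIZE	(2UL << 20)	/* 2MiB block mapping granule */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

int main(void)
{
	uint64_t base = 0xffff000040000000ULL;	/* 2MiB-aligned edge */
	uint64_t mid  = base + PMD_SIZE / 2;	/* lands mid-block */

	/* An aligned edge coincides with a block boundary: no split. */
	printf("0x%llx splits a PMD leaf? %s\n", (unsigned long long)base,
	       IS_ALIGNED(base, PMD_SIZE) ? "no" : "possibly");

	/* A misaligned edge cuts a 2MiB block in half: would need BBM. */
	printf("0x%llx splits a PMD leaf? %s\n", (unsigned long long)mid,
	       IS_ALIGNED(mid, PMD_SIZE) ? "no" : "possibly");
	return 0;
}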
1 parent 48478b9 commit 95a5885

1 file changed

Lines changed: 114 additions & 6 deletions

arch/arm64/mm/mmu.c
@@ -2014,6 +2014,107 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 	__remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size);
 }
 
+static bool addr_splits_kernel_leaf(unsigned long addr)
+{
+	pgd_t *pgdp, pgd;
+	p4d_t *p4dp, p4d;
+	pud_t *pudp, pud;
+	pmd_t *pmdp, pmd;
+	pte_t *ptep, pte;
+
+	/*
+	 * If the given address points at the start address of a
+	 * possible leaf, we certainly won't split. Otherwise,
+	 * check if we would actually split a leaf by traversing
+	 * the page tables further.
+	 */
+	if (IS_ALIGNED(addr, PGDIR_SIZE))
+		return false;
+
+	pgdp = pgd_offset_k(addr);
+	pgd = pgdp_get(pgdp);
+	if (!pgd_present(pgd))
+		return false;
+
+	if (IS_ALIGNED(addr, P4D_SIZE))
+		return false;
+
+	p4dp = p4d_offset(pgdp, addr);
+	p4d = p4dp_get(p4dp);
+	if (!p4d_present(p4d))
+		return false;
+
+	if (IS_ALIGNED(addr, PUD_SIZE))
+		return false;
+
+	pudp = pud_offset(p4dp, addr);
+	pud = pudp_get(pudp);
+	if (!pud_present(pud))
+		return false;
+
+	if (pud_leaf(pud))
+		return true;
+
+	if (IS_ALIGNED(addr, CONT_PMD_SIZE))
+		return false;
+
+	pmdp = pmd_offset(pudp, addr);
+	pmd = pmdp_get(pmdp);
+	if (!pmd_present(pmd))
+		return false;
+
+	if (pmd_cont(pmd))
+		return true;
+
+	if (IS_ALIGNED(addr, PMD_SIZE))
+		return false;
+
+	if (pmd_leaf(pmd))
+		return true;
+
+	if (IS_ALIGNED(addr, CONT_PTE_SIZE))
+		return false;
+
+	ptep = pte_offset_kernel(pmdp, addr);
+	pte = __ptep_get(ptep);
+	if (!pte_present(pte))
+		return false;
+
+	if (pte_cont(pte))
+		return true;
+
+	return !IS_ALIGNED(addr, PAGE_SIZE);
+}
+
+static bool can_unmap_without_split(unsigned long pfn, unsigned long nr_pages)
+{
+	unsigned long phys_start, phys_end, start, end;
+
+	phys_start = PFN_PHYS(pfn);
+	phys_end = phys_start + nr_pages * PAGE_SIZE;
+
+	/* PFN range's linear map edges are leaf entry aligned */
+	start = __phys_to_virt(phys_start);
+	end = __phys_to_virt(phys_end);
+	if (addr_splits_kernel_leaf(start) || addr_splits_kernel_leaf(end)) {
+		pr_warn("[%lx %lx] splits a leaf entry in linear map\n",
+			phys_start, phys_end);
+		return false;
+	}
+
+	/* PFN range's vmemmap edges are leaf entry aligned */
+	BUILD_BUG_ON(!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP));
+	start = (unsigned long)pfn_to_page(pfn);
+	end = (unsigned long)pfn_to_page(pfn + nr_pages);
+	if (addr_splits_kernel_leaf(start) || addr_splits_kernel_leaf(end)) {
+		pr_warn("[%lx %lx] splits a leaf entry in vmemmap\n",
+			phys_start, phys_end);
+		return false;
+	}
+	return true;
+}
+
 /*
  * This memory hotplug notifier helps prevent boot memory from being
  * inadvertently removed as it blocks pfn range offlining process in
@@ -2022,8 +2123,11 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
  * In future if and when boot memory could be removed, this notifier
  * should be dropped and free_hotplug_page_range() should handle any
  * reserved pages allocated during boot.
+ *
+ * This also blocks any memory remove that would have caused a split
+ * in leaf entry in kernel linear or vmemmap mapping.
  */
-static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
+static int prevent_memory_remove_notifier(struct notifier_block *nb,
 					   unsigned long action, void *data)
 {
 	struct mem_section *ms;
@@ -2069,11 +2173,15 @@ static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
 			return NOTIFY_DONE;
 		}
 	}
+
+	if (!can_unmap_without_split(pfn, arg->nr_pages))
+		return NOTIFY_BAD;
+
 	return NOTIFY_OK;
 }
 
-static struct notifier_block prevent_bootmem_remove_nb = {
-	.notifier_call = prevent_bootmem_remove_notifier,
+static struct notifier_block prevent_memory_remove_nb = {
+	.notifier_call = prevent_memory_remove_notifier,
 };
 
 /*
@@ -2123,21 +2231,21 @@ static void validate_bootmem_online(void)
 	}
 }
 
-static int __init prevent_bootmem_remove_init(void)
+static int __init prevent_memory_remove_init(void)
 {
 	int ret = 0;
 
 	if (!IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
 		return ret;
 
 	validate_bootmem_online();
-	ret = register_memory_notifier(&prevent_bootmem_remove_nb);
+	ret = register_memory_notifier(&prevent_memory_remove_nb);
 	if (ret)
 		pr_err("%s: Notifier registration failed %d\n", __func__, ret);
 
 	return ret;
 }
-early_initcall(prevent_bootmem_remove_init);
+early_initcall(prevent_memory_remove_init);
 #endif
 
 pte_t modify_prot_start_ptes(struct vm_area_struct *vma, unsigned long addr,
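
For context, the veto mechanism this patch builds on is the generic memory hotplug notifier chain: a callback that returns NOTIFY_BAD for MEM_GOING_OFFLINE makes core mm abort the offline operation. Below is a hedged, self-contained sketch of that pattern as a loadable module. The module name and the unconditional veto are hypothetical; register_memory_notifier(), struct memory_notify and the action codes are the real interfaces from linux/memory.h, and the actual policy lives in can_unmap_without_split() above.

/*
 * Hedged sketch: the generic memory-notifier veto pattern. Returning
 * NOTIFY_BAD for MEM_GOING_OFFLINE aborts the offline, which is how
 * can_unmap_without_split() failures take effect in the patch.
 */
#include <linux/init.h>
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/notifier.h>

static int demo_memory_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (action != MEM_GOING_OFFLINE)
		return NOTIFY_OK;

	/* Hypothetical policy: veto every offline attempt. */
	pr_info("vetoing offline of pfn range [%lx %lx]\n",
		arg->start_pfn, arg->start_pfn + arg->nr_pages);
	return NOTIFY_BAD;
}

static struct notifier_block demo_memory_nb = {
	.notifier_call = demo_memory_notifier,
};

static int __init demo_init(void)
{
	return register_memory_notifier(&demo_memory_nb);
}

static void __exit demo_exit(void)
{
	unregister_memory_notifier(&demo_memory_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");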
