Skip to content

Commit c535d13

Browse files
committed
Merge branch 'kvm-arm64/cacheable-pfnmap' into kvmarm/next
* kvm-arm64/cacheable-pfnmap: : Cacheable PFNMAP support at stage-2, courtesy of Ankit Agrawal : : For historical reasons, KVM only allows cacheable mappings at stage-2 : when a kernel alias exists in the direct map for the memory region. On : hardware without FEAT_S2FWB, this is necessary as KVM must do cache : maintenance to keep guest/host accesses coherent. : : This is unnecessarily restrictive on systems with FEAT_S2FWB and : CTR_EL0.DIC, as KVM no longer needs to perform cache maintenance to : maintain correctness. : : Allow cacheable mappings at stage-2 on supporting hardware when the : corresponding VMA has cacheable memory attributes and advertise a : capability to userspace such that a VMM can determine if a stage-2 : mapping can be established (e.g. VFIO device). KVM: arm64: Expose new KVM cap for cacheable PFNMAP KVM: arm64: Allow cacheable stage 2 mapping using VMA flags KVM: arm64: Block cacheable PFNMAP mapping KVM: arm64: Assume non-PFNMAP/MIXEDMAP VMAs can be mapped cacheable KVM: arm64: Rename the device variable to s2_force_noncacheable Signed-off-by: Oliver Upton <[email protected]>
2 parents 86731a2 + f55ce5a commit c535d13

5 files changed

Lines changed: 103 additions & 24 deletions

File tree

Documentation/virt/kvm/api.rst

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8585,7 +8585,7 @@ ENOSYS for the others.
85858585
When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
85868586
type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
85878587

8588-
7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS
8588+
7.42 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS
85898589
-------------------------------------
85908590

85918591
:Architectures: arm64
@@ -8614,6 +8614,17 @@ given VM.
86148614
When this capability is enabled, KVM resets the VCPU when setting
86158615
MP_STATE_INIT_RECEIVED through IOCTL. The original MP_STATE is preserved.
86168616

8617+
7.43 KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED
8618+
-------------------------------------------
8619+
8620+
:Architectures: arm64
8621+
:Target: VM
8622+
:Parameters: None
8623+
8624+
This capability indicates to userspace whether a PFNMAP memory region
8625+
can be safely mapped as cacheable. This relies on the presence of
8626+
force write back (FWB) feature support on the hardware.
8627+
86178628
8. Other capabilities.
86188629
======================
86198630

arch/arm64/include/asm/kvm_mmu.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,24 @@ static inline void kvm_fault_unlock(struct kvm *kvm)
371371
read_unlock(&kvm->mmu_lock);
372372
}
373373

374+
/*
375+
* ARM64 KVM relies on a simple conversion from physaddr to a kernel
376+
* virtual address (KVA) when it does cache maintenance as the CMO
377+
* instructions work on virtual addresses. This is incompatible with
378+
* VM_PFNMAP VMAs which may not have a kernel direct mapping to a
379+
* virtual address.
380+
*
381+
* With S2FWB and CACHE DIC features, KVM need not do cache flushing
382+
* and CMOs are NOP'd. This has the effect of no longer requiring a
383+
* KVA for addresses mapped into the S2. The presence of these features
384+
* are thus necessary to support cacheable S2 mapping of VM_PFNMAP.
385+
*/
386+
static inline bool kvm_supports_cacheable_pfnmap(void)
387+
{
388+
return cpus_have_final_cap(ARM64_HAS_STAGE2_FWB) &&
389+
cpus_have_final_cap(ARM64_HAS_CACHE_DIC);
390+
}
391+
374392
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
375393
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
376394
#else

arch/arm64/kvm/arm.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
408408
case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES:
409409
r = BIT(0);
410410
break;
411+
case KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED:
412+
if (!kvm)
413+
r = -EINVAL;
414+
else
415+
r = kvm_supports_cacheable_pfnmap();
416+
break;
417+
411418
default:
412419
r = 0;
413420
}

arch/arm64/kvm/mmu.c

Lines changed: 65 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,6 @@ int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
193193
return 0;
194194
}
195195

196-
static bool kvm_is_device_pfn(unsigned long pfn)
197-
{
198-
return !pfn_is_map_memory(pfn);
199-
}
200-
201196
static void *stage2_memcache_zalloc_page(void *arg)
202197
{
203198
struct kvm_mmu_memory_cache *mc = arg;
@@ -1470,15 +1465,27 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
14701465
return vma->vm_flags & VM_MTE_ALLOWED;
14711466
}
14721467

1468+
static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
1469+
{
1470+
switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
1471+
case MT_NORMAL_NC:
1472+
case MT_DEVICE_nGnRnE:
1473+
case MT_DEVICE_nGnRE:
1474+
return false;
1475+
default:
1476+
return true;
1477+
}
1478+
}
1479+
14731480
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
14741481
struct kvm_s2_trans *nested,
14751482
struct kvm_memory_slot *memslot, unsigned long hva,
14761483
bool fault_is_perm)
14771484
{
14781485
int ret = 0;
14791486
bool write_fault, writable, force_pte = false;
1480-
bool exec_fault, mte_allowed;
1481-
bool device = false, vfio_allow_any_uc = false;
1487+
bool exec_fault, mte_allowed, is_vma_cacheable;
1488+
bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
14821489
unsigned long mmu_seq;
14831490
phys_addr_t ipa = fault_ipa;
14841491
struct kvm *kvm = vcpu->kvm;
@@ -1492,6 +1499,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
14921499
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
14931500
struct kvm_pgtable *pgt;
14941501
struct page *page;
1502+
vm_flags_t vm_flags;
14951503
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;
14961504

14971505
if (fault_is_perm)
@@ -1619,6 +1627,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16191627

16201628
vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
16211629

1630+
vm_flags = vma->vm_flags;
1631+
1632+
is_vma_cacheable = kvm_vma_is_cacheable(vma);
1633+
16221634
/* Don't use the VMA after the unlock -- it may have vanished */
16231635
vma = NULL;
16241636

@@ -1642,18 +1654,39 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16421654
if (is_error_noslot_pfn(pfn))
16431655
return -EFAULT;
16441656

1645-
if (kvm_is_device_pfn(pfn)) {
1646-
/*
1647-
* If the page was identified as device early by looking at
1648-
* the VMA flags, vma_pagesize is already representing the
1649-
* largest quantity we can map. If instead it was mapped
1650-
* via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
1651-
* and must not be upgraded.
1652-
*
1653-
* In both cases, we don't let transparent_hugepage_adjust()
1654-
* change things at the last minute.
1655-
*/
1656-
device = true;
1657+
/*
1658+
 * Check if this is a non-struct-page memory PFN that cannot support
1659+
 * CMOs. It could potentially be unsafe to access as cacheable.
1660+
*/
1661+
if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) {
1662+
if (is_vma_cacheable) {
1663+
/*
1664+
* Whilst the VMA owner expects cacheable mapping to this
1665+
* PFN, hardware also has to support the FWB and CACHE DIC
1666+
* features.
1667+
*
1668+
* ARM64 KVM relies on kernel VA mapping to the PFN to
1669+
* perform cache maintenance as the CMO instructions work on
1670+
 * virtual addresses. VM_PFNMAP regions are not necessarily
1671+
* mapped to a KVA and hence the presence of hardware features
1672+
* S2FWB and CACHE DIC are mandatory to avoid the need for
1673+
* cache maintenance.
1674+
*/
1675+
if (!kvm_supports_cacheable_pfnmap())
1676+
return -EFAULT;
1677+
} else {
1678+
/*
1679+
* If the page was identified as device early by looking at
1680+
* the VMA flags, vma_pagesize is already representing the
1681+
* largest quantity we can map. If instead it was mapped
1682+
* via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
1683+
* and must not be upgraded.
1684+
*
1685+
* In both cases, we don't let transparent_hugepage_adjust()
1686+
* change things at the last minute.
1687+
*/
1688+
s2_force_noncacheable = true;
1689+
}
16571690
} else if (logging_active && !write_fault) {
16581691
/*
16591692
* Only actually map the page as writable if this was a write
@@ -1662,7 +1695,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16621695
writable = false;
16631696
}
16641697

1665-
if (exec_fault && device)
1698+
if (exec_fault && s2_force_noncacheable)
16661699
return -ENOEXEC;
16671700

16681701
/*
@@ -1695,7 +1728,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16951728
* If we are not forced to use page mapping, check if we are
16961729
* backed by a THP and thus use block mapping if possible.
16971730
*/
1698-
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
1731+
if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable)) {
16991732
if (fault_is_perm && fault_granule > PAGE_SIZE)
17001733
vma_pagesize = fault_granule;
17011734
else
@@ -1709,7 +1742,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
17091742
}
17101743
}
17111744

1712-
if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
1745+
if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte(kvm)) {
17131746
/* Check the VMM hasn't introduced a new disallowed VMA */
17141747
if (mte_allowed) {
17151748
sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1725,7 +1758,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
17251758
if (exec_fault)
17261759
prot |= KVM_PGTABLE_PROT_X;
17271760

1728-
if (device) {
1761+
if (s2_force_noncacheable) {
17291762
if (vfio_allow_any_uc)
17301763
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
17311764
else
@@ -2221,6 +2254,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
22212254
ret = -EINVAL;
22222255
break;
22232256
}
2257+
2258+
/*
2259+
* Cacheable PFNMAP is allowed only if the hardware
2260+
* supports it.
2261+
*/
2262+
if (kvm_vma_is_cacheable(vma) && !kvm_supports_cacheable_pfnmap()) {
2263+
ret = -EINVAL;
2264+
break;
2265+
}
22242266
}
22252267
hva = min(reg_end, vma->vm_end);
22262268
} while (hva < reg_end);

include/uapi/linux/kvm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,7 @@ struct kvm_enable_cap {
956956
#define KVM_CAP_ARM_EL2 240
957957
#define KVM_CAP_ARM_EL2_E2H0 241
958958
#define KVM_CAP_RISCV_MP_STATE_RESET 242
959+
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
959960

960961
struct kvm_irq_routing_irqchip {
961962
__u32 irqchip;

0 commit comments

Comments
 (0)