@@ -193,11 +193,6 @@ int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
193193 return 0 ;
194194}
195195
196- static bool kvm_is_device_pfn (unsigned long pfn )
197- {
198- return !pfn_is_map_memory (pfn );
199- }
200-
201196static void * stage2_memcache_zalloc_page (void * arg )
202197{
203198 struct kvm_mmu_memory_cache * mc = arg ;
@@ -1470,15 +1465,27 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
14701465 return vma -> vm_flags & VM_MTE_ALLOWED ;
14711466}
14721467
1468+ static bool kvm_vma_is_cacheable (struct vm_area_struct * vma )
1469+ {
1470+ switch (FIELD_GET (PTE_ATTRINDX_MASK , pgprot_val (vma -> vm_page_prot ))) {
1471+ case MT_NORMAL_NC :
1472+ case MT_DEVICE_nGnRnE :
1473+ case MT_DEVICE_nGnRE :
1474+ return false;
1475+ default :
1476+ return true;
1477+ }
1478+ }
1479+
14731480static int user_mem_abort (struct kvm_vcpu * vcpu , phys_addr_t fault_ipa ,
14741481 struct kvm_s2_trans * nested ,
14751482 struct kvm_memory_slot * memslot , unsigned long hva ,
14761483 bool fault_is_perm )
14771484{
14781485 int ret = 0 ;
14791486 bool write_fault , writable , force_pte = false;
1480- bool exec_fault , mte_allowed ;
1481- bool device = false, vfio_allow_any_uc = false;
1487+ bool exec_fault , mte_allowed , is_vma_cacheable ;
1488+ bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
14821489 unsigned long mmu_seq ;
14831490 phys_addr_t ipa = fault_ipa ;
14841491 struct kvm * kvm = vcpu -> kvm ;
@@ -1492,6 +1499,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
14921499 enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R ;
14931500 struct kvm_pgtable * pgt ;
14941501 struct page * page ;
1502+ vm_flags_t vm_flags ;
14951503 enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED ;
14961504
14971505 if (fault_is_perm )
@@ -1619,6 +1627,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16191627
16201628 vfio_allow_any_uc = vma -> vm_flags & VM_ALLOW_ANY_UNCACHED ;
16211629
1630+ vm_flags = vma -> vm_flags ;
1631+
1632+ is_vma_cacheable = kvm_vma_is_cacheable (vma );
1633+
16221634 /* Don't use the VMA after the unlock -- it may have vanished */
16231635 vma = NULL ;
16241636
@@ -1642,18 +1654,39 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16421654 if (is_error_noslot_pfn (pfn ))
16431655 return - EFAULT ;
16441656
1645- if (kvm_is_device_pfn (pfn )) {
1646- /*
1647- * If the page was identified as device early by looking at
1648- * the VMA flags, vma_pagesize is already representing the
1649- * largest quantity we can map. If instead it was mapped
1650- * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
1651- * and must not be upgraded.
1652- *
1653- * In both cases, we don't let transparent_hugepage_adjust()
1654- * change things at the last minute.
1655- */
1656- device = true;
1657+ /*
1658+ * Check if this is a non-struct-page memory PFN that cannot support
1659+ * CMOs. It could potentially be unsafe to access as cacheable.
1660+ */
1661+ if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP ) && !pfn_is_map_memory (pfn )) {
1662+ if (is_vma_cacheable ) {
1663+ /*
1664+ * Whilst the VMA owner expects a cacheable mapping to this
1665+ * PFN, hardware also has to support the FWB and CACHE DIC
1666+ * features.
1667+ *
1668+ * ARM64 KVM relies on kernel VA mapping to the PFN to
1669+ * perform cache maintenance as the CMO instructions work on
1670+ * virtual addresses. VM_PFNMAP regions are not necessarily
1671+ * mapped to a KVA and hence the presence of hardware features
1672+ * S2FWB and CACHE DIC is mandatory to avoid the need for
1673+ * cache maintenance.
1674+ */
1675+ if (!kvm_supports_cacheable_pfnmap ())
1676+ return - EFAULT ;
1677+ } else {
1678+ /*
1679+ * If the page was identified as device early by looking at
1680+ * the VMA flags, vma_pagesize is already representing the
1681+ * largest quantity we can map. If instead it was mapped
1682+ * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
1683+ * and must not be upgraded.
1684+ *
1685+ * In both cases, we don't let transparent_hugepage_adjust()
1686+ * change things at the last minute.
1687+ */
1688+ s2_force_noncacheable = true;
1689+ }
16571690 } else if (logging_active && !write_fault ) {
16581691 /*
16591692 * Only actually map the page as writable if this was a write
@@ -1662,7 +1695,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16621695 writable = false;
16631696 }
16641697
1665- if (exec_fault && device )
1698+ if (exec_fault && s2_force_noncacheable )
16661699 return - ENOEXEC ;
16671700
16681701 /*
@@ -1695,7 +1728,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16951728 * If we are not forced to use page mapping, check if we are
16961729 * backed by a THP and thus use block mapping if possible.
16971730 */
1698- if (vma_pagesize == PAGE_SIZE && !(force_pte || device )) {
1731+ if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable )) {
16991732 if (fault_is_perm && fault_granule > PAGE_SIZE )
17001733 vma_pagesize = fault_granule ;
17011734 else
@@ -1709,7 +1742,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
17091742 }
17101743 }
17111744
1712- if (!fault_is_perm && !device && kvm_has_mte (kvm )) {
1745+ if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte (kvm )) {
17131746 /* Check the VMM hasn't introduced a new disallowed VMA */
17141747 if (mte_allowed ) {
17151748 sanitise_mte_tags (kvm , pfn , vma_pagesize );
@@ -1725,7 +1758,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
17251758 if (exec_fault )
17261759 prot |= KVM_PGTABLE_PROT_X ;
17271760
1728- if (device ) {
1761+ if (s2_force_noncacheable ) {
17291762 if (vfio_allow_any_uc )
17301763 prot |= KVM_PGTABLE_PROT_NORMAL_NC ;
17311764 else
@@ -2221,6 +2254,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
22212254 ret = - EINVAL ;
22222255 break ;
22232256 }
2257+
2258+ /*
2259+ * Cacheable PFNMAP is allowed only if the hardware
2260+ * supports it.
2261+ */
2262+ if (kvm_vma_is_cacheable (vma ) && !kvm_supports_cacheable_pfnmap ()) {
2263+ ret = - EINVAL ;
2264+ break ;
2265+ }
22242266 }
22252267 hva = min (reg_end , vma -> vm_end );
22262268 } while (hva < reg_end );
0 commit comments