@@ -1983,14 +1983,35 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
 	return true;
 }
 
+static __ro_after_init HLIST_HEAD(empty_page_hash);
+
+static struct hlist_head *kvm_get_mmu_page_hash(struct kvm *kvm, gfn_t gfn)
+{
+	/*
+	 * Ensure the load of the hash table pointer itself is ordered before
+	 * loads to walk the table.  The pointer is set at runtime outside of
+	 * mmu_lock when the TDP MMU is enabled, as in that case the hash
+	 * table of shadow pages becomes necessary only when KVM needs to
+	 * shadow L1's TDP for an L2 guest.  Pairs with the smp_store_release()
+	 * in kvm_mmu_alloc_page_hash().
+	 */
+	struct hlist_head *page_hash = smp_load_acquire(&kvm->arch.mmu_page_hash);
+
+	lockdep_assert_held(&kvm->mmu_lock);
+
+	if (!page_hash)
+		return &empty_page_hash;
+
+	return &page_hash[kvm_page_table_hashfn(gfn)];
+}
+
 #define for_each_valid_sp(_kvm, _sp, _list)				\
 	hlist_for_each_entry(_sp, _list, hash_link)			\
 		if (is_obsolete_sp((_kvm), (_sp))) {			\
 		} else
 
 #define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn)		\
-	for_each_valid_sp(_kvm, _sp,					\
-	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)])	\
+	for_each_valid_sp(_kvm, _sp, kvm_get_mmu_page_hash(_kvm, _gfn))	\
 		if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else
 
 static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
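
The new kvm_get_mmu_page_hash() above is the consume side of a lazily-published pointer: a single acquire load of the table, with a permanently empty static list head as the fallback while the table is still unallocated. Below is a minimal userspace sketch of that read path, with C11 atomics standing in for the kernel's smp_load_acquire(); the names (page_table, lookup_bucket, NUM_BUCKETS) are hypothetical stand-ins, not KVM code.

#include <stdatomic.h>
#include <stdio.h>

#define NUM_BUCKETS 64

struct bucket { void *first; };             /* stand-in for struct hlist_head */

static struct bucket empty_bucket;          /* analogue of empty_page_hash */
static _Atomic(struct bucket *) page_table; /* analogue of arch.mmu_page_hash */

/* Analogue of kvm_get_mmu_page_hash(): one acquire load, then index. */
static struct bucket *lookup_bucket(unsigned long gfn)
{
	/*
	 * The acquire load orders the pointer read before any loads that walk
	 * the bucket, so a reader that sees a non-NULL table is guaranteed to
	 * also see its zero-initialized (i.e. empty) buckets.
	 */
	struct bucket *t = atomic_load_explicit(&page_table,
						memory_order_acquire);

	if (!t)
		return &empty_bucket;	/* table not allocated: trivially empty */

	return &t[gfn % NUM_BUCKETS];
}

int main(void)
{
	/* Nothing has published page_table yet, so lookups hit the fallback. */
	printf("bucket %p, first = %p\n",
	       (void *)lookup_bucket(42), lookup_bucket(42)->first);
	return 0;
}

Returning the address of a static empty head is what lets for_each_gfn_valid_sp_with_gptes() stay NULL-check free: walking an empty list simply finds nothing.
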
@@ -2358,6 +2379,12 @@ static struct kvm_mmu_page *__kvm_mmu_get_shadow_page(struct kvm *kvm,
 	struct kvm_mmu_page *sp;
 	bool created = false;
 
+	/*
+	 * No need for memory barriers, unlike in kvm_get_mmu_page_hash(), as
+	 * mmu_page_hash must be set prior to creating the first shadow root,
+	 * i.e. reaching this point is fully serialized by slots_arch_lock.
+	 */
+	BUG_ON(!kvm->arch.mmu_page_hash);
 	sp_list = &kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
 
 	sp = kvm_mmu_find_shadow_page(kvm, vcpu, gfn, sp_list, role);
@@ -3882,6 +3909,28 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static int kvm_mmu_alloc_page_hash(struct kvm *kvm)
+{
+	struct hlist_head *h;
+
+	if (kvm->arch.mmu_page_hash)
+		return 0;
+
+	h = kvcalloc(KVM_NUM_MMU_PAGES, sizeof(*h), GFP_KERNEL_ACCOUNT);
+	if (!h)
+		return -ENOMEM;
+
+	/*
+	 * Ensure the hash table pointer is set only after all stores to zero
+	 * the memory are retired.  Pairs with the smp_load_acquire() in
+	 * kvm_get_mmu_page_hash().  Note, mmu_lock must be held for write to
+	 * add (or remove) shadow pages, and so readers are guaranteed to see
+	 * an empty list for their current mmu_lock critical section.
+	 */
+	smp_store_release(&kvm->arch.mmu_page_hash, h);
+	return 0;
+}
+
 static int mmu_first_shadow_root_alloc(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
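
kvm_mmu_alloc_page_hash() is the matching publish side: zeroed allocation first, then a release store of the pointer, so any reader that observes the table also observes empty buckets. A companion userspace sketch of the idempotent allocate-then-publish step, again with C11 atomics in place of smp_store_release() and calloc() in place of kvcalloc(); all names are hypothetical.

#include <stdatomic.h>
#include <stdlib.h>

#define NUM_BUCKETS 64

struct bucket { void *first; };

static _Atomic(struct bucket *) page_table;

/* Analogue of kvm_mmu_alloc_page_hash(): allocate at most once, then publish. */
static int alloc_page_table(void)
{
	struct bucket *t;

	/*
	 * Idempotent; callers are assumed serialized by an outer lock, as
	 * mmu_first_shadow_root_alloc() is by slots_arch_lock in the patch.
	 */
	if (atomic_load_explicit(&page_table, memory_order_relaxed))
		return 0;

	t = calloc(NUM_BUCKETS, sizeof(*t));	/* zeroed memory = empty lists */
	if (!t)
		return -1;

	/*
	 * The release store orders the zeroing of the buckets before the
	 * pointer becomes visible, pairing with the acquire load on the
	 * reader side.
	 */
	atomic_store_explicit(&page_table, t, memory_order_release);
	return 0;
}

int main(void)
{
	return alloc_page_table() ? 1 : 0;
}

Release/acquire is all the ordering needed here because entries are only ever added with mmu_lock held for write, so a reader can only ever see "no table yet" or a validly initialized (possibly populated) one.
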
@@ -3901,9 +3950,13 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
 	if (kvm_shadow_root_allocated(kvm))
 		goto out_unlock;
 
+	r = kvm_mmu_alloc_page_hash(kvm);
+	if (r)
+		goto out_unlock;
+
 	/*
-	 * Check if anything actually needs to be allocated, e.g. all metadata
-	 * will be allocated upfront if TDP is disabled.
+	 * Check if memslot metadata actually needs to be allocated, e.g. all
+	 * metadata will be allocated upfront if TDP is disabled.
 	 */
 	if (kvm_memslots_have_rmaps(kvm) &&
 	    kvm_page_track_write_tracking_enabled(kvm))
@@ -6682,15 +6735,22 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 	kvm_tdp_mmu_zap_invalidated_roots(kvm, true);
 }
 
-void kvm_mmu_init_vm(struct kvm *kvm)
+int kvm_mmu_init_vm(struct kvm *kvm)
 {
+	int r;
+
 	kvm->arch.shadow_mmio_value = shadow_mmio_value;
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
 	spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
 
-	if (tdp_mmu_enabled)
+	if (tdp_mmu_enabled) {
 		kvm_mmu_init_tdp_mmu(kvm);
+	} else {
+		r = kvm_mmu_alloc_page_hash(kvm);
+		if (r)
+			return r;
+	}
 
 	kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
 	kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
@@ -6699,6 +6759,7 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 
 	kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
 	kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO;
+	return 0;
 }
 
 static void mmu_free_vm_memory_caches(struct kvm *kvm)
@@ -6710,6 +6771,8 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
 {
+	kvfree(kvm->arch.mmu_page_hash);
+
 	if (tdp_mmu_enabled)
 		kvm_mmu_uninit_tdp_mmu(kvm);
 