diff options
-rw-r--r-- | Documentation/virt/kvm/locking.rst | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 11 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.c | 95 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.h | 3 |
5 files changed, 57 insertions, 67 deletions
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index 3a034db5e55f..02880d5552d5 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -43,10 +43,9 @@ On x86: - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock and kvm->arch.xen.xen_lock -- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and - kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and - cannot be taken without already holding kvm->arch.mmu_lock (typically with - ``read_lock`` for the TDP MMU, thus the need for additional spinlocks). +- kvm->arch.mmu_lock is an rwlock; critical sections for + kvm->arch.tdp_mmu_pages_lock and kvm->arch.mmu_unsync_pages_lock must + also take kvm->arch.mmu_lock Everything else is a leaf: no other lock is taken inside the critical sections. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c5e362e2116f..7bc1daf68741 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1433,9 +1433,8 @@ struct kvm_arch { * the MMU lock in read mode + RCU or * the MMU lock in write mode * - * For writes, this list is protected by: - * the MMU lock in read mode + the tdp_mmu_pages_lock or - * the MMU lock in write mode + * For writes, this list is protected by tdp_mmu_pages_lock; see + * below for the details. * * Roots will remain in the list until their tdp_mmu_root_count * drops to zero, at which point the thread that decremented the @@ -1452,8 +1451,10 @@ struct kvm_arch { * - possible_nx_huge_pages; * - the possible_nx_huge_page_link field of kvm_mmu_page structs used * by the TDP MMU - * It is acceptable, but not necessary, to acquire this lock when - * the thread holds the MMU lock in write mode. + * Because the lock is only taken within the MMU lock, strictly + * speaking it is redundant to acquire this lock when the thread + * holds the MMU lock in write mode. However it often simplifies + * the code to do so. */ spinlock_t tdp_mmu_pages_lock; #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d0590b417d30..3c844e428684 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1388,7 +1388,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, gfn_t end = slot->base_gfn + gfn_offset + __fls(mask); if (READ_ONCE(eager_page_split)) - kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K); + kvm_mmu_try_split_huge_pages(kvm, slot, start, end + 1, PG_LEVEL_4K); kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M); @@ -2846,9 +2846,9 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, /* * Recheck after taking the spinlock, a different vCPU * may have since marked the page unsync. A false - * positive on the unprotected check above is not + * negative on the unprotected check above is not * possible as clearing sp->unsync _must_ hold mmu_lock - * for write, i.e. unsync cannot transition from 0->1 + * for write, i.e. unsync cannot transition from 1->0 * while this CPU holds mmu_lock for read (or write). */ if (READ_ONCE(sp->unsync)) @@ -3576,7 +3576,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, return; if (is_tdp_mmu_page(sp)) - kvm_tdp_mmu_put_root(kvm, sp, false); + kvm_tdp_mmu_put_root(kvm, sp); else if (!--sp->root_count && sp->role.invalid) kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 6cd4dd631a2f..6ae19b4ee5b1 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -73,11 +73,8 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) tdp_mmu_free_sp(sp); } -void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, - bool shared) +void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root) { - kvm_lockdep_assert_mmu_lock_held(kvm, shared); - if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) return; @@ -106,10 +103,16 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, */ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, struct kvm_mmu_page *prev_root, - bool shared, bool only_valid) + bool only_valid) { struct kvm_mmu_page *next_root; + /* + * While the roots themselves are RCU-protected, fields such as + * role.invalid are protected by mmu_lock. + */ + lockdep_assert_held(&kvm->mmu_lock); + rcu_read_lock(); if (prev_root) @@ -132,7 +135,7 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, rcu_read_unlock(); if (prev_root) - kvm_tdp_mmu_put_root(kvm, prev_root, shared); + kvm_tdp_mmu_put_root(kvm, prev_root); return next_root; } @@ -144,26 +147,22 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, * recent root. (Unless keeping a live reference is desirable.) * * If shared is set, this function is operating under the MMU lock in read - * mode. In the unlikely event that this thread must free a root, the lock - * will be temporarily dropped and reacquired in write mode. + * mode. */ -#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, _only_valid)\ - for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, _only_valid); \ - _root; \ - _root = tdp_mmu_next_root(_kvm, _root, _shared, _only_valid)) \ - if (kvm_lockdep_assert_mmu_lock_held(_kvm, _shared) && \ - kvm_mmu_page_as_id(_root) != _as_id) { \ +#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)\ + for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid); \ + ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \ + _root = tdp_mmu_next_root(_kvm, _root, _only_valid)) \ + if (kvm_mmu_page_as_id(_root) != _as_id) { \ } else -#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ - __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true) +#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id) \ + __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, true) -#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared) \ - for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false); \ - _root; \ - _root = tdp_mmu_next_root(_kvm, _root, _shared, false)) \ - if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) { \ - } else +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ + for (_root = tdp_mmu_next_root(_kvm, NULL, false); \ + ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \ + _root = tdp_mmu_next_root(_kvm, _root, false)) /* * Iterate over all TDP MMU roots. Requires that mmu_lock be held for write, @@ -276,28 +275,18 @@ static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) * * @kvm: kvm instance * @sp: the page to be removed - * @shared: This operation may not be running under the exclusive use of - * the MMU lock and the operation must synchronize with other - * threads that might be adding or removing pages. */ -static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, - bool shared) +static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp) { tdp_unaccount_mmu_page(kvm, sp); if (!sp->nx_huge_page_disallowed) return; - if (shared) - spin_lock(&kvm->arch.tdp_mmu_pages_lock); - else - lockdep_assert_held_write(&kvm->mmu_lock); - + spin_lock(&kvm->arch.tdp_mmu_pages_lock); sp->nx_huge_page_disallowed = false; untrack_possible_nx_huge_page(kvm, sp); - - if (shared) - spin_unlock(&kvm->arch.tdp_mmu_pages_lock); + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } /** @@ -326,7 +315,7 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) trace_kvm_mmu_prepare_zap_page(sp); - tdp_mmu_unlink_sp(kvm, sp, shared); + tdp_mmu_unlink_sp(kvm, sp); for (i = 0; i < SPTE_ENT_PER_PAGE; i++) { tdp_ptep_t sptep = pt + i; @@ -832,7 +821,8 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush) { struct kvm_mmu_page *root; - for_each_tdp_mmu_root_yield_safe(kvm, root, false) + lockdep_assert_held_write(&kvm->mmu_lock); + for_each_tdp_mmu_root_yield_safe(kvm, root) flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush); return flush; @@ -854,7 +844,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) * is being destroyed or the userspace VMM has exited. In both cases, * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request. */ - for_each_tdp_mmu_root_yield_safe(kvm, root, false) + lockdep_assert_held_write(&kvm->mmu_lock); + for_each_tdp_mmu_root_yield_safe(kvm, root) tdp_mmu_zap_root(kvm, root, false); } @@ -868,7 +859,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) read_lock(&kvm->mmu_lock); - for_each_tdp_mmu_root_yield_safe(kvm, root, true) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { if (!root->tdp_mmu_scheduled_root_to_zap) continue; @@ -891,7 +882,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) * the root must be reachable by mmu_notifiers while it's being * zapped */ - kvm_tdp_mmu_put_root(kvm, root, true); + kvm_tdp_mmu_put_root(kvm, root); } read_unlock(&kvm->mmu_lock); @@ -1125,7 +1116,7 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, { struct kvm_mmu_page *root; - __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false) + __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false) flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end, range->may_block, flush); @@ -1314,7 +1305,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, lockdep_assert_held_read(&kvm->mmu_lock); - for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) + for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages, min_level); @@ -1346,6 +1337,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm, { struct kvm_mmu_page *sp; + kvm_lockdep_assert_mmu_lock_held(kvm, shared); + /* * Since we are allocating while under the MMU lock we have to be * careful about GFP flags. Use GFP_NOWAIT to avoid blocking on direct @@ -1496,11 +1489,10 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm, int r = 0; kvm_lockdep_assert_mmu_lock_held(kvm, shared); - - for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, shared) { + for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) { r = tdp_mmu_split_huge_pages_root(kvm, root, start, end, target_level, shared); if (r) { - kvm_tdp_mmu_put_root(kvm, root, shared); + kvm_tdp_mmu_put_root(kvm, root); break; } } @@ -1522,12 +1514,13 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, rcu_read_lock(); - tdp_root_for_each_leaf_pte(iter, root, start, end) { + tdp_root_for_each_pte(iter, root, start, end) { retry: - if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) + if (!is_shadow_present_pte(iter.old_spte) || + !is_last_spte(iter.old_spte, iter.level)) continue; - if (!is_shadow_present_pte(iter.old_spte)) + if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; KVM_MMU_WARN_ON(kvm_ad_enabled() && @@ -1560,8 +1553,7 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, bool spte_set = false; lockdep_assert_held_read(&kvm->mmu_lock); - - for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) + for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); @@ -1695,8 +1687,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, struct kvm_mmu_page *root; lockdep_assert_held_read(&kvm->mmu_lock); - - for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) + for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) zap_collapsible_spte_range(kvm, root, slot); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 733a3aef3a96..20d97aa46c49 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -17,8 +17,7 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) return refcount_inc_not_zero(&root->tdp_mmu_root_count); } -void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, - bool shared); +void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root); bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush); bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp); |