summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virt/kvm/locking.rst7
-rw-r--r--arch/x86/include/asm/kvm_host.h11
-rw-r--r--arch/x86/kvm/mmu/mmu.c8
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c95
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h3
5 files changed, 57 insertions, 67 deletions
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 3a034db5e55f..02880d5552d5 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -43,10 +43,9 @@ On x86:
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock and kvm->arch.xen.xen_lock
-- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and
- kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
- cannot be taken without already holding kvm->arch.mmu_lock (typically with
- ``read_lock`` for the TDP MMU, thus the need for additional spinlocks).
+- kvm->arch.mmu_lock is an rwlock; critical sections for
+ kvm->arch.tdp_mmu_pages_lock and kvm->arch.mmu_unsync_pages_lock must
+ also take kvm->arch.mmu_lock
Everything else is a leaf: no other lock is taken inside the critical
sections.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c5e362e2116f..7bc1daf68741 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1433,9 +1433,8 @@ struct kvm_arch {
* the MMU lock in read mode + RCU or
* the MMU lock in write mode
*
- * For writes, this list is protected by:
- * the MMU lock in read mode + the tdp_mmu_pages_lock or
- * the MMU lock in write mode
+ * For writes, this list is protected by tdp_mmu_pages_lock; see
+ * below for the details.
*
* Roots will remain in the list until their tdp_mmu_root_count
* drops to zero, at which point the thread that decremented the
@@ -1452,8 +1451,10 @@ struct kvm_arch {
* - possible_nx_huge_pages;
* - the possible_nx_huge_page_link field of kvm_mmu_page structs used
* by the TDP MMU
- * It is acceptable, but not necessary, to acquire this lock when
- * the thread holds the MMU lock in write mode.
+ * Because the lock is only taken within the MMU lock, strictly
+ * speaking it is redundant to acquire this lock when the thread
+ * holds the MMU lock in write mode. However it often simplifies
+ * the code to do so.
*/
spinlock_t tdp_mmu_pages_lock;
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index d0590b417d30..3c844e428684 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1388,7 +1388,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
if (READ_ONCE(eager_page_split))
- kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K);
+ kvm_mmu_try_split_huge_pages(kvm, slot, start, end + 1, PG_LEVEL_4K);
kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
@@ -2846,9 +2846,9 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
/*
* Recheck after taking the spinlock, a different vCPU
* may have since marked the page unsync. A false
- * positive on the unprotected check above is not
+ * negative on the unprotected check above is not
* possible as clearing sp->unsync _must_ hold mmu_lock
- * for write, i.e. unsync cannot transition from 0->1
+ * for write, i.e. unsync cannot transition from 1->0
* while this CPU holds mmu_lock for read (or write).
*/
if (READ_ONCE(sp->unsync))
@@ -3576,7 +3576,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
return;
if (is_tdp_mmu_page(sp))
- kvm_tdp_mmu_put_root(kvm, sp, false);
+ kvm_tdp_mmu_put_root(kvm, sp);
else if (!--sp->root_count && sp->role.invalid)
kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6cd4dd631a2f..6ae19b4ee5b1 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -73,11 +73,8 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
tdp_mmu_free_sp(sp);
}
-void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
- bool shared)
+void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
- kvm_lockdep_assert_mmu_lock_held(kvm, shared);
-
if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
return;
@@ -106,10 +103,16 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
*/
static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
struct kvm_mmu_page *prev_root,
- bool shared, bool only_valid)
+ bool only_valid)
{
struct kvm_mmu_page *next_root;
+ /*
+ * While the roots themselves are RCU-protected, fields such as
+ * role.invalid are protected by mmu_lock.
+ */
+ lockdep_assert_held(&kvm->mmu_lock);
+
rcu_read_lock();
if (prev_root)
@@ -132,7 +135,7 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
rcu_read_unlock();
if (prev_root)
- kvm_tdp_mmu_put_root(kvm, prev_root, shared);
+ kvm_tdp_mmu_put_root(kvm, prev_root);
return next_root;
}
@@ -144,26 +147,22 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
* recent root. (Unless keeping a live reference is desirable.)
*
* If shared is set, this function is operating under the MMU lock in read
- * mode. In the unlikely event that this thread must free a root, the lock
- * will be temporarily dropped and reacquired in write mode.
+ * mode.
*/
-#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, _only_valid)\
- for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, _only_valid); \
- _root; \
- _root = tdp_mmu_next_root(_kvm, _root, _shared, _only_valid)) \
- if (kvm_lockdep_assert_mmu_lock_held(_kvm, _shared) && \
- kvm_mmu_page_as_id(_root) != _as_id) { \
+#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)\
+ for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid); \
+ ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
+ _root = tdp_mmu_next_root(_kvm, _root, _only_valid)) \
+ if (kvm_mmu_page_as_id(_root) != _as_id) { \
} else
-#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \
- __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
+#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id) \
+ __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, true)
-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared) \
- for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false); \
- _root; \
- _root = tdp_mmu_next_root(_kvm, _root, _shared, false)) \
- if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) { \
- } else
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \
+ for (_root = tdp_mmu_next_root(_kvm, NULL, false); \
+ ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
+ _root = tdp_mmu_next_root(_kvm, _root, false))
/*
* Iterate over all TDP MMU roots. Requires that mmu_lock be held for write,
@@ -276,28 +275,18 @@ static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
*
* @kvm: kvm instance
* @sp: the page to be removed
- * @shared: This operation may not be running under the exclusive use of
- * the MMU lock and the operation must synchronize with other
- * threads that might be adding or removing pages.
*/
-static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
- bool shared)
+static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
tdp_unaccount_mmu_page(kvm, sp);
if (!sp->nx_huge_page_disallowed)
return;
- if (shared)
- spin_lock(&kvm->arch.tdp_mmu_pages_lock);
- else
- lockdep_assert_held_write(&kvm->mmu_lock);
-
+ spin_lock(&kvm->arch.tdp_mmu_pages_lock);
sp->nx_huge_page_disallowed = false;
untrack_possible_nx_huge_page(kvm, sp);
-
- if (shared)
- spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+ spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
}
/**
@@ -326,7 +315,7 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
trace_kvm_mmu_prepare_zap_page(sp);
- tdp_mmu_unlink_sp(kvm, sp, shared);
+ tdp_mmu_unlink_sp(kvm, sp);
for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
tdp_ptep_t sptep = pt + i;
@@ -832,7 +821,8 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
{
struct kvm_mmu_page *root;
- for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+ lockdep_assert_held_write(&kvm->mmu_lock);
+ for_each_tdp_mmu_root_yield_safe(kvm, root)
flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
return flush;
@@ -854,7 +844,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
* is being destroyed or the userspace VMM has exited. In both cases,
* KVM_RUN is unreachable, i.e. no vCPUs will ever service the request.
*/
- for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+ lockdep_assert_held_write(&kvm->mmu_lock);
+ for_each_tdp_mmu_root_yield_safe(kvm, root)
tdp_mmu_zap_root(kvm, root, false);
}
@@ -868,7 +859,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
read_lock(&kvm->mmu_lock);
- for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
if (!root->tdp_mmu_scheduled_root_to_zap)
continue;
@@ -891,7 +882,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
* the root must be reachable by mmu_notifiers while it's being
* zapped
*/
- kvm_tdp_mmu_put_root(kvm, root, true);
+ kvm_tdp_mmu_put_root(kvm, root);
}
read_unlock(&kvm->mmu_lock);
@@ -1125,7 +1116,7 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
{
struct kvm_mmu_page *root;
- __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false)
+ __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
range->may_block, flush);
@@ -1314,7 +1305,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
lockdep_assert_held_read(&kvm->mmu_lock);
- for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages, min_level);
@@ -1346,6 +1337,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
{
struct kvm_mmu_page *sp;
+ kvm_lockdep_assert_mmu_lock_held(kvm, shared);
+
/*
* Since we are allocating while under the MMU lock we have to be
* careful about GFP flags. Use GFP_NOWAIT to avoid blocking on direct
@@ -1496,11 +1489,10 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
int r = 0;
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
-
- for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, shared) {
+ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) {
r = tdp_mmu_split_huge_pages_root(kvm, root, start, end, target_level, shared);
if (r) {
- kvm_tdp_mmu_put_root(kvm, root, shared);
+ kvm_tdp_mmu_put_root(kvm, root);
break;
}
}
@@ -1522,12 +1514,13 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
rcu_read_lock();
- tdp_root_for_each_leaf_pte(iter, root, start, end) {
+ tdp_root_for_each_pte(iter, root, start, end) {
retry:
- if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+ if (!is_shadow_present_pte(iter.old_spte) ||
+ !is_last_spte(iter.old_spte, iter.level))
continue;
- if (!is_shadow_present_pte(iter.old_spte))
+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
KVM_MMU_WARN_ON(kvm_ad_enabled() &&
@@ -1560,8 +1553,7 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
bool spte_set = false;
lockdep_assert_held_read(&kvm->mmu_lock);
-
- for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages);
@@ -1695,8 +1687,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
struct kvm_mmu_page *root;
lockdep_assert_held_read(&kvm->mmu_lock);
-
- for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
zap_collapsible_spte_range(kvm, root, slot);
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 733a3aef3a96..20d97aa46c49 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -17,8 +17,7 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
return refcount_inc_not_zero(&root->tdp_mmu_root_count);
}
-void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
- bool shared);
+void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root);
bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);