author | Paolo Bonzini <pbonzini@redhat.com> | 2024-09-12 11:07:15 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2024-09-17 11:38:19 -0400 |
commit | 55f50b2f86929ae042cd2eee8b2e8ffe00b5a885 | |
tree | 233e06994cdde602da0703ba6ede84a503884ff7 /arch/x86/kvm/mmu/mmu.c | |
parent | 356dab4efd1a661c7882010c34594fe9cb0048f3 | |
parent | 61de4c34b51c5b9c7ef8229eb246346254638446 | |
Merge branch 'kvm-memslot-zap-quirk' into HEAD
Today, whenever a memslot is moved or deleted, KVM invalidates the entire
page tables and generates fresh ones based on the new memslot layout. This
behavior was traditionally kept because of a bug, never fully investigated,
that caused VM instability with assigned GeForce GPUs. The overhead is
generally not huge, because the old MMU can reuse cached page tables and the
new one is more scalable and can resolve EPT violations/nested page faults
in parallel; but performance is worse if the guest frequently deletes and
adds small memslots, and the approach is not viable at all for TDX, because
TDX requires private pages to be re-accepted after they are dropped.

For non-TDX VMs, this series therefore introduces the
KVM_X86_QUIRK_SLOT_ZAP_ALL quirk, which lets userspace control how SPTEs are
zapped when a memslot is moved or deleted. The quirk is on by default, so
all SPTEs are zapped whenever a memslot is moved/deleted; userspace can
instead turn the quirk off, which limits the zapping to only those SPTEs
that lie within the range of the memslot being moved or deleted.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
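
For context (not part of the commit itself), a minimal userspace sketch of how a VMM would opt out of the quirk. It assumes the existing KVM_CAP_DISABLE_QUIRKS2 mechanism and UAPI headers new enough to define KVM_X86_QUIRK_SLOT_ZAP_ALL, and it omits most error handling:

/* Sketch: disable KVM_X86_QUIRK_SLOT_ZAP_ALL on a freshly created VM.
 * Assumes <linux/kvm.h> comes from a kernel that defines the quirk bit. */
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR);
        int vm = ioctl(kvm, KVM_CREATE_VM, 0);

        /* KVM_CHECK_EXTENSION reports which quirks this kernel can disable. */
        int supported = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_DISABLE_QUIRKS2);
        if (!(supported & KVM_X86_QUIRK_SLOT_ZAP_ALL)) {
                fprintf(stderr, "kernel cannot disable SLOT_ZAP_ALL\n");
                return 1;
        }

        struct kvm_enable_cap cap;
        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_DISABLE_QUIRKS2;
        cap.args[0] = KVM_X86_QUIRK_SLOT_ZAP_ALL;       /* quirks to turn off */

        /* With the quirk disabled, moving/deleting a memslot zaps only the
         * SPTEs covering that slot instead of invalidating all page tables. */
        if (ioctl(vm, KVM_ENABLE_CAP, &cap))
                perror("KVM_ENABLE_CAP(KVM_CAP_DISABLE_QUIRKS2)");

        return 0;
}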
Diffstat (limited to 'arch/x86/kvm/mmu/mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 42
1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index de05a26b0b7d..b278efb1d179 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6999,10 +6999,50 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 	kvm_mmu_zap_all(kvm);
 }
 
+/*
+ * Zapping leaf SPTEs with memslot range when a memslot is moved/deleted.
+ *
+ * Zapping non-leaf SPTEs, a.k.a. not-last SPTEs, isn't required, worst
+ * case scenario we'll have unused shadow pages lying around until they
+ * are recycled due to age or when the VM is destroyed.
+ */
+static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	struct kvm_gfn_range range = {
+		.slot = slot,
+		.start = slot->base_gfn,
+		.end = slot->base_gfn + slot->npages,
+		.may_block = true,
+	};
+	bool flush = false;
+
+	write_lock(&kvm->mmu_lock);
+
+	if (kvm_memslots_have_rmaps(kvm))
+		flush = kvm_handle_gfn_range(kvm, &range, kvm_zap_rmap);
+
+	if (tdp_mmu_enabled)
+		flush = kvm_tdp_mmu_unmap_gfn_range(kvm, &range, flush);
+
+	if (flush)
+		kvm_flush_remote_tlbs_memslot(kvm, slot);
+
+	write_unlock(&kvm->mmu_lock);
+}
+
+static inline bool kvm_memslot_flush_zap_all(struct kvm *kvm)
+{
+	return kvm->arch.vm_type == KVM_X86_DEFAULT_VM &&
+	       kvm_check_has_quirk(kvm, KVM_X86_QUIRK_SLOT_ZAP_ALL);
+}
+
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
-	kvm_mmu_zap_all_fast(kvm);
+	if (kvm_memslot_flush_zap_all(kvm))
+		kvm_mmu_zap_all_fast(kvm);
+	else
+		kvm_mmu_zap_memslot_leafs(kvm, slot);
 }
 
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
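
For illustration (again, not part of the commit), a sketch of the userspace operation that ends up in kvm_arch_flush_shadow_memslot(): deleting a memslot by re-registering it with memory_size == 0. Whether the kernel then takes the kvm_mmu_zap_all_fast() path or the new per-slot kvm_mmu_zap_memslot_leafs() path depends on the quirk. The slot id, guest physical address, and sizes below are arbitrary example values:

/* Sketch: create and then delete a small memslot; slot id, GPA and size are
 * arbitrary.  Deleting the slot is what reaches kvm_arch_flush_shadow_memslot()
 * in the kernel. */
#include <linux/kvm.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static int delete_memslot_demo(int vm_fd)
{
        void *mem = mmap(NULL, 0x200000, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (mem == MAP_FAILED)
                return -1;

        struct kvm_userspace_memory_region region;
        memset(&region, 0, sizeof(region));
        region.slot = 1;                                /* arbitrary slot id */
        region.guest_phys_addr = 0x100000000ULL;        /* arbitrary GPA */
        region.memory_size = 0x200000;                  /* 2 MiB */
        region.userspace_addr = (__u64)(uintptr_t)mem;

        if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region))
                return -1;

        /* Delete: same slot id, memory_size == 0.  The kernel then zaps either
         * everything (quirk on) or only this slot's leaf SPTEs (quirk off). */
        region.memory_size = 0;
        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}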