summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/kvm/mmu/mmu.c23
-rw-r--r--arch/x86/kvm/x86.c13
-rw-r--r--include/linux/kvm_host.h36
-rw-r--r--include/linux/pagemap.h1
-rw-r--r--mm/truncate.c3
-rw-r--r--virt/kvm/Kconfig8
-rw-r--r--virt/kvm/guest_memfd.c167
9 files changed, 231 insertions, 26 deletions
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 5187fcf4b610..566d19b02483 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -139,6 +139,9 @@ KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr)
KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
+KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
+KVM_X86_OP_OPTIONAL_RET0(private_max_mapping_level)
+KVM_X86_OP_OPTIONAL(gmem_invalidate)
#undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ece45b3f6f20..8340fe9c4459 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1812,6 +1812,9 @@ struct kvm_x86_ops {
gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+ int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
+ void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
+ int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn);
};
struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 662f62dfb2aa..037b8fbf4e6c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4280,6 +4280,25 @@ static inline u8 kvm_max_level_for_order(int order)
return PG_LEVEL_4K;
}
+static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
+ u8 max_level, int gmem_order)
+{
+ u8 req_max_level;
+
+ if (max_level == PG_LEVEL_4K)
+ return PG_LEVEL_4K;
+
+ max_level = min(kvm_max_level_for_order(gmem_order), max_level);
+ if (max_level == PG_LEVEL_4K)
+ return PG_LEVEL_4K;
+
+ req_max_level = static_call(kvm_x86_private_max_mapping_level)(kvm, pfn);
+ if (req_max_level)
+ max_level = min(max_level, req_max_level);
+
+ return req_max_level;
+}
+
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
{
@@ -4297,9 +4316,9 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
return r;
}
- fault->max_level = min(kvm_max_level_for_order(max_order),
- fault->max_level);
fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);
+ fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->pfn,
+ fault->max_level, max_order);
return RET_PF_CONTINUE;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 082ac6d95a3a..4b9ee2ca5675 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13599,6 +13599,19 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
+#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
+{
+ return static_call(kvm_x86_gmem_prepare)(kvm, pfn, gfn, max_order);
+}
+#endif
+
+#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
+{
+ static_call_cond(kvm_x86_gmem_invalidate)(start, end);
+}
+#endif
int kvm_spec_ctrl_test_value(u64 value)
{
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 692c01e41a18..7b57878c8c18 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2441,4 +2441,40 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
}
#endif /* CONFIG_KVM_PRIVATE_MEM */
+#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
+bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
+#endif
+
+/**
+ * kvm_gmem_populate() - Populate/prepare a GPA range with guest data
+ *
+ * @kvm: KVM instance
+ * @gfn: starting GFN to be populated
+ * @src: userspace-provided buffer containing data to copy into GFN range
+ * (passed to @post_populate, and incremented on each iteration
+ * if not NULL)
+ * @npages: number of pages to copy from userspace-buffer
+ * @post_populate: callback to issue for each gmem page that backs the GPA
+ * range
+ * @opaque: opaque data to pass to @post_populate callback
+ *
+ * This is primarily intended for cases where a gmem-backed GPA range needs
+ * to be initialized with userspace-provided data prior to being mapped into
+ * the guest as a private page. This should be called with the slots->lock
+ * held so that caller-enforced invariants regarding the expected memory
+ * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES.
+ *
+ * Returns the number of pages that were populated.
+ */
+typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
+ void __user *src, int order, void *opaque);
+
+long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
+ kvm_gmem_populate_cb post_populate, void *opaque);
+
+#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
+#endif
+
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d..f879c1d54da7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -207,6 +207,7 @@ enum mapping_flags {
AS_STABLE_WRITES, /* must wait for writeback before modifying
folio contents */
AS_UNMOVABLE, /* The mapping cannot be moved, ever */
+ AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping */
};
/**
diff --git a/mm/truncate.c b/mm/truncate.c
index 725b150e47ac..c501338c7ebd 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -233,7 +233,8 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
* doing a complex calculation here, and then doing the zeroing
* anyway if the page split fails.
*/
- folio_zero_range(folio, offset, length);
+ if (!(folio->mapping->flags & AS_INACCESSIBLE))
+ folio_zero_range(folio, offset, length);
if (folio_has_private(folio))
folio_invalidate(folio, offset, length);
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 29b73eedfe74..754c6c923427 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -109,3 +109,11 @@ config KVM_GENERIC_PRIVATE_MEM
select KVM_GENERIC_MEMORY_ATTRIBUTES
select KVM_PRIVATE_MEM
bool
+
+config HAVE_KVM_GMEM_PREPARE
+ bool
+ depends on KVM_PRIVATE_MEM
+
+config HAVE_KVM_GMEM_INVALIDATE
+ bool
+ depends on KVM_PRIVATE_MEM
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 0f4e0cf4f158..dfe50c64a552 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -13,14 +13,50 @@ struct kvm_gmem {
struct list_head entry;
};
-static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
+static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio)
+{
+#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+ struct list_head *gmem_list = &inode->i_mapping->i_private_list;
+ struct kvm_gmem *gmem;
+
+ list_for_each_entry(gmem, gmem_list, entry) {
+ struct kvm_memory_slot *slot;
+ struct kvm *kvm = gmem->kvm;
+ struct page *page;
+ kvm_pfn_t pfn;
+ gfn_t gfn;
+ int rc;
+
+ if (!kvm_arch_gmem_prepare_needed(kvm))
+ continue;
+
+ slot = xa_load(&gmem->bindings, index);
+ if (!slot)
+ continue;
+
+ page = folio_file_page(folio, index);
+ pfn = page_to_pfn(page);
+ gfn = slot->base_gfn + index - slot->gmem.pgoff;
+ rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page)));
+ if (rc) {
+ pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx, error %d.\n",
+ index, rc);
+ return rc;
+ }
+ }
+
+#endif
+ return 0;
+}
+
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
{
struct folio *folio;
/* TODO: Support huge pages. */
folio = filemap_grab_folio(inode->i_mapping, index);
- if (IS_ERR_OR_NULL(folio))
- return NULL;
+ if (IS_ERR(folio))
+ return folio;
/*
* Use the up-to-date flag to track whether or not the memory has been
@@ -41,6 +77,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
folio_mark_uptodate(folio);
}
+ if (prepare) {
+ int r = kvm_gmem_prepare_folio(inode, index, folio);
+ if (r < 0) {
+ folio_unlock(folio);
+ folio_put(folio);
+ return ERR_PTR(r);
+ }
+ }
+
/*
* Ignore accessed, referenced, and dirty flags. The memory is
* unevictable and there is no storage to write back to.
@@ -145,9 +190,9 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
break;
}
- folio = kvm_gmem_get_folio(inode, index);
- if (!folio) {
- r = -ENOMEM;
+ folio = kvm_gmem_get_folio(inode, index, true);
+ if (IS_ERR(folio)) {
+ r = PTR_ERR(folio);
break;
}
@@ -298,10 +343,24 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
return MF_DELAYED;
}
+#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+static void kvm_gmem_free_folio(struct folio *folio)
+{
+ struct page *page = folio_page(folio, 0);
+ kvm_pfn_t pfn = page_to_pfn(page);
+ int order = folio_order(folio);
+
+ kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
+}
+#endif
+
static const struct address_space_operations kvm_gmem_aops = {
.dirty_folio = noop_dirty_folio,
.migrate_folio = kvm_gmem_migrate_folio,
.error_remove_folio = kvm_gmem_error_folio,
+#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+ .free_folio = kvm_gmem_free_folio,
+#endif
};
static int kvm_gmem_getattr(struct mnt_idmap *idmap, const struct path *path,
@@ -357,6 +416,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
inode->i_private = (void *)(unsigned long)flags;
inode->i_op = &kvm_gmem_iops;
inode->i_mapping->a_ops = &kvm_gmem_aops;
+ inode->i_mapping->flags |= AS_INACCESSIBLE;
inode->i_mode |= S_IFREG;
inode->i_size = size;
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
@@ -482,32 +542,29 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
fput(file);
}
-int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
- gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
+static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
+ gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
{
pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
- struct kvm_gmem *gmem;
+ struct kvm_gmem *gmem = file->private_data;
struct folio *folio;
struct page *page;
- struct file *file;
int r;
- file = kvm_gmem_get_file(slot);
- if (!file)
+ if (file != slot->gmem.file) {
+ WARN_ON_ONCE(slot->gmem.file);
return -EFAULT;
+ }
gmem = file->private_data;
-
- if (WARN_ON_ONCE(xa_load(&gmem->bindings, index) != slot)) {
- r = -EIO;
- goto out_fput;
+ if (xa_load(&gmem->bindings, index) != slot) {
+ WARN_ON_ONCE(xa_load(&gmem->bindings, index));
+ return -EIO;
}
- folio = kvm_gmem_get_folio(file_inode(file), index);
- if (!folio) {
- r = -ENOMEM;
- goto out_fput;
- }
+ folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
if (folio_test_hwpoison(folio)) {
r = -EHWPOISON;
@@ -524,9 +581,73 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
out_unlock:
folio_unlock(folio);
-out_fput:
- fput(file);
return r;
}
+
+int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
+{
+ struct file *file = kvm_gmem_get_file(slot);
+ int r;
+
+ if (!file)
+ return -EFAULT;
+
+ r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true);
+ fput(file);
+ return r;
+}
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
+
+long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
+ kvm_gmem_populate_cb post_populate, void *opaque)
+{
+ struct file *file;
+ struct kvm_memory_slot *slot;
+ void __user *p;
+
+ int ret = 0, max_order;
+ long i;
+
+ lockdep_assert_held(&kvm->slots_lock);
+ if (npages < 0)
+ return -EINVAL;
+
+ slot = gfn_to_memslot(kvm, start_gfn);
+ if (!kvm_slot_can_be_private(slot))
+ return -EINVAL;
+
+ file = kvm_gmem_get_file(slot);
+ if (!file)
+ return -EFAULT;
+
+ filemap_invalidate_lock(file->f_mapping);
+
+ npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
+ for (i = 0; i < npages; i += (1 << max_order)) {
+ gfn_t gfn = start_gfn + i;
+ kvm_pfn_t pfn;
+
+ ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false);
+ if (ret)
+ break;
+
+ if (!IS_ALIGNED(gfn, (1 << max_order)) ||
+ (npages - i) < (1 << max_order))
+ max_order = 0;
+
+ p = src ? src + i * PAGE_SIZE : NULL;
+ ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
+
+ put_page(pfn_to_page(pfn));
+ if (ret)
+ break;
+ }
+
+ filemap_invalidate_unlock(file->f_mapping);
+
+ fput(file);
+ return ret && !i ? ret : i;
+}
+EXPORT_SYMBOL_GPL(kvm_gmem_populate);