summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@ozlabs.org>2019-08-26 16:20:47 +1000
committerPaul Mackerras <paulus@ozlabs.org>2020-03-19 16:39:52 +1100
commitcd758a9b57ee85f0733c759e60f42b969c81f27b (patch)
treea6478bebe2aa750d63cb18df69efc92a34b5f367 /arch
parent1c482452d5db0f52e4e8eed95bd7314eec537d78 (diff)
KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot in HPT page fault handler
This makes the same changes in the page fault handler for HPT guests that commits 31c8b0d0694a ("KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot() in page fault handler", 2018-03-01), 71d29f43b633 ("KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size", 2018-09-11) and 6579804c4317 ("KVM: PPC: Book3S HV: Avoid crash from THP collapse during radix page fault", 2018-10-04) made for the page fault handler for radix guests. In summary, where we used to call get_user_pages_fast() and then do special handling for VM_PFNMAP vmas, we now call __get_user_pages_fast() and then __gfn_to_pfn_memslot() if that fails, followed by reading the Linux PTE to get the host PFN, host page size and mapping attributes. This also brings in the change from SetPageDirty() to set_page_dirty_lock() which was done for the radix page fault handler in commit c3856aeb2940 ("KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler", 2018-02-23). Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c119
1 files changed, 57 insertions, 62 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 6c372f5c61b6..3aecec890d6f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -485,18 +485,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
__be64 *hptep;
unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base;
- unsigned long gpa, gfn, hva, pfn;
+ unsigned long gpa, gfn, hva, pfn, hpa;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
struct revmap_entry *rev;
- struct page *page, *pages[1];
- long index, ret, npages;
+ struct page *page;
+ long index, ret;
bool is_ci;
- unsigned int writing, write_ok;
- struct vm_area_struct *vma;
+ bool writing, write_ok;
+ unsigned int shift;
unsigned long rcbits;
long mmio_update;
- struct mm_struct *mm;
+ pte_t pte, *ptep;
if (kvm_is_radix(kvm))
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
@@ -570,59 +570,62 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
smp_rmb();
ret = -EFAULT;
- is_ci = false;
- pfn = 0;
page = NULL;
- mm = kvm->mm;
- pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
- if (npages < 1) {
- /* Check if it's an I/O mapping */
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, hva);
- if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
- (vma->vm_flags & VM_PFNMAP)) {
- pfn = vma->vm_pgoff +
- ((hva - vma->vm_start) >> PAGE_SHIFT);
- pte_size = psize;
- is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
- write_ok = vma->vm_flags & VM_WRITE;
- }
- up_read(&mm->mmap_sem);
- if (!pfn)
- goto out_put;
+
+ /*
+ * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+ * do it with !atomic && !async, which is how we call it.
+ * We always ask for write permission since the common case
+ * is that the page is writable.
+ */
+ if (__get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ write_ok = true;
} else {
- page = pages[0];
- pfn = page_to_pfn(page);
- if (PageHuge(page)) {
- page = compound_head(page);
- pte_size <<= compound_order(page);
- }
- /* if the guest wants write access, see if that is OK */
- if (!writing && hpte_is_writable(r)) {
- pte_t *ptep, pte;
- unsigned long flags;
- /*
- * We need to protect against page table destruction
- * hugepage split and collapse.
- */
- local_irq_save(flags);
- ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
- if (ptep) {
- pte = kvmppc_read_update_linux_pte(ptep, 1);
- if (__pte_write(pte))
- write_ok = 1;
- }
- local_irq_restore(flags);
+ /* Call KVM generic code to do the slow-path check */
+ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+ writing, &write_ok);
+ if (is_error_noslot_pfn(pfn))
+ return -EFAULT;
+ page = NULL;
+ if (pfn_valid(pfn)) {
+ page = pfn_to_page(pfn);
+ if (PageReserved(page))
+ page = NULL;
}
}
+ /*
+ * Read the PTE from the process' radix tree and use that
+ * so we get the shift and attribute bits.
+ */
+ local_irq_disable();
+ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ /*
+ * If the PTE disappeared temporarily due to a THP
+ * collapse, just return and let the guest try again.
+ */
+ if (!ptep) {
+ local_irq_enable();
+ if (page)
+ put_page(page);
+ return RESUME_GUEST;
+ }
+ pte = *ptep;
+ local_irq_enable();
+ hpa = pte_pfn(pte) << PAGE_SHIFT;
+ pte_size = PAGE_SIZE;
+ if (shift)
+ pte_size = 1ul << shift;
+ is_ci = pte_ci(pte);
+
if (psize > pte_size)
goto out_put;
+ if (pte_size > psize)
+ hpa |= hva & (pte_size - psize);
/* Check WIMG vs. the actual page we're accessing */
if (!hpte_cache_flags_ok(r, is_ci)) {
@@ -636,14 +639,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/*
- * Set the HPTE to point to pfn.
- * Since the pfn is at PAGE_SIZE granularity, make sure we
+ * Set the HPTE to point to hpa.
+ * Since the hpa is at PAGE_SIZE granularity, make sure we
* don't mask out lower-order bits if psize < PAGE_SIZE.
*/
if (psize < PAGE_SIZE)
psize = PAGE_SIZE;
- r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
- ((pfn << PAGE_SHIFT) & ~(psize - 1));
+ r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r);
ret = RESUME_GUEST;
@@ -708,20 +710,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
asm volatile("ptesync" : : : "memory");
preempt_enable();
if (page && hpte_is_writable(r))
- SetPageDirty(page);
+ set_page_dirty_lock(page);
out_put:
trace_kvm_page_fault_exit(vcpu, hpte, ret);
- if (page) {
- /*
- * We drop pages[0] here, not page because page might
- * have been set to the head page of a compound, but
- * we have to drop the reference on the correct tail
- * page to match the get inside gup()
- */
- put_page(pages[0]);
- }
+ if (page)
+ put_page(page);
return ret;
out_unlock: