summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/44x_tlb.c5
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/Makefile14
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c219
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c23
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c50
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S17
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c2
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c44
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c246
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.c240
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.h27
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c157
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S14
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S14
-rw-r--r--arch/powerpc/kvm/book3s_pr.c43
-rw-r--r--arch/powerpc/kvm/book3s_xics.c1
-rw-r--r--arch/powerpc/kvm/booke.c27
-rw-r--r--arch/powerpc/kvm/e500_mmu.c5
-rw-r--r--arch/powerpc/kvm/e500mc.c2
-rw-r--r--arch/powerpc/kvm/emulate.c48
-rw-r--r--arch/powerpc/kvm/powerpc.c26
23 files changed, 774 insertions, 453 deletions
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 5dd3ab469976..ed0385448148 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -441,6 +441,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct kvmppc_44x_tlbe *tlbe;
unsigned int gtlb_index;
+ int idx;
gtlb_index = kvmppc_get_gpr(vcpu, ra);
if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) {
@@ -473,6 +474,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
return EMULATE_FAIL;
}
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
if (tlbe_is_host_safe(vcpu, tlbe)) {
gva_t eaddr;
gpa_t gpaddr;
@@ -489,6 +492,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1,
tlbe->word2);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f862579..ffaef2cb101a 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV
bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
depends on KVM_BOOK3S_64
select MMU_NOTIFIER
+ select CMA
---help---
Support running unmodified book3s_64 guest kernels in
virtual machines on POWER7 and PPC970 processors that have
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 422de3f4d46c..6646c952c5e3 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -5,9 +5,10 @@
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+KVM := ../../../virt/kvm
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
- eventfd.o)
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
+ $(KVM)/eventfd.o
CFLAGS_44x_tlb.o := -I.
CFLAGS_e500_mmu.o := -I.
@@ -53,7 +54,7 @@ kvm-e500mc-objs := \
kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
- ../../../virt/kvm/coalesced_mmio.o \
+ $(KVM)/coalesced_mmio.o \
fpu.o \
book3s_paired_singles.o \
book3s_pr.o \
@@ -80,14 +81,15 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_64_vio_hv.o \
book3s_hv_ras.o \
book3s_hv_builtin.o \
+ book3s_hv_cma.o \
$(kvm-book3s_64-builtin-xics-objs-y)
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
kvm-book3s_64-module-objs := \
- ../../../virt/kvm/kvm_main.o \
- ../../../virt/kvm/eventfd.o \
+ $(KVM)/kvm_main.o \
+ $(KVM)/eventfd.o \
powerpc.o \
emulate.o \
book3s.o \
@@ -111,7 +113,7 @@ kvm-book3s_32-objs := \
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index b871721c0050..7e345e00661a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -26,6 +26,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
/* #define DEBUG_MMU */
@@ -76,6 +77,24 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
return NULL;
}
+static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe)
+{
+ return slbe->tb ? SID_SHIFT_1T : SID_SHIFT;
+}
+
+static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe)
+{
+ return (1ul << kvmppc_slb_sid_shift(slbe)) - 1;
+}
+
+static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr)
+{
+ eaddr &= kvmppc_slb_offset_mask(slb);
+
+ return (eaddr >> VPN_SHIFT) |
+ ((slb->vsid) << (kvmppc_slb_sid_shift(slb) - VPN_SHIFT));
+}
+
static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
bool data)
{
@@ -85,11 +104,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
if (!slb)
return 0;
- if (slb->tb)
- return (((u64)eaddr >> 12) & 0xfffffff) |
- (((u64)slb->vsid) << 28);
-
- return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16);
+ return kvmppc_slb_calc_vpn(slb, eaddr);
}
static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
@@ -100,7 +115,8 @@ static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
{
int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
- return ((eaddr & 0xfffffff) >> p);
+
+ return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
}
static hva_t kvmppc_mmu_book3s_64_get_pteg(
@@ -109,13 +125,15 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
bool second)
{
u64 hash, pteg, htabsize;
- u32 page;
+ u32 ssize;
hva_t r;
+ u64 vpn;
- page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1);
- hash = slbe->vsid ^ page;
+ vpn = kvmppc_slb_calc_vpn(slbe, eaddr);
+ ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M;
+ hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize);
if (second)
hash = ~hash;
hash &= ((1ULL << 39ULL) - 1ULL);
@@ -146,7 +164,7 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
u64 avpn;
avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
- avpn |= slbe->vsid << (28 - p);
+ avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
if (p < 24)
avpn >>= ((80 - p) - 56) - 8;
@@ -164,11 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
hva_t ptegp;
u64 pteg[16];
u64 avpn = 0;
+ u64 v, r;
+ u64 v_val, v_mask;
+ u64 eaddr_mask;
int i;
- u8 key = 0;
+ u8 pp, key = 0;
bool found = false;
- bool perm_err = false;
- int second = 0;
+ bool second = false;
ulong mp_ea = vcpu->arch.magic_page_ea;
/* Magic page override */
@@ -190,13 +210,23 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
if (!slbe)
goto no_seg_found;
+ avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
+ v_val = avpn & HPTE_V_AVPN;
+
+ if (slbe->tb)
+ v_val |= SLB_VSID_B_1T;
+ if (slbe->large)
+ v_val |= HPTE_V_LARGE;
+ v_val |= HPTE_V_VALID;
+
+ v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
+ HPTE_V_SECONDARY;
+
do_second:
ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
if (kvm_is_error_hva(ptegp))
goto no_page_found;
- avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
-
if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
goto no_page_found;
@@ -208,101 +238,75 @@ do_second:
key = 4;
for (i=0; i<16; i+=2) {
- u64 v = pteg[i];
- u64 r = pteg[i+1];
-
- /* Valid check */
- if (!(v & HPTE_V_VALID))
- continue;
- /* Hash check */
- if ((v & HPTE_V_SECONDARY) != second)
- continue;
-
- /* AVPN compare */
- if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) {
- u8 pp = (r & HPTE_R_PP) | key;
- int eaddr_mask = 0xFFF;
-
- gpte->eaddr = eaddr;
- gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu,
- eaddr,
- data);
- if (slbe->large)
- eaddr_mask = 0xFFFFFF;
- gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask);
- gpte->may_execute = ((r & HPTE_R_N) ? false : true);
- gpte->may_read = false;
- gpte->may_write = false;
-
- switch (pp) {
- case 0:
- case 1:
- case 2:
- case 6:
- gpte->may_write = true;
- /* fall through */
- case 3:
- case 5:
- case 7:
- gpte->may_read = true;
- break;
- }
-
- if (!gpte->may_read) {
- perm_err = true;
- continue;
- }
-
- dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
- "-> 0x%lx\n",
- eaddr, avpn, gpte->vpage, gpte->raddr);
+ /* Check all relevant fields of 1st dword */
+ if ((pteg[i] & v_mask) == v_val) {
found = true;
break;
}
}
- /* Update PTE R and C bits, so the guest's swapper knows we used the
- * page */
- if (found) {
- u32 oldr = pteg[i+1];
-
- if (gpte->may_read) {
- /* Set the accessed flag */
- pteg[i+1] |= HPTE_R_R;
- }
- if (gpte->may_write) {
- /* Set the dirty flag */
- pteg[i+1] |= HPTE_R_C;
- } else {
- dprintk("KVM: Mapping read-only page!\n");
- }
-
- /* Write back into the PTEG */
- if (pteg[i+1] != oldr)
- copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
+ if (!found) {
+ if (second)
+ goto no_page_found;
+ v_val |= HPTE_V_SECONDARY;
+ second = true;
+ goto do_second;
+ }
- return 0;
- } else {
- dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
- "ptegp=0x%lx)\n",
- eaddr, to_book3s(vcpu)->sdr1, ptegp);
- for (i = 0; i < 16; i += 2)
- dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n",
- i, pteg[i], pteg[i+1], avpn);
-
- if (!second) {
- second = HPTE_V_SECONDARY;
- goto do_second;
- }
+ v = pteg[i];
+ r = pteg[i+1];
+ pp = (r & HPTE_R_PP) | key;
+ eaddr_mask = 0xFFF;
+
+ gpte->eaddr = eaddr;
+ gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
+ if (slbe->large)
+ eaddr_mask = 0xFFFFFF;
+ gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+ gpte->may_execute = ((r & HPTE_R_N) ? false : true);
+ gpte->may_read = false;
+ gpte->may_write = false;
+
+ switch (pp) {
+ case 0:
+ case 1:
+ case 2:
+ case 6:
+ gpte->may_write = true;
+ /* fall through */
+ case 3:
+ case 5:
+ case 7:
+ gpte->may_read = true;
+ break;
}
+ dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
+ "-> 0x%lx\n",
+ eaddr, avpn, gpte->vpage, gpte->raddr);
-no_page_found:
+ /* Update PTE R and C bits, so the guest's swapper knows we used the
+ * page */
+ if (gpte->may_read) {
+ /* Set the accessed flag */
+ r |= HPTE_R_R;
+ }
+ if (data && gpte->may_write) {
+ /* Set the dirty flag -- XXX even if not writing */
+ r |= HPTE_R_C;
+ }
+ /* Write back into the PTEG */
+ if (pteg[i+1] != r) {
+ pteg[i+1] = r;
+ copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
+ }
- if (perm_err)
+ if (!gpte->may_read)
return -EPERM;
+ return 0;
+no_page_found:
return -ENOENT;
no_seg_found:
@@ -334,7 +338,7 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
slbe->esid = slbe->tb ? esid_1t : esid;
- slbe->vsid = rs >> 12;
+ slbe->vsid = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16);
slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;
slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0;
@@ -375,6 +379,7 @@ static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
{
struct kvmppc_slb *slbe;
+ u64 seg_size;
dprintk("KVM MMU: slbie(0x%llx)\n", ea);
@@ -386,8 +391,11 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid);
slbe->valid = false;
+ slbe->orige = 0;
+ slbe->origv = 0;
- kvmppc_mmu_map_segment(vcpu, ea);
+ seg_size = 1ull << kvmppc_slb_sid_shift(slbe);
+ kvmppc_mmu_flush_segment(vcpu, ea & ~(seg_size - 1), seg_size);
}
static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
@@ -396,8 +404,11 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
dprintk("KVM MMU: slbia()\n");
- for (i = 1; i < vcpu->arch.slb_nr; i++)
+ for (i = 1; i < vcpu->arch.slb_nr; i++) {
vcpu->arch.slb[i].valid = false;
+ vcpu->arch.slb[i].orige = 0;
+ vcpu->arch.slb[i].origv = 0;
+ }
if (vcpu->arch.shared->msr & MSR_IR) {
kvmppc_mmu_flush_segments(vcpu);
@@ -467,8 +478,14 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
- if (slb)
+ if (slb) {
gvsid = slb->vsid;
+ if (slb->tb) {
+ gvsid <<= SID_SHIFT_1T - SID_SHIFT;
+ gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
+ gvsid |= VSID_1T;
+ }
+ }
}
switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 3a9a1aceb14f..e5240524bf6c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -34,7 +34,7 @@
void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
- MMU_PAGE_4K, MMU_SEGSIZE_256M,
+ MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M,
false);
}
@@ -301,6 +301,23 @@ out:
return r;
}
+void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
+{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong seg_mask = -seg_size;
+ int i;
+
+ for (i = 1; i < svcpu->slb_max; i++) {
+ if ((svcpu->slb[i].esid & SLB_ESID_V) &&
+ (svcpu->slb[i].esid & seg_mask) == ea) {
+ /* Invalidate this entry */
+ svcpu->slb[i].esid = 0;
+ }
+ }
+
+ svcpu_put(svcpu);
+}
+
void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
{
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
@@ -325,9 +342,9 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
return -1;
vcpu3s->context_id[0] = err;
- vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1)
+ vcpu3s->proto_vsid_max = ((u64)(vcpu3s->context_id[0] + 1)
<< ESID_BITS) - 1;
- vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS;
+ vcpu3s->proto_vsid_first = (u64)vcpu3s->context_id[0] << ESID_BITS;
vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first;
kvmppc_mmu_hpte_init(vcpu);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 5880dfb31074..043eec8461e7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,8 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
+#include "book3s_hv_cma.h"
+
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
@@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
unsigned long hpt;
struct revmap_entry *rev;
- struct kvmppc_linear_info *li;
- long order = kvm_hpt_order;
+ struct page *page = NULL;
+ long order = KVM_DEFAULT_HPT_ORDER;
if (htab_orderp) {
order = *htab_orderp;
@@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
order = PPC_MIN_HPT_ORDER;
}
+ kvm->arch.hpt_cma_alloc = 0;
/*
- * If the user wants a different size from default,
* try first to allocate it from the kernel page allocator.
+ * We keep the CMA reserved for failed allocation.
*/
- hpt = 0;
- if (order != kvm_hpt_order) {
- hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
- __GFP_NOWARN, order - PAGE_SHIFT);
- if (!hpt)
- --order;
- }
+ hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT |
+ __GFP_NOWARN, order - PAGE_SHIFT);
/* Next try to allocate from the preallocated pool */
if (!hpt) {
- li = kvm_alloc_hpt();
- if (li) {
- hpt = (ulong)li->base_virt;
- kvm->arch.hpt_li = li;
- order = kvm_hpt_order;
- }
+ VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
+ page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+ if (page) {
+ hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ kvm->arch.hpt_cma_alloc = 1;
+ } else
+ --order;
}
/* Lastly try successively smaller sizes from the page allocator */
@@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
return 0;
out_freehpt:
- if (kvm->arch.hpt_li)
- kvm_release_hpt(kvm->arch.hpt_li);
+ if (kvm->arch.hpt_cma_alloc)
+ kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
else
free_pages(hpt, order - PAGE_SHIFT);
return -ENOMEM;
@@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm)
{
kvmppc_free_lpid(kvm->arch.lpid);
vfree(kvm->arch.revmap);
- if (kvm->arch.hpt_li)
- kvm_release_hpt(kvm->arch.hpt_li);
+ if (kvm->arch.hpt_cma_alloc)
+ kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
+ 1 << (kvm->arch.hpt_order - PAGE_SHIFT));
else
free_pages(kvm->arch.hpt_virt,
kvm->arch.hpt_order - PAGE_SHIFT);
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/* if the guest wants write access, see if that is OK */
if (!writing && hpte_is_writable(r)) {
+ unsigned int hugepage_shift;
pte_t *ptep, pte;
/*
@@ -683,9 +684,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
rcu_read_lock_sched();
ptep = find_linux_pte_or_hugepte(current->mm->pgd,
- hva, NULL);
- if (ptep && pte_present(*ptep)) {
- pte = kvmppc_read_update_linux_pte(ptep, 1);
+ hva, &hugepage_shift);
+ if (ptep) {
+ pte = kvmppc_read_update_linux_pte(ptep, 1,
+ hugepage_shift);
if (pte_write(pte))
write_ok = 1;
}
@@ -1577,7 +1579,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
ctx->first_pass = 1;
rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
- ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag);
+ ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
kvm_put_kvm(kvm);
return ret;
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 56b983e7b738..4f12e8f0c718 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -17,6 +17,10 @@
* Authors: Alexander Graf <agraf@suse.de>
*/
+#ifdef __LITTLE_ENDIAN__
+#error Need to fix SLB shadow accesses in little endian mode
+#endif
+
#define SHADOW_SLB_ESID(num) (SLBSHADOW_SAVEAREA + (num * 0x10))
#define SHADOW_SLB_VSID(num) (SLBSHADOW_SAVEAREA + (num * 0x10) + 0x8)
#define UNBOLT_SLB_ENTRY(num) \
@@ -66,10 +70,6 @@ slb_exit_skip_ ## num:
ld r12, PACA_SLBSHADOWPTR(r13)
- /* Save off the first entry so we can slbie it later */
- ld r10, SHADOW_SLB_ESID(0)(r12)
- ld r11, SHADOW_SLB_VSID(0)(r12)
-
/* Remove bolted entries */
UNBOLT_SLB_ENTRY(0)
UNBOLT_SLB_ENTRY(1)
@@ -81,15 +81,10 @@ slb_exit_skip_ ## num:
/* Flush SLB */
+ li r10, 0
+ slbmte r10, r10
slbia
- /* r0 = esid & ESID_MASK */
- rldicr r10, r10, 0, 35
- /* r0 |= CLASS_BIT(VSID) */
- rldic r12, r11, 56 - 36, 36
- or r10, r10, r12
- slbie r10
-
/* Fill SLB with our shadow */
lbz r12, SVCPU_SLB_MAX(r3)
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index b2d3f3b2de72..54cf9bc94dad 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -136,7 +136,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
mutex_unlock(&kvm->lock);
return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
- stt, O_RDWR);
+ stt, O_RDWR | O_CLOEXEC);
fail:
if (stt) {
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 1f6344c4408d..360ce68c9809 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
case SPRN_MSSSR0:
+ case SPRN_DABR:
break;
unprivileged:
default:
@@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
case SPRN_MSSSR0:
+ case SPRN_DABR:
*spr_val = 0;
break;
default:
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 550f5928b394..62a2b5ab08ed 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -217,7 +217,7 @@ struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
- vpa->shared_proc = 1;
+ vpa->__old_status |= LPPACA_OLD_SHARED_PROC;
vpa->yield_count = 1;
}
@@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+ struct kvm_sregs *sregs)
{
int i;
- sregs->pvr = vcpu->arch.pvr;
-
memset(sregs, 0, sizeof(struct kvm_sregs));
+ sregs->pvr = vcpu->arch.pvr;
for (i = 0; i < vcpu->arch.slb_max; i++) {
sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
@@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+ struct kvm_sregs *sregs)
{
int i, j;
@@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct kvmppc_linear_info *ri = vma->vm_file->private_data;
struct page *page;
+ struct kvm_rma_info *ri = vma->vm_file->private_data;
- if (vmf->pgoff >= ri->npages)
+ if (vmf->pgoff >= kvm_rma_pages)
return VM_FAULT_SIGBUS;
page = pfn_to_page(ri->base_pfn + vmf->pgoff);
@@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
static int kvm_rma_release(struct inode *inode, struct file *filp)
{
- struct kvmppc_linear_info *ri = filp->private_data;
+ struct kvm_rma_info *ri = filp->private_data;
kvm_release_rma(ri);
return 0;
@@ -1549,18 +1548,27 @@ static const struct file_operations kvm_rma_fops = {
long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{
- struct kvmppc_linear_info *ri;
long fd;
+ struct kvm_rma_info *ri;
+ /*
+ * Only do this on PPC970 in HV mode
+ */
+ if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+ !cpu_has_feature(CPU_FTR_ARCH_201))
+ return -EINVAL;
+
+ if (!kvm_rma_pages)
+ return -EINVAL;
ri = kvm_alloc_rma();
if (!ri)
return -ENOMEM;
- fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
+ fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
if (fd < 0)
kvm_release_rma(ri);
- ret->rma_size = ri->npages << PAGE_SHIFT;
+ ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
return fd;
}
@@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
struct kvm *kvm = vcpu->kvm;
- struct kvmppc_linear_info *ri = NULL;
+ struct kvm_rma_info *ri = NULL;
unsigned long hva;
struct kvm_memory_slot *memslot;
struct vm_area_struct *vma;
@@ -1803,13 +1811,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
} else {
/* Set up to use an RMO region */
- rma_size = ri->npages;
+ rma_size = kvm_rma_pages;
if (rma_size > memslot->npages)
rma_size = memslot->npages;
rma_size <<= PAGE_SHIFT;
rmls = lpcr_rmls(rma_size);
err = -EINVAL;
- if (rmls < 0) {
+ if ((long)rmls < 0) {
pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
goto out_srcu;
}
@@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* POWER7 */
lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
lpcr |= rmls << LPCR_RMLS_SH;
- kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
+ kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
}
kvm->arch.lpcr = lpcr;
pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
/* Initialize phys addrs of pages in RMO */
- npages = ri->npages;
+ npages = kvm_rma_pages;
porder = __ilog2(npages);
physp = memslot->arch.slot_phys;
if (physp) {
@@ -1864,7 +1872,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
up_out:
up_read(&current->mm->mmap_sem);
- goto out;
+ goto out_srcu;
}
int kvmppc_core_init_vm(struct kvm *kvm)
@@ -1874,7 +1882,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
/* Allocate the guest's logical partition ID */
lpid = kvmppc_alloc_lpid();
- if (lpid < 0)
+ if ((long)lpid < 0)
return -ENOMEM;
kvm->arch.lpid = lpid;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ec0a9e5de100..8cd0daebb82d 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -13,33 +13,34 @@
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#define KVM_LINEAR_RMA 0
-#define KVM_LINEAR_HPT 1
-
-static void __init kvm_linear_init_one(ulong size, int count, int type);
-static struct kvmppc_linear_info *kvm_alloc_linear(int type);
-static void kvm_release_linear(struct kvmppc_linear_info *ri);
-
-int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
-EXPORT_SYMBOL_GPL(kvm_hpt_order);
-
-/*************** RMA *************/
-
+#include "book3s_hv_cma.h"
+/*
+ * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
+ * should be power of 2.
+ */
+#define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */
+/*
+ * By default we reserve 5% of memory for hash pagetable allocation.
+ */
+static unsigned long kvm_cma_resv_ratio = 5;
/*
- * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
* Each RMA has to be physically contiguous and of a size that the
* hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
* and other larger sizes. Since we are unlikely to be allocate that
* much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot for KVM to use.
+ * we preallocate a set of RMAs in early boot using CMA.
+ * should be power of 2.
*/
-static unsigned long kvm_rma_size = 64 << 20; /* 64MB */
-static unsigned long kvm_rma_count;
+unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
+EXPORT_SYMBOL_GPL(kvm_rma_pages);
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
@@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size)
static int __init early_parse_rma_size(char *p)
{
- if (!p)
- return 1;
+ unsigned long kvm_rma_size;
+ pr_debug("%s(%s)\n", __func__, p);
+ if (!p)
+ return -EINVAL;
kvm_rma_size = memparse(p, &p);
-
+ /*
+ * Check that the requested size is one supported in hardware
+ */
+ if (lpcr_rmls(kvm_rma_size) < 0) {
+ pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+ return -EINVAL;
+ }
+ kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
return 0;
}
early_param("kvm_rma_size", early_parse_rma_size);
-static int __init early_parse_rma_count(char *p)
+struct kvm_rma_info *kvm_alloc_rma()
{
- if (!p)
- return 1;
-
- kvm_rma_count = simple_strtoul(p, NULL, 0);
-
- return 0;
-}
-early_param("kvm_rma_count", early_parse_rma_count);
-
-struct kvmppc_linear_info *kvm_alloc_rma(void)
-{
- return kvm_alloc_linear(KVM_LINEAR_RMA);
+ struct page *page;
+ struct kvm_rma_info *ri;
+
+ ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
+ if (!ri)
+ return NULL;
+ page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+ if (!page)
+ goto err_out;
+ atomic_set(&ri->use_count, 1);
+ ri->base_pfn = page_to_pfn(page);
+ return ri;
+err_out:
+ kfree(ri);
+ return NULL;
}
EXPORT_SYMBOL_GPL(kvm_alloc_rma);
-void kvm_release_rma(struct kvmppc_linear_info *ri)
+void kvm_release_rma(struct kvm_rma_info *ri)
{
- kvm_release_linear(ri);
+ if (atomic_dec_and_test(&ri->use_count)) {
+ kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+ kfree(ri);
+ }
}
EXPORT_SYMBOL_GPL(kvm_release_rma);
-/*************** HPT *************/
-
-/*
- * This maintains a list of big linear HPT tables that contain the GVA->HPA
- * memory mappings. If we don't reserve those early on, we might not be able
- * to get a big (usually 16MB) linear memory region from the kernel anymore.
- */
-
-static unsigned long kvm_hpt_count;
-
-static int __init early_parse_hpt_count(char *p)
+static int __init early_parse_kvm_cma_resv(char *p)
{
+ pr_debug("%s(%s)\n", __func__, p);
if (!p)
- return 1;
-
- kvm_hpt_count = simple_strtoul(p, NULL, 0);
-
- return 0;
+ return -EINVAL;
+ return kstrtoul(p, 0, &kvm_cma_resv_ratio);
}
-early_param("kvm_hpt_count", early_parse_hpt_count);
+early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
-struct kvmppc_linear_info *kvm_alloc_hpt(void)
+struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
- return kvm_alloc_linear(KVM_LINEAR_HPT);
+ unsigned long align_pages = HPT_ALIGN_PAGES;
+
+ /* Old CPUs require HPT aligned on a multiple of its size */
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ align_pages = nr_pages;
+ return kvm_alloc_cma(nr_pages, align_pages);
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
-void kvm_release_hpt(struct kvmppc_linear_info *li)
+void kvm_release_hpt(struct page *page, unsigned long nr_pages)
{
- kvm_release_linear(li);
+ kvm_release_cma(page, nr_pages);
}
EXPORT_SYMBOL_GPL(kvm_release_hpt);
-/*************** generic *************/
-
-static LIST_HEAD(free_linears);
-static DEFINE_SPINLOCK(linear_lock);
-
-static void __init kvm_linear_init_one(ulong size, int count, int type)
-{
- unsigned long i;
- unsigned long j, npages;
- void *linear;
- struct page *pg;
- const char *typestr;
- struct kvmppc_linear_info *linear_info;
-
- if (!count)
- return;
-
- typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
-
- npages = size >> PAGE_SHIFT;
- linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
- for (i = 0; i < count; ++i) {
- linear = alloc_bootmem_align(size, size);
- pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
- size >> 20);
- linear_info[i].base_virt = linear;
- linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
- linear_info[i].npages = npages;
- linear_info[i].type = type;
- list_add_tail(&linear_info[i].list, &free_linears);
- atomic_set(&linear_info[i].use_count, 0);
-
- pg = pfn_to_page(linear_info[i].base_pfn);
- for (j = 0; j < npages; ++j) {
- atomic_inc(&pg->_count);
- ++pg;
- }
- }
-}
-
-static struct kvmppc_linear_info *kvm_alloc_linear(int type)
-{
- struct kvmppc_linear_info *ri, *ret;
-
- ret = NULL;
- spin_lock(&linear_lock);
- list_for_each_entry(ri, &free_linears, list) {
- if (ri->type != type)
- continue;
-
- list_del(&ri->list);
- atomic_inc(&ri->use_count);
- memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
- ret = ri;
- break;
- }
- spin_unlock(&linear_lock);
- return ret;
-}
-
-static void kvm_release_linear(struct kvmppc_linear_info *ri)
-{
- if (atomic_dec_and_test(&ri->use_count)) {
- spin_lock(&linear_lock);
- list_add_tail(&ri->list, &free_linears);
- spin_unlock(&linear_lock);
-
- }
-}
-
-/*
- * Called at boot time while the bootmem allocator is active,
- * to allocate contiguous physical memory for the hash page
- * tables for guests.
+/**
+ * kvm_cma_reserve() - reserve area for kvm hash pagetable
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
*/
-void __init kvm_linear_init(void)
+void __init kvm_cma_reserve(void)
{
- /* HPT */
- kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
-
- /* RMA */
- /* Only do this on PPC970 in HV mode */
- if (!cpu_has_feature(CPU_FTR_HVMODE) ||
- !cpu_has_feature(CPU_FTR_ARCH_201))
- return;
-
- if (!kvm_rma_size || !kvm_rma_count)
- return;
-
- /* Check that the requested size is one supported in hardware */
- if (lpcr_rmls(kvm_rma_size) < 0) {
- pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
- return;
+ unsigned long align_size;
+ struct memblock_region *reg;
+ phys_addr_t selected_size = 0;
+ /*
+ * We cannot use memblock_phys_mem_size() here, because
+ * memblock_analyze() has not been called yet.
+ */
+ for_each_memblock(memory, reg)
+ selected_size += memblock_region_memory_end_pfn(reg) -
+ memblock_region_memory_base_pfn(reg);
+
+ selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+ if (selected_size) {
+ pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+ (unsigned long)selected_size / SZ_1M);
+ /*
+ * Old CPUs require HPT aligned on a multiple of its size. So for them
+ * make the alignment as max size we could request.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ align_size = __rounddown_pow_of_two(selected_size);
+ else
+ align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+
+ align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+ kvm_cma_declare_contiguous(selected_size, align_size);
}
-
- kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
}
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
new file mode 100644
index 000000000000..d9d3d8553d51
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -0,0 +1,240 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+#define pr_fmt(fmt) "kvm_cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+# define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include "book3s_hv_cma.h"
+
+struct kvm_cma {
+ unsigned long base_pfn;
+ unsigned long count;
+ unsigned long *bitmap;
+};
+
+static DEFINE_MUTEX(kvm_cma_mutex);
+static struct kvm_cma kvm_cma_area;
+
+/**
+ * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
+ * for kvm hash pagetable
+ * @size: Size of the reserved memory.
+ * @alignment: Alignment for the contiguous memory area
+ *
+ * This function reserves memory for kvm cma area. It should be
+ * called by arch code when early allocator (memblock or bootmem)
+ * is still activate.
+ */
+long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
+{
+ long base_pfn;
+ phys_addr_t addr;
+ struct kvm_cma *cma = &kvm_cma_area;
+
+ pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
+
+ if (!size)
+ return -EINVAL;
+ /*
+ * Sanitise input arguments.
+ * We should be pageblock aligned for CMA.
+ */
+ alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
+ size = ALIGN(size, alignment);
+ /*
+ * Reserve memory
+ * Use __memblock_alloc_base() since
+ * memblock_alloc_base() panic()s.
+ */
+ addr = __memblock_alloc_base(size, alignment, 0);
+ if (!addr) {
+ base_pfn = -ENOMEM;
+ goto err;
+ } else
+ base_pfn = PFN_DOWN(addr);
+
+ /*
+ * Each reserved area must be initialised later, when more kernel
+ * subsystems (like slab allocator) are available.
+ */
+ cma->base_pfn = base_pfn;
+ cma->count = size >> PAGE_SHIFT;
+ pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
+ return 0;
+err:
+ pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+ return base_pfn;
+}
+
+/**
+ * kvm_alloc_cma() - allocate pages from contiguous area
+ * @nr_pages: Requested number of pages.
+ * @align_pages: Requested alignment in number of pages
+ *
+ * This function allocates memory buffer for hash pagetable.
+ */
+struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
+{
+ int ret;
+ struct page *page = NULL;
+ struct kvm_cma *cma = &kvm_cma_area;
+ unsigned long chunk_count, nr_chunk;
+ unsigned long mask, pfn, pageno, start = 0;
+
+
+ if (!cma || !cma->count)
+ return NULL;
+
+ pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
+ (void *)cma, nr_pages, align_pages);
+
+ if (!nr_pages)
+ return NULL;
+ /*
+ * align mask with chunk size. The bit tracks pages in chunk size
+ */
+ VM_BUG_ON(!is_power_of_2(align_pages));
+ mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
+ BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
+
+ chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+ nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+ mutex_lock(&kvm_cma_mutex);
+ for (;;) {
+ pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
+ start, nr_chunk, mask);
+ if (pageno >= chunk_count)
+ break;
+
+ pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
+ ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
+ if (ret == 0) {
+ bitmap_set(cma->bitmap, pageno, nr_chunk);
+ page = pfn_to_page(pfn);
+ memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
+ break;
+ } else if (ret != -EBUSY) {
+ break;
+ }
+ pr_debug("%s(): memory range at %p is busy, retrying\n",
+ __func__, pfn_to_page(pfn));
+ /* try again with a bit different memory target */
+ start = pageno + mask + 1;
+ }
+ mutex_unlock(&kvm_cma_mutex);
+ pr_debug("%s(): returned %p\n", __func__, page);
+ return page;
+}
+
+/**
+ * kvm_release_cma() - release allocated pages for hash pagetable
+ * @pages: Allocated pages.
+ * @nr_pages: Number of allocated pages.
+ *
+ * This function releases memory allocated by kvm_alloc_cma().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
+{
+ unsigned long pfn;
+ unsigned long nr_chunk;
+ struct kvm_cma *cma = &kvm_cma_area;
+
+ if (!cma || !pages)
+ return false;
+
+ pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
+
+ pfn = page_to_pfn(pages);
+
+ if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+ return false;
+
+ VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+ nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+ mutex_lock(&kvm_cma_mutex);
+ bitmap_clear(cma->bitmap,
+ (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
+ nr_chunk);
+ free_contig_range(pfn, nr_pages);
+ mutex_unlock(&kvm_cma_mutex);
+
+ return true;
+}
+
+static int __init kvm_cma_activate_area(unsigned long base_pfn,
+ unsigned long count)
+{
+ unsigned long pfn = base_pfn;
+ unsigned i = count >> pageblock_order;
+ struct zone *zone;
+
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ zone = page_zone(pfn_to_page(pfn));
+ do {
+ unsigned j;
+ base_pfn = pfn;
+ for (j = pageblock_nr_pages; j; --j, pfn++) {
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ /*
+ * alloc_contig_range requires the pfn range
+ * specified to be in the same zone. Make this
+ * simple by forcing the entire CMA resv range
+ * to be in the same zone.
+ */
+ if (page_zone(pfn_to_page(pfn)) != zone)
+ return -EINVAL;
+ }
+ init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+ } while (--i);
+ return 0;
+}
+
+static int __init kvm_cma_init_reserved_areas(void)
+{
+ int bitmap_size, ret;
+ unsigned long chunk_count;
+ struct kvm_cma *cma = &kvm_cma_area;
+
+ pr_debug("%s()\n", __func__);
+ if (!cma->count)
+ return 0;
+ chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+ bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
+ cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+ if (!cma->bitmap)
+ return -ENOMEM;
+
+ ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
+ if (ret)
+ goto error;
+ return 0;
+
+error:
+ kfree(cma->bitmap);
+ return ret;
+}
+core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
new file mode 100644
index 000000000000..655144f75fa5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -0,0 +1,27 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_CMA_ALLOC_H__
+#define __POWERPC_KVM_CMA_ALLOC_H__
+/*
+ * Both RMA and Hash page allocation will be multiple of 256K.
+ */
+#define KVM_CMA_CHUNK_ORDER 18
+
+extern struct page *kvm_alloc_cma(unsigned long nr_pages,
+ unsigned long align_pages);
+extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
+extern long kvm_cma_declare_contiguous(phys_addr_t size,
+ phys_addr_t alignment) __init;
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 6dcbb49105a4..9c515440ad1a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -27,7 +27,7 @@ static void *real_vmalloc_addr(void *x)
unsigned long addr = (unsigned long) x;
pte_t *p;
- p = find_linux_pte(swapper_pg_dir, addr);
+ p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
if (!p || !pte_present(*p))
return NULL;
/* assume we don't have huge pages in vmalloc space... */
@@ -139,20 +139,18 @@ static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
{
pte_t *ptep;
unsigned long ps = *pte_sizep;
- unsigned int shift;
+ unsigned int hugepage_shift;
- ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
if (!ptep)
return __pte(0);
- if (shift)
- *pte_sizep = 1ul << shift;
+ if (hugepage_shift)
+ *pte_sizep = 1ul << hugepage_shift;
else
*pte_sizep = PAGE_SIZE;
if (ps > *pte_sizep)
return __pte(0);
- if (!pte_present(*ptep))
- return __pte(0);
- return kvmppc_read_update_linux_pte(ptep, writing);
+ return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
}
static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
@@ -365,7 +363,11 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
}
+#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
+#else
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
+#endif
static inline int try_lock_tlbie(unsigned int *lock)
{
@@ -385,6 +387,80 @@ static inline int try_lock_tlbie(unsigned int *lock)
return old == 0;
}
+/*
+ * tlbie/tlbiel is a bit different on the PPC970 compared to later
+ * processors such as POWER7; the large page bit is in the instruction
+ * not RB, and the top 16 bits and the bottom 12 bits of the VA
+ * in RB must be 0.
+ */
+static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
+ long npages, int global, bool need_sync)
+{
+ long i;
+
+ if (global) {
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < npages; ++i) {
+ unsigned long rb = rbvalues[i];
+
+ if (rb & 1) /* large page */
+ asm volatile("tlbie %0,1" : :
+ "r" (rb & 0x0000fffffffff000ul));
+ else
+ asm volatile("tlbie %0,0" : :
+ "r" (rb & 0x0000fffffffff000ul));
+ }
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < npages; ++i) {
+ unsigned long rb = rbvalues[i];
+
+ if (rb & 1) /* large page */
+ asm volatile("tlbiel %0,1" : :
+ "r" (rb & 0x0000fffffffff000ul));
+ else
+ asm volatile("tlbiel %0,0" : :
+ "r" (rb & 0x0000fffffffff000ul));
+ }
+ asm volatile("ptesync" : : : "memory");
+ }
+}
+
+static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+ long npages, int global, bool need_sync)
+{
+ long i;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+ /* PPC970 tlbie instruction is a bit different */
+ do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
+ return;
+ }
+ if (global) {
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < npages; ++i)
+ asm volatile(PPC_TLBIE(%1,%0) : :
+ "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < npages; ++i)
+ asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
+ asm volatile("ptesync" : : : "memory");
+ }
+}
+
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
unsigned long pte_index, unsigned long avpn,
unsigned long *hpret)
@@ -410,19 +486,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (v & HPTE_V_VALID) {
hpte[0] &= ~HPTE_V_VALID;
rb = compute_tlbie_rb(v, hpte[1], pte_index);
- if (global_invalidates(kvm, flags)) {
- while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
- : : "r" (rb), "r" (kvm->arch.lpid));
- asm volatile("ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- asm volatile("ptesync" : : : "memory");
- asm volatile("tlbiel %0" : : "r" (rb));
- asm volatile("ptesync" : : : "memory");
- }
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/* Read PTE low word after tlbie to get final R/C values */
remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
}
@@ -450,12 +514,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
unsigned long *hp, *hptes[4], tlbrb[4];
long int i, j, k, n, found, indexes[4];
unsigned long flags, req, pte_index, rcbits;
- long int local = 0;
+ int global;
long int ret = H_SUCCESS;
struct revmap_entry *rev, *revs[4];
- if (atomic_read(&kvm->online_vcpus) == 1)
- local = 1;
+ global = global_invalidates(kvm, 0);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0;
for (; i < 4; ++i) {
@@ -531,22 +594,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
break;
/* Now that we've collected a batch, do the tlbies */
- if (!local) {
- while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile("ptesync" : : : "memory");
- for (k = 0; k < n; ++k)
- asm volatile(PPC_TLBIE(%1,%0) : :
- "r" (tlbrb[k]),
- "r" (kvm->arch.lpid));
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- asm volatile("ptesync" : : : "memory");
- for (k = 0; k < n; ++k)
- asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
- asm volatile("ptesync" : : : "memory");
- }
+ do_tlbies(kvm, tlbrb, n, global, true);
/* Read PTE low words after tlbie to get final R/C values */
for (k = 0; k < n; ++k) {
@@ -605,19 +653,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (v & HPTE_V_VALID) {
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = v & ~HPTE_V_VALID;
- if (global_invalidates(kvm, flags)) {
- while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
- : : "r" (rb), "r" (kvm->arch.lpid));
- asm volatile("ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- asm volatile("ptesync" : : : "memory");
- asm volatile("tlbiel %0" : : "r" (rb));
- asm volatile("ptesync" : : : "memory");
- }
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/*
* If the host has this page as readonly but the guest
* wants to make it read/write, reduce the permissions.
@@ -688,13 +724,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
hptep[0] &= ~HPTE_V_VALID;
rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
- while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
- : : "r" (rb), "r" (kvm->arch.lpid));
- asm volatile("ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
+ do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
@@ -708,12 +738,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
/* modify only the second-last byte, which contains the ref bit */
*((char *)hptep + 14) = rbyte;
- while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
- : : "r" (rb), "r" (kvm->arch.lpid));
- asm volatile("ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
+ do_tlbies(kvm, &rb, 1, 1, false);
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b02f91e4c70d..294b7af28cdd 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -29,6 +29,10 @@
#include <asm/kvm_book3s_asm.h>
#include <asm/mmu-hash64.h>
+#ifdef __LITTLE_ENDIAN__
+#error Need to fix lppaca and SLB shadow accesses in little endian mode
+#endif
+
/*****************************************************************************
* *
* Real Mode handlers that need to be in the linear mapping *
@@ -389,7 +393,11 @@ toc_tlbie_lock:
.tc native_tlbie_lock[TC],native_tlbie_lock
.previous
ld r3,toc_tlbie_lock@toc(2)
+#ifdef __BIG_ENDIAN__
lwz r8,PACA_LOCK_TOKEN(r13)
+#else
+ lwz r8,PACAPACAINDEX(r13)
+#endif
24: lwarx r0,0,r3
cmpwi r0,0
bne 24b
@@ -964,7 +972,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
32: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
/* Take the guest's tlbie_lock */
+#ifdef __BIG_ENDIAN__
lwz r8,PACA_LOCK_TOKEN(r13)
+#else
+ lwz r8,PACAPACAINDEX(r13)
+#endif
addi r3,r4,KVM_TLBIE_LOCK
24: lwarx r0,0,r3
cmpwi r0,0
@@ -1381,7 +1393,7 @@ hcall_try_real_mode:
cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont
LOAD_REG_ADDR(r4, hcall_real_table)
- lwzx r3,r3,r4
+ lwax r3,r3,r4
cmpwi r3,0
beq guest_exit_cont
add r3,r3,r4
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 48cbbf862958..17cfae5497a3 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -92,6 +92,11 @@ kvm_start_lightweight:
PPC_LL r3, VCPU_HFLAGS(r4)
rldicl r3, r3, 0, 63 /* r3 &= 1 */
stb r3, HSTATE_RESTORE_HID5(r13)
+
+ /* Load up guest SPRG3 value, since it's user readable */
+ ld r3, VCPU_SHARED(r4)
+ ld r3, VCPU_SHARED_SPRG3(r3)
+ mtspr SPRN_SPRG3, r3
#endif /* CONFIG_PPC_BOOK3S_64 */
PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
@@ -123,6 +128,15 @@ kvmppc_handler_highmem:
/* R7 = vcpu */
PPC_LL r7, GPR4(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * Reload kernel SPRG3 value.
+ * No need to save guest value as usermode can't modify SPRG3.
+ */
+ ld r3, PACA_SPRG3(r13)
+ mtspr SPRN_SPRG3, r3
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
PPC_STL r14, VCPU_GPR(R14)(r7)
PPC_STL r15, VCPU_GPR(R15)(r7)
PPC_STL r16, VCPU_GPR(R16)(r7)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index bdc40b8e77d9..27db1e665959 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
* both the traditional FP registers and the added VSX
* registers into thread.fpr[].
*/
- giveup_fpu(current);
+ if (current->thread.regs->msr & MSR_FP)
+ giveup_fpu(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
@@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
- giveup_altivec(current);
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
vcpu->arch.vscr = t->vscr;
}
@@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif
- current->thread.regs->msr |= msr;
-
if (msr & MSR_FP) {
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
@@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
#endif
}
+ current->thread.regs->msr |= msr;
vcpu->arch.guest_owned_ext |= msr;
kvmppc_recalc_shadow_msr(vcpu);
return RESUME_GUEST;
}
+/*
+ * Kernel code using FP or VMX could have flushed guest state to
+ * the thread_struct; if so, get it back now.
+ */
+static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
+{
+ unsigned long lost_ext;
+
+ lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
+ if (!lost_ext)
+ return;
+
+ if (lost_ext & MSR_FP)
+ kvmppc_load_up_fpu();
+ if (lost_ext & MSR_VEC)
+ kvmppc_load_up_altivec();
+ current->thread.regs->msr |= lost_ext;
+}
+
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
@@ -772,7 +792,7 @@ program_interrupt:
}
case BOOK3S_INTERRUPT_SYSCALL:
if (vcpu->arch.papr_enabled &&
- (kvmppc_get_last_inst(vcpu) == 0x44000022) &&
+ (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
!(vcpu->arch.shared->msr & MSR_PR)) {
/* SC 1 papr hypercalls */
ulong cmd = kvmppc_get_gpr(vcpu, 3);
@@ -890,8 +910,9 @@ program_interrupt:
local_irq_enable();
r = s;
} else {
- kvmppc_lazy_ee_enable();
+ kvmppc_fix_ee_before_entry();
}
+ kvmppc_handle_lost_ext(vcpu);
}
trace_kvm_book3s_reenter(r, vcpu);
@@ -1047,11 +1068,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (err)
goto free_shadow_vcpu;
+ err = -ENOMEM;
p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
- /* the real shared page fills the last 4k of our page */
- vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
if (!p)
goto uninit_vcpu;
+ /* the real shared page fills the last 4k of our page */
+ vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
#ifdef CONFIG_PPC_BOOK3S_64
/* default to book3s_64 (970fx) */
@@ -1161,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
- kvmppc_lazy_ee_enable();
+ kvmppc_fix_ee_before_entry();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -1239,8 +1261,7 @@ out:
#ifdef CONFIG_PPC64
int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
{
- /* No flags */
- info->flags = 0;
+ info->flags = KVM_PPC_1T_SEGMENTS;
/* SLB is always 64 entries */
info->slb_size = 64;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 94c1dd46b83d..a3a5cb8ee7ea 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -19,6 +19,7 @@
#include <asm/hvcall.h>
#include <asm/xics.h>
#include <asm/debug.h>
+#include <asm/time.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1020119226db..17722d82f1d1 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -673,9 +673,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
ret = s;
goto out;
}
- kvmppc_lazy_ee_enable();
-
- kvm_guest_enter();
#ifdef CONFIG_PPC_FPU
/* Save userspace FPU state in stack */
@@ -699,6 +696,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_load_guest_fp(vcpu);
#endif
+ kvmppc_fix_ee_before_entry();
+
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
/* No need for kvm_guest_exit. It's done in handle_exit.
@@ -795,7 +794,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
kvmppc_fill_pt_regs(&regs);
timer_interrupt(&regs);
break;
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
+#if defined(CONFIG_PPC_DOORBELL)
case BOOKE_INTERRUPT_DOORBELL:
kvmppc_fill_pt_regs(&regs);
doorbell_exception(&regs);
@@ -832,6 +831,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
int r = RESUME_HOST;
int s;
+ int idx;
+
+#ifdef CONFIG_PPC64
+ WARN_ON(local_paca->irq_happened != 0);
+#endif
+
+ /*
+ * We enter with interrupts disabled in hardware, but
+ * we need to call hard_irq_disable anyway to ensure that
+ * the software state is kept in sync.
+ */
+ hard_irq_disable();
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
@@ -1053,6 +1064,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
gfn = gpaddr >> PAGE_SHIFT;
@@ -1075,6 +1088,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_account_exit(vcpu, MMIO_EXITS);
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
@@ -1098,6 +1112,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
gfn = gpaddr >> PAGE_SHIFT;
@@ -1114,6 +1130,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
@@ -1149,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_enable();
r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
} else {
- kvmppc_lazy_ee_enable();
+ kvmppc_fix_ee_before_entry();
}
}
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index c41a5a96b558..6d6f153b6c1d 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -396,6 +396,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
struct kvm_book3e_206_tlb_entry *gtlbe;
int tlbsel, esel;
int recal = 0;
+ int idx;
tlbsel = get_tlb_tlbsel(vcpu);
esel = get_tlb_esel(vcpu, tlbsel);
@@ -430,6 +431,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
kvmppc_set_tlb1map_range(vcpu, gtlbe);
}
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
if (tlbe_is_host_safe(vcpu, gtlbe)) {
u64 eaddr = get_tlb_eaddr(gtlbe);
@@ -444,6 +447,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel));
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
return EMULATE_DONE;
}
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 753cc99eff2b..19c8379575f7 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -177,8 +177,6 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
r = 0;
- else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
- r = 0;
else
r = -ENOTSUPP;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 631a2650e4e4..751cd45f65a0 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -30,53 +30,10 @@
#include <asm/byteorder.h>
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
#include "timing.h"
#include "trace.h"
-#define OP_TRAP 3
-#define OP_TRAP_64 2
-
-#define OP_31_XOP_TRAP 4
-#define OP_31_XOP_LWZX 23
-#define OP_31_XOP_DCBST 54
-#define OP_31_XOP_TRAP_64 68
-#define OP_31_XOP_DCBF 86
-#define OP_31_XOP_LBZX 87
-#define OP_31_XOP_STWX 151
-#define OP_31_XOP_STBX 215
-#define OP_31_XOP_LBZUX 119
-#define OP_31_XOP_STBUX 247
-#define OP_31_XOP_LHZX 279
-#define OP_31_XOP_LHZUX 311
-#define OP_31_XOP_MFSPR 339
-#define OP_31_XOP_LHAX 343
-#define OP_31_XOP_STHX 407
-#define OP_31_XOP_STHUX 439
-#define OP_31_XOP_MTSPR 467
-#define OP_31_XOP_DCBI 470
-#define OP_31_XOP_LWBRX 534
-#define OP_31_XOP_TLBSYNC 566
-#define OP_31_XOP_STWBRX 662
-#define OP_31_XOP_LHBRX 790
-#define OP_31_XOP_STHBRX 918
-
-#define OP_LWZ 32
-#define OP_LD 58
-#define OP_LWZU 33
-#define OP_LBZ 34
-#define OP_LBZU 35
-#define OP_STW 36
-#define OP_STWU 37
-#define OP_STD 62
-#define OP_STB 38
-#define OP_STBU 39
-#define OP_LHZ 40
-#define OP_LHZU 41
-#define OP_LHA 42
-#define OP_LHAU 43
-#define OP_STH 44
-#define OP_STHU 45
-
void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
{
unsigned long dec_nsec;
@@ -169,6 +126,9 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
vcpu->arch.shared->sprg3 = spr_val;
break;
+ /* PIR can legally be written, but we ignore it */
+ case SPRN_PIR: break;
+
default:
emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
spr_val);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6316ee336e88..07c0106fab76 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
kvm_guest_exit();
continue;
}
-
- trace_hardirqs_on();
#endif
kvm_guest_enter();
@@ -420,6 +418,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
return kvmppc_core_create_memslot(slot, npages);
}
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -823,39 +825,39 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC: {
- struct file *filp;
+ struct fd f;
struct kvm_device *dev;
r = -EBADF;
- filp = fget(cap->args[0]);
- if (!filp)
+ f = fdget(cap->args[0]);
+ if (!f.file)
break;
r = -EPERM;
- dev = kvm_device_from_filp(filp);
+ dev = kvm_device_from_filp(f.file);
if (dev)
r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
- fput(filp);
+ fdput(f);
break;
}
#endif
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS: {
- struct file *filp;
+ struct fd f;
struct kvm_device *dev;
r = -EBADF;
- filp = fget(cap->args[0]);
- if (!filp)
+ f = fdget(cap->args[0]);
+ if (!f.file)
break;
r = -EPERM;
- dev = kvm_device_from_filp(filp);
+ dev = kvm_device_from_filp(f.file);
if (dev)
r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
- fput(filp);
+ fdput(f);
break;
}
#endif /* CONFIG_KVM_XICS */