diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/include/asm/vdso/gettimeofday.h | 3 | ||||
-rw-r--r-- | arch/riscv/include/asm/csr.h | 5 | ||||
-rw-r--r-- | arch/riscv/include/asm/kvm_host.h | 21 | ||||
-rw-r--r-- | arch/riscv/include/asm/kvm_vcpu_pmu.h | 16 | ||||
-rw-r--r-- | arch/riscv/include/asm/sbi.h | 38 | ||||
-rw-r--r-- | arch/riscv/include/uapi/asm/kvm.h | 1 | ||||
-rw-r--r-- | arch/riscv/kernel/paravirt.c | 6 | ||||
-rw-r--r-- | arch/riscv/kvm/aia.c | 5 | ||||
-rw-r--r-- | arch/riscv/kvm/main.c | 18 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu.c | 85 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_exit.c | 4 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_onereg.c | 6 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_pmu.c | 260 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi.c | 7 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_hsm.c | 42 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_pmu.c | 17 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_sta.c | 4 | ||||
-rw-r--r-- | arch/riscv/kvm/vm.c | 1 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 8 |
19 files changed, 448 insertions, 99 deletions
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index f0a4cf01e85c..78302f6c2580 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -4,7 +4,6 @@ #ifndef __ASSEMBLY__ -#include <asm/page.h> #include <asm/vdso/timebase.h> #include <asm/barrier.h> #include <asm/unistd.h> @@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void); static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { - return (void *)vd + PAGE_SIZE; + return (void *)vd + (1U << CONFIG_PAGE_SHIFT); } #endif diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 2468c55933cd..25966995da04 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -168,7 +168,8 @@ #define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT) #define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \ (_AC(1, UL) << IRQ_S_TIMER) | \ - (_AC(1, UL) << IRQ_S_EXT)) + (_AC(1, UL) << IRQ_S_EXT) | \ + (_AC(1, UL) << IRQ_PMU_OVF)) /* AIA CSR bits */ #define TOPI_IID_SHIFT 16 @@ -281,7 +282,7 @@ #define CSR_HPMCOUNTER30H 0xc9e #define CSR_HPMCOUNTER31H 0xc9f -#define CSR_SSCOUNTOVF 0xda0 +#define CSR_SCOUNTOVF 0xda0 #define CSR_SSTATUS 0x100 #define CSR_SIE 0x104 diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 484d04a92fa6..d96281278586 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -43,6 +43,17 @@ KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6) +#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \ + BIT(EXC_BREAKPOINT) | \ + BIT(EXC_SYSCALL) | \ + BIT(EXC_INST_PAGE_FAULT) | \ + BIT(EXC_LOAD_PAGE_FAULT) | \ + BIT(EXC_STORE_PAGE_FAULT)) + +#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \ + BIT(IRQ_VS_TIMER) | \ + BIT(IRQ_VS_EXT)) + enum kvm_riscv_hfence_type { KVM_RISCV_HFENCE_UNKNOWN = 0, KVM_RISCV_HFENCE_GVMA_VMID_GPA, @@ -169,6 +180,7 @@ struct kvm_vcpu_csr { struct kvm_vcpu_config { u64 henvcfg; u64 hstateen0; + unsigned long hedeleg; }; struct kvm_vcpu_smstateen_csr { @@ -211,6 +223,7 @@ struct kvm_vcpu_arch { /* CPU context upon Guest VCPU reset */ struct kvm_cpu_context guest_reset_context; + spinlock_t reset_cntx_lock; /* CPU CSR context upon Guest VCPU reset */ struct kvm_vcpu_csr guest_reset_csr; @@ -252,8 +265,9 @@ struct kvm_vcpu_arch { /* Cache pages needed to program page tables with spinlock held */ struct kvm_mmu_memory_cache mmu_page_cache; - /* VCPU power-off state */ - bool power_off; + /* VCPU power state */ + struct kvm_mp_state mp_state; + spinlock_t mp_state_lock; /* Don't run the VCPU (blocked) */ bool pause; @@ -374,8 +388,11 @@ int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu); bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask); +void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); +void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); +bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h index 395518a1664e..fa0f535bbbf0 100644 --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h @@ -20,7 +20,7 @@ static_assert(RISCV_KVM_MAX_COUNTERS <= 64); struct kvm_fw_event { /* Current value of the event */ - unsigned long value; + u64 value; /* Event monitoring status */ bool started; @@ -36,6 +36,7 @@ struct kvm_pmc { bool started; /* Monitoring event ID */ unsigned long event_idx; + struct kvm_vcpu *vcpu; }; /* PMU data structure per vcpu */ @@ -50,6 +51,12 @@ struct kvm_pmu { bool init_done; /* Bit map of all the virtual counter used */ DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS); + /* Bit map of all the virtual counter overflown */ + DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS); + /* The address of the counter snapshot area (guest physical address) */ + gpa_t snapshot_addr; + /* The actual data of the snapshot */ + struct riscv_pmu_snapshot_data *sdata; }; #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu_context) @@ -82,9 +89,14 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba unsigned long ctr_mask, unsigned long flags, unsigned long eidx, u64 evtdata, struct kvm_vcpu_sbi_return *retdata); -int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, +int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, struct kvm_vcpu_sbi_return *retdata); +int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + struct kvm_vcpu_sbi_return *retdata); void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long flags, + struct kvm_vcpu_sbi_return *retdata); void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6e68f8dff76b..112a0a0d9f46 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -131,6 +131,8 @@ enum sbi_ext_pmu_fid { SBI_EXT_PMU_COUNTER_START, SBI_EXT_PMU_COUNTER_STOP, SBI_EXT_PMU_COUNTER_FW_READ, + SBI_EXT_PMU_COUNTER_FW_READ_HI, + SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, }; union sbi_pmu_ctr_info { @@ -147,6 +149,13 @@ union sbi_pmu_ctr_info { }; }; +/* Data structure to contain the pmu snapshot data */ +struct riscv_pmu_snapshot_data { + u64 ctr_overflow_mask; + u64 ctr_values[64]; + u64 reserved[447]; +}; + #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) #define RISCV_PMU_RAW_EVENT_IDX 0x20000 @@ -232,20 +241,22 @@ enum sbi_pmu_ctr_type { #define SBI_PMU_EVENT_IDX_INVALID 0xFFFFFFFF /* Flags defined for config matching function */ -#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0) -#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1) -#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2) -#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3) -#define SBI_PMU_CFG_FLAG_SET_VSINH (1 << 4) -#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5) -#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6) -#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7) +#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0) +#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1) +#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2) +#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3) +#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4) +#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5) +#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6) +#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7) /* Flags defined for counter start function */ -#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0) +#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0) +#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1) /* Flags defined for counter stop function */ -#define SBI_PMU_STOP_FLAG_RESET (1 << 0) +#define SBI_PMU_STOP_FLAG_RESET BIT(0) +#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1) enum sbi_ext_dbcn_fid { SBI_EXT_DBCN_CONSOLE_WRITE = 0, @@ -266,7 +277,7 @@ struct sbi_sta_struct { u8 pad[47]; } __packed; -#define SBI_STA_SHMEM_DISABLE -1 +#define SBI_SHMEM_DISABLE -1 /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 @@ -284,6 +295,7 @@ struct sbi_sta_struct { #define SBI_ERR_ALREADY_AVAILABLE -6 #define SBI_ERR_ALREADY_STARTED -7 #define SBI_ERR_ALREADY_STOPPED -8 +#define SBI_ERR_NO_SHMEM -9 extern unsigned long sbi_spec_version; struct sbiret { @@ -355,8 +367,8 @@ static inline unsigned long sbi_minor_version(void) static inline unsigned long sbi_mk_version(unsigned long major, unsigned long minor) { - return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << - SBI_SPEC_VERSION_MAJOR_SHIFT) | minor; + return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT) + | (minor & SBI_SPEC_VERSION_MINOR_MASK); } int sbi_err_map_linux_errno(int err); diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index b1c503c2959c..e878e7cc3978 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZFA, KVM_RISCV_ISA_EXT_ZTSO, KVM_RISCV_ISA_EXT_ZACAS, + KVM_RISCV_ISA_EXT_SSCOFPMF, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index 0d6225fd3194..fa6b0339a65d 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi, ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM, lo, hi, flags, 0, 0, 0); if (ret.error) { - if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE) + if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE) pr_warn("Failed to disable steal-time shmem"); else pr_warn("Failed to set steal-time shmem"); @@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu) static int pv_time_cpu_down_prepare(unsigned int cpu) { - return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE, - SBI_STA_SHMEM_DISABLE, 0); + return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE, + SBI_SHMEM_DISABLE, 0); } static u64 pv_time_steal_clock(int cpu) diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index a944294f6f23..0f0a9d11bb5f 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void) enable_percpu_irq(hgei_parent_irq, irq_get_trigger_type(hgei_parent_irq)); csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); + /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); } void kvm_riscv_aia_disable(void) @@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void) return; hgctrl = get_cpu_ptr(&aia_hgei); + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF)); /* Disable per-CPU SGEI interrupt */ csr_clear(CSR_HIE, BIT(IRQ_S_GEXT)); disable_percpu_irq(hgei_parent_irq); diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 225a435d9c9a..bab2ec34cd87 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -22,22 +22,8 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_arch_hardware_enable(void) { - unsigned long hideleg, hedeleg; - - hedeleg = 0; - hedeleg |= (1UL << EXC_INST_MISALIGNED); - hedeleg |= (1UL << EXC_BREAKPOINT); - hedeleg |= (1UL << EXC_SYSCALL); - hedeleg |= (1UL << EXC_INST_PAGE_FAULT); - hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT); - hedeleg |= (1UL << EXC_STORE_PAGE_FAULT); - csr_write(CSR_HEDELEG, hedeleg); - - hideleg = 0; - hideleg |= (1UL << IRQ_VS_SOFT); - hideleg |= (1UL << IRQ_VS_TIMER); - hideleg |= (1UL << IRQ_VS_EXT); - csr_write(CSR_HIDELEG, hideleg); + csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); + csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); /* VS should access only the time counter directly. Everything else should trap */ csr_write(CSR_HCOUNTEREN, 0x02); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index b5ca9f2e98ac..17e21df36cc1 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -64,7 +64,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(csr, reset_csr, sizeof(*csr)); + spin_lock(&vcpu->arch.reset_cntx_lock); memcpy(cntx, reset_cntx, sizeof(*cntx)); + spin_unlock(&vcpu->arch.reset_cntx_lock); kvm_riscv_vcpu_fp_reset(vcpu); @@ -102,6 +104,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cntx; struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; + spin_lock_init(&vcpu->arch.mp_state_lock); + /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; @@ -119,12 +123,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) spin_lock_init(&vcpu->arch.hfence_lock); /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ + spin_lock_init(&vcpu->arch.reset_cntx_lock); + + spin_lock(&vcpu->arch.reset_cntx_lock); cntx = &vcpu->arch.guest_reset_context; cntx->sstatus = SR_SPP | SR_SPIE; cntx->hstatus = 0; cntx->hstatus |= HSTATUS_VTW; cntx->hstatus |= HSTATUS_SPVP; cntx->hstatus |= HSTATUS_SPV; + spin_unlock(&vcpu->arch.reset_cntx_lock); if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx)) return -ENOMEM; @@ -201,7 +209,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && - !vcpu->arch.power_off && !vcpu->arch.pause); + !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) @@ -365,6 +373,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) } } + /* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */ + if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) { + if (!(hvip & (1UL << IRQ_PMU_OVF)) && + !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask)) + clear_bit(IRQ_PMU_OVF, v->irqs_pending); + } + /* Sync-up AIA high interrupts */ kvm_riscv_vcpu_aia_sync_interrupts(vcpu); @@ -382,7 +397,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; set_bit(irq, vcpu->arch.irqs_pending); @@ -397,14 +413,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) { /* - * We only allow VS-mode software, timer, and external + * We only allow VS-mode software, timer, counter overflow and external * interrupts when irq is one of the local interrupts * defined by RISC-V privilege specification. */ if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; clear_bit(irq, vcpu->arch.irqs_pending); @@ -429,26 +446,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask); } -void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = true; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } -void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_off(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = false; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); kvm_vcpu_wake_up(vcpu); } +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_on(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (vcpu->arch.power_off) - mp_state->mp_state = KVM_MP_STATE_STOPPED; - else - mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + *mp_state = READ_ONCE(vcpu->arch.mp_state); return 0; } @@ -458,25 +491,36 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; + spin_lock(&vcpu->arch.mp_state_lock); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.power_off = false; + WRITE_ONCE(vcpu->arch.mp_state, *mp_state); break; case KVM_MP_STATE_STOPPED: - kvm_riscv_vcpu_power_off(vcpu); + __kvm_riscv_vcpu_power_off(vcpu); break; default: ret = -EINVAL; } + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - /* TODO; To be implemented later. */ - return -EINVAL; + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT); + } else { + vcpu->guest_debug = 0; + vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT); + } + + return 0; } static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) @@ -505,6 +549,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, SMSTATEEN)) cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; } + + cfg->hedeleg = KVM_HEDELEG_DEFAULT; + if (vcpu->guest_debug) + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -519,6 +567,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_VSEPC, csr->vsepc); csr_write(CSR_VSCAUSE, csr->vscause); csr_write(CSR_VSTVAL, csr->vstval); + csr_write(CSR_HEDELEG, cfg->hedeleg); csr_write(CSR_HVIP, csr->hvip); csr_write(CSR_VSATP, csr->vsatp); csr_write(CSR_HENVCFG, cfg->henvcfg); @@ -584,11 +633,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, - (!vcpu->arch.power_off) && (!vcpu->arch.pause), + (!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); kvm_vcpu_srcu_read_lock(vcpu); - if (vcpu->arch.power_off || vcpu->arch.pause) { + if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) { /* * Awaken to handle a signal, request to * sleep again later. diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index 2415722c01b8..5761f95abb60 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -204,6 +204,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run); break; + case EXC_BREAKPOINT: + run->exit_reason = KVM_EXIT_DEBUG; + ret = 0; + break; default: break; } diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 994adc26db4b..c676275ea0a0 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = { /* Multi letter extensions (alphabetically sorted) */ KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), @@ -99,6 +100,9 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) switch (ext) { case KVM_RISCV_ISA_EXT_H: return false; + case KVM_RISCV_ISA_EXT_SSCOFPMF: + /* Sscofpmf depends on interrupt filtering defined in ssaia */ + return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); case KVM_RISCV_ISA_EXT_V: return riscv_v_vstate_ctrl_user_allowed(); default: @@ -116,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: + /* There is not architectural config bit to disable sscofpmf completely */ + case KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 86391a5061dd..04db1f993c47 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -14,6 +14,7 @@ #include <asm/csr.h> #include <asm/kvm_vcpu_sbi.h> #include <asm/kvm_vcpu_pmu.h> +#include <asm/sbi.h> #include <linux/bitops.h> #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs) @@ -39,7 +40,7 @@ static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc) u64 sample_period; if (!pmc->counter_val) - sample_period = counter_val_mask + 1; + sample_period = counter_val_mask; else sample_period = (-pmc->counter_val) & counter_val_mask; @@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx, return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask); } +static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + unsigned long *out_val) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc; + int fevent_code; + + if (!IS_ENABLED(CONFIG_32BIT)) { + pr_warn("%s: should be invoked for only RV32\n", __func__); + return -EINVAL; + } + + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld]during read\n", cidx); + return -EINVAL; + } + + pmc = &kvpmu->pmc[cidx]; + + if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) + return -EINVAL; + + fevent_code = get_event_code(pmc->event_idx); + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + + *out_val = pmc->counter_val >> 32; + + return 0; +} + static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, unsigned long *out_val) { @@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, u64 enabled, running; int fevent_code; + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld] during read\n", cidx); + return -EINVAL; + } + pmc = &kvpmu->pmc[cidx]; if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { @@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct return 0; } -static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, - unsigned long flags, unsigned long eidx, unsigned long evtdata) +static void kvm_riscv_pmu_overflow(struct perf_event *perf_event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct kvm_vcpu *vcpu = pmc->vcpu; + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu); + u64 period; + + /* + * Stop the event counting by directly accessing the perf_event. + * Otherwise, this needs to deferred via a workqueue. + * That will introduce skew in the counter value because the actual + * physical counter would start after returning from this function. + * It will be stopped again once the workqueue is scheduled + */ + rpmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * The hw counter would start automatically when this function returns. + * Thus, the host may continue to interrupt and inject it to the guest + * even without the guest configuring the next event. Depending on the hardware + * the host may have some sluggishness only if privilege mode filtering is not + * available. In an ideal world, where qemu is not the only capable hardware, + * this can be removed. + * FYI: ARM64 does this way while x86 doesn't do anything as such. + * TODO: Should we keep it for RISC-V ? + */ + period = -(local64_read(&perf_event->count)); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; + + set_bit(pmc->idx, kvpmu->pmc_overflown); + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF); + + rpmu->pmu.start(perf_event, PERF_EF_RELOAD); +} + +static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, + unsigned long flags, unsigned long eidx, + unsigned long evtdata) { struct perf_event *event; @@ -247,7 +325,7 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr */ attr->sample_period = kvm_pmu_get_sample_period(pmc); - event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc); + event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); @@ -310,6 +388,80 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num, return ret; } +static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); + + if (kvpmu->sdata) { + if (kvpmu->snapshot_addr != INVALID_GPA) { + memset(kvpmu->sdata, 0, snapshot_area_size); + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, + kvpmu->sdata, snapshot_area_size); + } else { + pr_warn("snapshot address invalid\n"); + } + kfree(kvpmu->sdata); + kvpmu->sdata = NULL; + } + kvpmu->snapshot_addr = INVALID_GPA; +} + +int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long flags, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); + int sbiret = 0; + gpa_t saddr; + unsigned long hva; + bool writable; + + if (!kvpmu || flags) { + sbiret = SBI_ERR_INVALID_PARAM; + goto out; + } + + if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) { + kvm_pmu_clear_snapshot_area(vcpu); + return 0; + } + + saddr = saddr_low; + + if (saddr_high != 0) { + if (IS_ENABLED(CONFIG_32BIT)) + saddr |= ((gpa_t)saddr_high << 32); + else + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable); + if (kvm_is_error_hva(hva) || !writable) { + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); + if (!kvpmu->sdata) + return -ENOMEM; + + if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { + kfree(kvpmu->sdata); + sbiret = SBI_ERR_FAILURE; + goto out; + } + + kvpmu->snapshot_addr = saddr; + +out: + retdata->err_val = sbiret; + + return 0; +} + int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_return *retdata) { @@ -343,20 +495,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, int i, pmc_index, sbiret = 0; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set) { + if (kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data))) { + pr_warn("Unable to read snapshot shared memory while starting counters\n"); + sbiret = SBI_ERR_FAILURE; + goto out; + } + } /* Start the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; if (!test_bit(pmc_index, kvpmu->pmc_in_use)) continue; + /* The guest started the counter again. Reset the overflow status */ + clear_bit(pmc_index, kvpmu->pmc_overflown); pmc = &kvpmu->pmc[pmc_index]; - if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) + if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) { pmc->counter_val = ival; + } else if (snap_flag_set) { + /* The counter index in the snapshot are relative to the counter base */ + pmc->counter_val = kvpmu->sdata->ctr_values[i]; + } + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { fevent_code = get_event_code(pmc->event_idx); if (fevent_code >= SBI_PMU_FW_MAX) { @@ -400,12 +572,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, u64 enabled, running; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; + bool shmem_needs_update = false; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + /* Stop the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; @@ -432,21 +611,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, sbiret = SBI_ERR_ALREADY_STOPPED; } - if (flags & SBI_PMU_STOP_FLAG_RESET) { - /* Relase the counter if this is a reset request */ - pmc->counter_val += perf_event_read_value(pmc->perf_event, - &enabled, &running); + if (flags & SBI_PMU_STOP_FLAG_RESET) + /* Release the counter if this is a reset request */ kvm_pmu_release_perf_event(pmc); - } } else { sbiret = SBI_ERR_INVALID_PARAM; } + + if (snap_flag_set && !sbiret) { + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + else if (pmc->perf_event) + pmc->counter_val += perf_event_read_value(pmc->perf_event, + &enabled, &running); + /* + * The counter and overflow indicies in the snapshot region are w.r.to + * cbase. Modify the set bit in the counter mask instead of the pmc_index + * which indicates the absolute counter index. + */ + if (test_bit(pmc_index, kvpmu->pmc_overflown)) + kvpmu->sdata->ctr_overflow_mask |= BIT(i); + kvpmu->sdata->ctr_values[i] = pmc->counter_val; + shmem_needs_update = true; + } + if (flags & SBI_PMU_STOP_FLAG_RESET) { pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; clear_bit(pmc_index, kvpmu->pmc_in_use); + clear_bit(pmc_index, kvpmu->pmc_overflown); + if (snap_flag_set) { + /* + * Only clear the given counter as the caller is responsible to + * validate both the overflow mask and configured counters. + */ + kvpmu->sdata->ctr_overflow_mask &= ~BIT(i); + shmem_needs_update = true; + } } } + if (shmem_needs_update) + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data)); + out: retdata->err_val = sbiret; @@ -458,7 +665,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba unsigned long eidx, u64 evtdata, struct kvm_vcpu_sbi_return *retdata) { - int ctr_idx, ret, sbiret = 0; + int ctr_idx, sbiret = 0; + long ret; bool is_fevent; unsigned long event_code; u32 etype = kvm_pmu_get_perf_event_type(eidx); @@ -517,8 +725,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba kvpmu->fw_event[event_code].started = true; } else { ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata); - if (ret) - return ret; + if (ret) { + sbiret = SBI_ERR_NOT_SUPPORTED; + goto out; + } } set_bit(ctr_idx, kvpmu->pmc_in_use); @@ -530,7 +740,19 @@ out: return 0; } -int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, +int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + struct kvm_vcpu_sbi_return *retdata) +{ + int ret; + + ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val); + if (ret == -EINVAL) + retdata->err_val = SBI_ERR_INVALID_PARAM; + + return 0; +} + +int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, struct kvm_vcpu_sbi_return *retdata) { int ret; @@ -566,6 +788,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) kvpmu->num_hw_ctrs = num_hw_ctrs + 1; kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX; memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvpmu->snapshot_addr = INVALID_GPA; if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) { pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA"); @@ -585,6 +808,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc = &kvpmu->pmc[i]; pmc->idx = i; pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; + pmc->vcpu = vcpu; if (i < kvpmu->num_hw_ctrs) { pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW; if (i < 3) @@ -601,7 +825,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc->cinfo.csr = CSR_CYCLE + i; } else { pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW; - pmc->cinfo.width = BITS_PER_LONG - 1; + pmc->cinfo.width = 63; } } @@ -617,14 +841,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) if (!kvpmu) return; - for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) { + for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) { pmc = &kvpmu->pmc[i]; pmc->counter_val = 0; kvm_pmu_release_perf_event(pmc); pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; } - bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS); memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvm_pmu_clear_snapshot_area(vcpu); } void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 72a2ffb8dcd1..62f409d4176e 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -138,8 +138,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, unsigned long i; struct kvm_vcpu *tmp; - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.power_off = true; + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + spin_lock(&vcpu->arch.mp_state_lock); + WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); + spin_unlock(&vcpu->arch.mp_state_lock); + } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&run->system_event, 0, sizeof(run->system_event)); diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index 7dca0e9381d9..dce667f4b6ab 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -18,13 +18,20 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cp = &vcpu->arch.guest_context; struct kvm_vcpu *target_vcpu; unsigned long target_vcpuid = cp->a0; + int ret = 0; target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) - return SBI_ERR_ALREADY_AVAILABLE; + spin_lock(&target_vcpu->arch.mp_state_lock); + + if (!kvm_riscv_vcpu_stopped(target_vcpu)) { + ret = SBI_ERR_ALREADY_AVAILABLE; + goto out; + } + + spin_lock(&target_vcpu->arch.reset_cntx_lock); reset_cntx = &target_vcpu->arch.guest_reset_context; /* start address */ reset_cntx->sepc = cp->a1; @@ -32,21 +39,35 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) reset_cntx->a0 = target_vcpuid; /* private data passed from kernel */ reset_cntx->a1 = cp->a2; + spin_unlock(&target_vcpu->arch.reset_cntx_lock); + kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu); - kvm_riscv_vcpu_power_on(target_vcpu); + __kvm_riscv_vcpu_power_on(target_vcpu); - return 0; +out: + spin_unlock(&target_vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu) { - if (vcpu->arch.power_off) - return SBI_ERR_FAILURE; + int ret = 0; - kvm_riscv_vcpu_power_off(vcpu); + spin_lock(&vcpu->arch.mp_state_lock); - return 0; + if (kvm_riscv_vcpu_stopped(vcpu)) { + ret = SBI_ERR_FAILURE; + goto out; + } + + __kvm_riscv_vcpu_power_off(vcpu); + +out: + spin_unlock(&vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) @@ -58,7 +79,7 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) + if (!kvm_riscv_vcpu_stopped(target_vcpu)) return SBI_HSM_STATE_STARTED; else if (vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; @@ -71,14 +92,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, { int ret = 0; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; - struct kvm *kvm = vcpu->kvm; unsigned long funcid = cp->a6; switch (funcid) { case SBI_EXT_HSM_HART_START: - mutex_lock(&kvm->lock); ret = kvm_sbi_hsm_vcpu_start(vcpu); - mutex_unlock(&kvm->lock); break; case SBI_EXT_HSM_HART_STOP: ret = kvm_sbi_hsm_vcpu_stop(vcpu); diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c index 7eca72df2cbd..e4be34e03e83 100644 --- a/arch/riscv/kvm/vcpu_sbi_pmu.c +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, #endif /* * This can fail if perf core framework fails to create an event. - * Forward the error to userspace because it's an error which - * happened within the host kernel. The other option would be - * to convert to an SBI error and forward to the guest. + * No need to forward the error to userspace and exit the guest. + * The operation can continue without profiling. Forward the + * appropriate SBI error to the guest. */ ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp, retdata); @@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; case SBI_EXT_PMU_COUNTER_FW_READ: - ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata); + ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata); + break; + case SBI_EXT_PMU_COUNTER_FW_READ_HI: + if (IS_ENABLED(CONFIG_32BIT)) + ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata); + else + retdata->out_val = 0; + break; + case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: + ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index d8cf9ca28c61..5f35427114c1 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) if (flags != 0) return SBI_ERR_INVALID_PARAM; - if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE && - shmem_phys_hi == SBI_STA_SHMEM_DISABLE) { + if (shmem_phys_lo == SBI_SHMEM_DISABLE && + shmem_phys_hi == SBI_SHMEM_DISABLE) { vcpu->arch.sta.shmem = INVALID_GPA; return 0; } diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index ce58bc48e5b8..7396b8654f45 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_NR_VCPUS: diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 2641ba620f12..e010bfed8417 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu = event->pmu; /* - * Make sure we get updated with the first PEBS - * event. It will trigger also during removal, but - * that does not hurt: + * Make sure we get updated with the first PEBS event. + * During removal, ->pebs_data_cfg is still valid for + * the last PEBS event. Don't clear it. */ - if (cpuc->n_pebs == 1) + if ((cpuc->n_pebs == 1) && add) cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; if (needed_cb != pebs_needs_sched_cb(cpuc)) { |